Skip to content

Commit 629b6bf

Browse files
feat(glossary): support multiple ownership types
1 parent cb7d687 commit 629b6bf

10 files changed

+1062
-18
lines changed

metadata-ingestion/src/datahub/ingestion/source/metadata/business_glossary.py

+33-18
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ class Owners(ConfigModel):
4545
groups: Optional[List[str]] = None
4646

4747

48+
# Type of an `owners` config field: either a list of Owners entries (so that
# several ownership types can be declared together) or a single Owners
# object, which keeps the original single-entry form valid.
OwnersMultipleTypes = Union[List[Owners], Owners]
49+
50+
4851
class KnowledgeCard(ConfigModel):
4952
url: Optional[str] = None
5053
label: Optional[str] = None
@@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel):
5760
term_source: Optional[str] = None
5861
source_ref: Optional[str] = None
5962
source_url: Optional[str] = None
60-
owners: Optional[Owners] = None
63+
owners: Optional[OwnersMultipleTypes] = None
6164
inherits: Optional[List[str]] = None
6265
contains: Optional[List[str]] = None
6366
values: Optional[List[str]] = None
@@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel):
7477
id: Optional[str] = None
7578
name: str
7679
description: str
77-
owners: Optional[Owners] = None
80+
owners: Optional[OwnersMultipleTypes] = None
7881
terms: Optional[List["GlossaryTermConfig"]] = None
7982
nodes: Optional[List["GlossaryNodeConfig"]] = None
8083
knowledge_links: Optional[List[KnowledgeCard]] = None
@@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel):
8891
"""Holds defaults for populating fields in glossary terms"""
8992

9093
source: Optional[str] = None
91-
owners: Owners
94+
owners: OwnersMultipleTypes
9295
url: Optional[str] = None
9396
source_type: str = "INTERNAL"
9497

@@ -153,30 +156,42 @@ def make_glossary_term_urn(
153156
return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id)
154157

155158

156-
def get_owners(owners: Owners) -> models.OwnershipClass:
157-
ownership_type, ownership_type_urn = validate_ownership_type(owners.type)
159+
def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass:
    """Build one ownership aspect from a single ``Owners`` entry or a list of them.

    A bare ``Owners`` object is handled as a one-element list, so the
    single-entry form stays backward compatible.

    Args:
        owners: Either one ``Owners`` object or a list of ``Owners`` objects.

    Returns:
        A ``models.OwnershipClass`` whose ``owners`` list holds every owner
        produced by ``get_owners`` for each entry, in entry order.
    """
    entries = [owners] if isinstance(owners, Owners) else owners
    collected = [owner for entry in entries for owner in get_owners(entry)]
    return models.OwnershipClass(owners=collected)
169+
170+
171+
def get_owners(owners: Owners) -> Iterable[models.OwnerClass]:
    """Yield one ``models.OwnerClass`` per user, then per group, in *owners*.

    The ownership type is resolved once and applied to every yielded owner:

    * if ``owners.type`` starts with ``urn:li:ownershipType:``, the type is
      ``"CUSTOM"`` and that string is used as the type URN;
    * otherwise both values come from ``validate_ownership_type(owners.type)``;
    * a non-``None`` ``owners.typeUrn`` overrides the type URN in either case.

    Args:
        owners: A single ownership entry with ``type``, ``typeUrn``,
            ``users`` and ``groups`` attributes.

    Yields:
        Owners built from ``owners.users`` (via ``make_user_urn``) followed by
        owners built from ``owners.groups`` (via ``make_group_urn``).
    """
    if owners.type.startswith("urn:li:ownershipType:"):
        resolved_type, resolved_type_urn = "CUSTOM", owners.type
    else:
        resolved_type, resolved_type_urn = validate_ownership_type(owners.type)

    # An explicitly configured typeUrn wins over whatever was derived above.
    if owners.typeUrn is not None:
        resolved_type_urn = owners.typeUrn

    # Users first, then groups; a missing (None) list contributes nothing.
    for to_urn, names in (
        (make_user_urn, owners.users),
        (make_group_urn, owners.groups),
    ):
        if names is None:
            continue
        for name in names:
            yield models.OwnerClass(
                owner=to_urn(name),
                type=resolved_type,
                typeUrn=resolved_type_urn,
            )
180195

181196

182197
def get_mces(
@@ -185,7 +200,7 @@ def get_mces(
185200
ingestion_config: BusinessGlossarySourceConfig,
186201
ctx: PipelineContext,
187202
) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]:
188-
root_owners = get_owners(glossary.owners)
203+
root_owners = get_owners_multiple_types(glossary.owners)
189204

190205
if glossary.nodes:
191206
for node in glossary.nodes:
@@ -270,7 +285,7 @@ def get_mces_from_node(
270285
node_owners = parentOwners
271286
if glossaryNode.owners is not None:
272287
assert glossaryNode.owners is not None
273-
node_owners = get_owners(glossaryNode.owners)
288+
node_owners = get_owners_multiple_types(glossaryNode.owners)
274289

275290
node_snapshot = models.GlossaryNodeSnapshotClass(
276291
urn=node_urn,
@@ -426,7 +441,7 @@ def get_mces_from_term(
426441
ownership: models.OwnershipClass = parentOwnership
427442
if glossaryTerm.owners is not None:
428443
assert glossaryTerm.owners is not None
429-
ownership = get_owners(glossaryTerm.owners)
444+
ownership = get_owners_multiple_types(glossaryTerm.owners)
430445
aspects.append(ownership)
431446

432447
if glossaryTerm.domain is not None:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
version: "1"
2+
source: DataHub
3+
owners:
4+
users:
5+
- mjames
6+
url: "https://github.com/datahub-project/datahub/"
7+
8+
nodes:
9+
- name: Custom URN Types
10+
description: Testing custom ownership URN types
11+
owners:
12+
- type: urn:li:ownershipType:custom_type_1
13+
users:
14+
- user1
15+
groups:
16+
- group1
17+
- type: urn:li:ownershipType:custom_type_2
18+
users:
19+
- user2
20+
terms:
21+
- name: Mixed URN Types
22+
description: Term with custom URN types
23+
owners:
24+
- type: urn:li:ownershipType:custom_type_3
25+
users:
26+
- user3
27+
- type: urn:li:ownershipType:custom_type_4
28+
groups:
29+
- group2
30+
- name: Mixed Standard and URN
31+
description: Term with both standard and URN types
32+
owners:
33+
- type: DEVELOPER
34+
users:
35+
- dev1
36+
- type: urn:li:ownershipType:custom_type_5
37+
groups:
38+
- group3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
[
2+
{
3+
"proposedSnapshot": {
4+
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryNodeSnapshot": {
5+
"urn": "urn:li:glossaryNode:Custom URN Types",
6+
"aspects": [
7+
{
8+
"com.linkedin.pegasus2avro.glossary.GlossaryNodeInfo": {
9+
"customProperties": {},
10+
"definition": "Testing custom ownership URN types",
11+
"name": "Custom URN Types"
12+
}
13+
},
14+
{
15+
"com.linkedin.pegasus2avro.common.Ownership": {
16+
"owners": [
17+
{
18+
"owner": "urn:li:corpuser:user1",
19+
"type": "CUSTOM",
20+
"typeUrn": "urn:li:ownershipType:custom_type_1"
21+
},
22+
{
23+
"owner": "urn:li:corpGroup:group1",
24+
"type": "CUSTOM",
25+
"typeUrn": "urn:li:ownershipType:custom_type_1"
26+
},
27+
{
28+
"owner": "urn:li:corpuser:user2",
29+
"type": "CUSTOM",
30+
"typeUrn": "urn:li:ownershipType:custom_type_2"
31+
}
32+
],
33+
"ownerTypes": {},
34+
"lastModified": {
35+
"time": 0,
36+
"actor": "urn:li:corpuser:unknown"
37+
}
38+
}
39+
}
40+
]
41+
}
42+
},
43+
"systemMetadata": {
44+
"lastObserved": 1586847600000,
45+
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
46+
"lastRunId": "no-run-id-provided"
47+
}
48+
},
49+
{
50+
"proposedSnapshot": {
51+
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
52+
"urn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
53+
"aspects": [
54+
{
55+
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
56+
"customProperties": {},
57+
"name": "Mixed URN Types",
58+
"definition": "Term with custom URN types",
59+
"parentNode": "urn:li:glossaryNode:Custom URN Types",
60+
"termSource": "INTERNAL",
61+
"sourceRef": "DataHub",
62+
"sourceUrl": "https://github.com/datahub-project/datahub/"
63+
}
64+
},
65+
{
66+
"com.linkedin.pegasus2avro.common.Ownership": {
67+
"owners": [
68+
{
69+
"owner": "urn:li:corpuser:user3",
70+
"type": "CUSTOM",
71+
"typeUrn": "urn:li:ownershipType:custom_type_3"
72+
},
73+
{
74+
"owner": "urn:li:corpGroup:group2",
75+
"type": "CUSTOM",
76+
"typeUrn": "urn:li:ownershipType:custom_type_4"
77+
}
78+
],
79+
"ownerTypes": {},
80+
"lastModified": {
81+
"time": 0,
82+
"actor": "urn:li:corpuser:unknown"
83+
}
84+
}
85+
}
86+
]
87+
}
88+
},
89+
"systemMetadata": {
90+
"lastObserved": 1586847600000,
91+
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
92+
"lastRunId": "no-run-id-provided"
93+
}
94+
},
95+
{
96+
"proposedSnapshot": {
97+
"com.linkedin.pegasus2avro.metadata.snapshot.GlossaryTermSnapshot": {
98+
"urn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
99+
"aspects": [
100+
{
101+
"com.linkedin.pegasus2avro.glossary.GlossaryTermInfo": {
102+
"customProperties": {},
103+
"name": "Mixed Standard and URN",
104+
"definition": "Term with both standard and URN types",
105+
"parentNode": "urn:li:glossaryNode:Custom URN Types",
106+
"termSource": "INTERNAL",
107+
"sourceRef": "DataHub",
108+
"sourceUrl": "https://github.com/datahub-project/datahub/"
109+
}
110+
},
111+
{
112+
"com.linkedin.pegasus2avro.common.Ownership": {
113+
"owners": [
114+
{
115+
"owner": "urn:li:corpuser:dev1",
116+
"type": "DEVELOPER"
117+
},
118+
{
119+
"owner": "urn:li:corpGroup:group3",
120+
"type": "CUSTOM",
121+
"typeUrn": "urn:li:ownershipType:custom_type_5"
122+
}
123+
],
124+
"ownerTypes": {},
125+
"lastModified": {
126+
"time": 0,
127+
"actor": "urn:li:corpuser:unknown"
128+
}
129+
}
130+
}
131+
]
132+
}
133+
},
134+
"systemMetadata": {
135+
"lastObserved": 1586847600000,
136+
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
137+
"lastRunId": "no-run-id-provided"
138+
}
139+
},
140+
{
141+
"entityType": "glossaryNode",
142+
"entityUrn": "urn:li:glossaryNode:Custom URN Types",
143+
"changeType": "UPSERT",
144+
"aspectName": "status",
145+
"aspect": {
146+
"json": {
147+
"removed": false
148+
}
149+
},
150+
"systemMetadata": {
151+
"lastObserved": 1586847600000,
152+
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
153+
"lastRunId": "no-run-id-provided"
154+
}
155+
},
156+
{
157+
"entityType": "glossaryTerm",
158+
"entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed Standard and URN",
159+
"changeType": "UPSERT",
160+
"aspectName": "status",
161+
"aspect": {
162+
"json": {
163+
"removed": false
164+
}
165+
},
166+
"systemMetadata": {
167+
"lastObserved": 1586847600000,
168+
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
169+
"lastRunId": "no-run-id-provided"
170+
}
171+
},
172+
{
173+
"entityType": "glossaryTerm",
174+
"entityUrn": "urn:li:glossaryTerm:Custom URN Types.Mixed URN Types",
175+
"changeType": "UPSERT",
176+
"aspectName": "status",
177+
"aspect": {
178+
"json": {
179+
"removed": false
180+
}
181+
},
182+
"systemMetadata": {
183+
"lastObserved": 1586847600000,
184+
"runId": "datahub-business-glossary-2020_04_14-07_00_00-dlsmlo",
185+
"lastRunId": "no-run-id-provided"
186+
}
187+
}
188+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
version: "1"
2+
source: DataHub
3+
owners:
4+
users:
5+
- mjames
6+
url: "https://github.com/datahub-project/datahub/"
7+
8+
nodes:
9+
- name: Different Owner Types
10+
description: Testing multiple owners with different types
11+
owners:
12+
- type: DEVELOPER
13+
users:
14+
- dev1
15+
groups:
16+
- engineering
17+
- type: DATAOWNER
18+
users:
19+
- owner1
20+
groups:
21+
- data_stewards
22+
- type: PRODUCER
23+
users:
24+
- producer1
25+
terms:
26+
- name: Mixed Ownership
27+
description: Term with different owner types
28+
owners:
29+
- type: STAKEHOLDER
30+
users:
31+
- stakeholder1
32+
groups:
33+
- business
34+
- type: DEVELOPER
35+
users:
36+
- dev2
37+
- type: DATAOWNER
38+
groups:
39+
- compliance

0 commit comments

Comments
 (0)