Skip to content

Commit 6097820

Browse files
authored
feat(sdk): add support for institutional memory links (#12770)
1 parent 3b4f383 commit 6097820

9 files changed

+327
-14
lines changed

metadata-ingestion/src/datahub/sdk/_shared.py

+87-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Callable,
88
List,
99
Optional,
10+
Sequence,
1011
Tuple,
1112
Union,
1213
)
@@ -49,6 +50,8 @@
4950

5051
ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
5152

53+
_DEFAULT_ACTOR_URN = CorpUserUrn("__ingestion").urn()
54+
5255

5356
def make_time_stamp(ts: Optional[datetime]) -> Optional[models.TimeStampClass]:
5457
if ts is None:
@@ -438,8 +441,7 @@ def _parse_glossary_term_association_class(
438441
def _terms_audit_stamp(self) -> models.AuditStampClass:
439442
return models.AuditStampClass(
440443
time=0,
441-
# TODO figure out what to put here
442-
actor=CorpUserUrn("__ingestion").urn(),
444+
actor=_DEFAULT_ACTOR_URN,
443445
)
444446

445447
def set_terms(self, terms: TermsInputType) -> None:
@@ -493,3 +495,86 @@ def domain(self) -> Optional[DomainUrn]:
493495
def set_domain(self, domain: DomainInputType) -> None:
    """Replace this entity's domains aspect with the single given domain.

    Args:
        domain: The domain to assign. It is parsed with
            ``DomainUrn.from_string`` before being stored.
    """
    # Parsing through DomainUrn acts as a type assertion on the input; the
    # aspect itself stores the string form of the URN.
    parsed_urn = DomainUrn.from_string(domain)
    domains_aspect = models.DomainsClass(domains=[str(parsed_urn)])
    self._set_aspect(domains_aspect)
498+
499+
500+
LinkInputType: TypeAlias = Union[
501+
str,
502+
Tuple[str, str], # url, description
503+
models.InstitutionalMemoryMetadataClass,
504+
]
505+
LinksInputType: TypeAlias = Sequence[LinkInputType]
506+
507+
508+
class HasInstitutionalMemory(Entity):
    """Mixin that exposes the institutionalMemory aspect as "links".

    Internally the aspect is called institutionalMemory, and so much of the
    code uses that name. However, the public-facing API is called "links",
    since that's what we call these in the UI.
    """

    __slots__ = ()

    def _ensure_institutional_memory(
        self,
    ) -> List[models.InstitutionalMemoryMetadataClass]:
        """Return the aspect's ``elements`` list via ``_setdefault_aspect``.

        An ``InstitutionalMemoryClass`` with an empty ``elements`` list is
        passed as the default aspect.
        """
        return self._setdefault_aspect(
            models.InstitutionalMemoryClass(elements=[])
        ).elements

    @property
    def links(self) -> Optional[List[models.InstitutionalMemoryMetadataClass]]:
        """The link entries of the institutionalMemory aspect.

        Returns ``None`` when ``_get_aspect`` yields nothing for
        ``InstitutionalMemoryClass``.
        """
        if institutional_memory := self._get_aspect(models.InstitutionalMemoryClass):
            return institutional_memory.elements
        return None

    @classmethod
    def _institutional_memory_audit_stamp(cls) -> models.AuditStampClass:
        """Build the audit stamp attached to links created from plain inputs.

        The stamp uses ``time=0`` and the module's default actor URN.
        """
        return models.AuditStampClass(
            time=0,
            actor=_DEFAULT_ACTOR_URN,
        )

    @classmethod
    def _parse_link_association_class(
        cls, link: LinkInputType
    ) -> models.InstitutionalMemoryMetadataClass:
        """Normalize any accepted link input into a metadata object.

        Args:
            link: One of:
                * an ``InstitutionalMemoryMetadataClass`` - returned unchanged;
                * a ``str`` URL - the URL is also used as the description;
                * a ``(url, description)`` 2-tuple.

        Returns:
            The corresponding ``InstitutionalMemoryMetadataClass``. For the
            ``str`` and tuple forms the create stamp comes from
            ``_institutional_memory_audit_stamp``.
        """
        if isinstance(link, models.InstitutionalMemoryMetadataClass):
            return link
        elif isinstance(link, str):
            # A bare URL has no separate label, so it doubles as the description.
            return models.InstitutionalMemoryMetadataClass(
                url=link,
                description=link,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        elif isinstance(link, tuple) and len(link) == 2:
            url, description = link
            return models.InstitutionalMemoryMetadataClass(
                url=url,
                description=description,
                createStamp=cls._institutional_memory_audit_stamp(),
            )
        else:
            # Anything else (including tuples of the wrong length) is outside
            # LinkInputType and is handed to assert_never.
            assert_never(link)

    def set_links(self, links: LinksInputType) -> None:
        """Replace the institutionalMemory aspect with the given links.

        Args:
            links: Link inputs; each one is normalized with
                ``_parse_link_association_class``.
        """
        self._set_aspect(
            models.InstitutionalMemoryClass(
                elements=[self._parse_link_association_class(link) for link in links]
            )
        )

    @classmethod
    def _link_key(cls, link: models.InstitutionalMemoryMetadataClass) -> str:
        """Return the key identifying a link in the unique-list helpers: its URL."""
        return link.url

    def add_link(self, link: LinkInputType) -> None:
        """Add a link through ``add_list_unique``, keyed by URL.

        Args:
            link: Any accepted link input; normalized before being added.
        """
        add_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )

    def remove_link(self, link: LinkInputType) -> None:
        """Remove a link through ``remove_list_unique``, keyed by URL.

        Args:
            link: Any accepted link input; normalized before the removal.
        """
        # NOTE(review): this goes through _ensure_institutional_memory(), which
        # presumably attaches an empty aspect when none exists yet - confirm
        # that is the intended behavior for a removal.
        remove_list_unique(
            self._ensure_institutional_memory(),
            self._link_key,
            self._parse_link_association_class(link),
        )

metadata-ingestion/src/datahub/sdk/container.py

+6
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@
2020
DomainInputType,
2121
HasContainer,
2222
HasDomain,
23+
HasInstitutionalMemory,
2324
HasOwnership,
2425
HasPlatformInstance,
2526
HasSubtype,
2627
HasTags,
2728
HasTerms,
29+
LinksInputType,
2830
OwnersInputType,
2931
ParentContainerInputType,
3032
TagsInputType,
@@ -41,6 +43,7 @@ class Container(
4143
HasSubtype,
4244
HasContainer,
4345
HasOwnership,
46+
HasInstitutionalMemory,
4447
HasTags,
4548
HasTerms,
4649
HasDomain,
@@ -71,6 +74,7 @@ def __init__(
7174
parent_container: Auto | ParentContainerInputType | None = auto,
7275
subtype: Optional[str] = None,
7376
owners: Optional[OwnersInputType] = None,
77+
links: Optional[LinksInputType] = None,
7478
tags: Optional[TagsInputType] = None,
7579
terms: Optional[TermsInputType] = None,
7680
domain: Optional[DomainInputType] = None,
@@ -133,6 +137,8 @@ def __init__(
133137
self.set_subtype(subtype)
134138
if owners is not None:
135139
self.set_owners(owners)
140+
if links is not None:
141+
self.set_links(links)
136142
if tags is not None:
137143
self.set_tags(tags)
138144
if terms is not None:

metadata-ingestion/src/datahub/sdk/dataset.py

+6
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,13 @@
2323
DomainInputType,
2424
HasContainer,
2525
HasDomain,
26+
HasInstitutionalMemory,
2627
HasOwnership,
2728
HasPlatformInstance,
2829
HasSubtype,
2930
HasTags,
3031
HasTerms,
32+
LinksInputType,
3133
OwnersInputType,
3234
ParentContainerInputType,
3335
TagInputType,
@@ -422,6 +424,7 @@ class Dataset(
422424
HasSubtype,
423425
HasContainer,
424426
HasOwnership,
427+
HasInstitutionalMemory,
425428
HasTags,
426429
HasTerms,
427430
HasDomain,
@@ -453,6 +456,7 @@ def __init__(
453456
parent_container: ParentContainerInputType | Unset = unset,
454457
subtype: Optional[str] = None,
455458
owners: Optional[OwnersInputType] = None,
459+
links: Optional[LinksInputType] = None,
456460
tags: Optional[TagsInputType] = None,
457461
terms: Optional[TermsInputType] = None,
458462
# TODO structured_properties
@@ -499,6 +503,8 @@ def __init__(
499503
self.set_subtype(subtype)
500504
if owners is not None:
501505
self.set_owners(owners)
506+
if links is not None:
507+
self.set_links(links)
502508
if tags is not None:
503509
self.set_tags(tags)
504510
if terms is not None:

metadata-ingestion/tests/unit/sdk_v2/container_golden/test_container_complex_golden.json

+20
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,26 @@
104104
}
105105
}
106106
},
107+
{
108+
"entityType": "container",
109+
"entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056",
110+
"changeType": "UPSERT",
111+
"aspectName": "institutionalMemory",
112+
"aspect": {
113+
"json": {
114+
"elements": [
115+
{
116+
"url": "https://example.com/doc1",
117+
"description": "https://example.com/doc1",
118+
"createStamp": {
119+
"time": 0,
120+
"actor": "urn:li:corpuser:__ingestion"
121+
}
122+
}
123+
]
124+
}
125+
}
126+
},
107127
{
108128
"entityType": "container",
109129
"entityUrn": "urn:li:container:66c5ac35f0bfc521dee6f7d9533a8056",

metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_complex_golden.json

+28
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,34 @@
175175
}
176176
}
177177
},
178+
{
179+
"entityType": "dataset",
180+
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
181+
"changeType": "UPSERT",
182+
"aspectName": "institutionalMemory",
183+
"aspect": {
184+
"json": {
185+
"elements": [
186+
{
187+
"url": "https://example.com/doc1",
188+
"description": "https://example.com/doc1",
189+
"createStamp": {
190+
"time": 0,
191+
"actor": "urn:li:corpuser:__ingestion"
192+
}
193+
},
194+
{
195+
"url": "https://example.com/doc2",
196+
"description": "Documentation 2",
197+
"createStamp": {
198+
"time": 0,
199+
"actor": "urn:li:corpuser:__ingestion"
200+
}
201+
}
202+
]
203+
}
204+
}
205+
},
178206
{
179207
"entityType": "dataset",
180208
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",

metadata-ingestion/tests/unit/sdk_v2/dataset_golden/test_dataset_ingestion_golden.json

+28
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,34 @@
182182
}
183183
}
184184
},
185+
{
186+
"entityType": "dataset",
187+
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
188+
"changeType": "UPSERT",
189+
"aspectName": "institutionalMemory",
190+
"aspect": {
191+
"json": {
192+
"elements": [
193+
{
194+
"url": "https://example.com/doc1",
195+
"description": "https://example.com/doc1",
196+
"createStamp": {
197+
"time": 0,
198+
"actor": "urn:li:corpuser:__ingestion"
199+
}
200+
},
201+
{
202+
"url": "https://example.com/doc2",
203+
"description": "Documentation 2",
204+
"createStamp": {
205+
"time": 0,
206+
"actor": "urn:li:corpuser:__ingestion"
207+
}
208+
}
209+
]
210+
}
211+
}
212+
},
185213
{
186214
"entityType": "dataset",
187215
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,my_instance.my_db.my_schema.my_table,PROD)",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
[
2+
{
3+
"entityType": "dataset",
4+
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
5+
"changeType": "UPSERT",
6+
"aspectName": "dataPlatformInstance",
7+
"aspect": {
8+
"json": {
9+
"platform": "urn:li:dataPlatform:bigquery"
10+
}
11+
}
12+
},
13+
{
14+
"entityType": "dataset",
15+
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
16+
"changeType": "UPSERT",
17+
"aspectName": "schemaMetadata",
18+
"aspect": {
19+
"json": {
20+
"schemaName": "",
21+
"platform": "urn:li:dataPlatform:bigquery",
22+
"version": 0,
23+
"created": {
24+
"time": 0,
25+
"actor": "urn:li:corpuser:unknown"
26+
},
27+
"lastModified": {
28+
"time": 0,
29+
"actor": "urn:li:corpuser:unknown"
30+
},
31+
"hash": "",
32+
"platformSchema": {
33+
"com.linkedin.schema.Schemaless": {}
34+
},
35+
"fields": [
36+
{
37+
"fieldPath": "field1",
38+
"nullable": false,
39+
"type": {
40+
"type": {
41+
"com.linkedin.schema.StringType": {}
42+
}
43+
},
44+
"nativeDataType": "string",
45+
"recursive": false,
46+
"isPartOfKey": false
47+
},
48+
{
49+
"fieldPath": "field2",
50+
"nullable": false,
51+
"description": "field2 description",
52+
"type": {
53+
"type": {
54+
"com.linkedin.schema.NullType": {}
55+
}
56+
},
57+
"nativeDataType": "int64",
58+
"recursive": false,
59+
"isPartOfKey": false
60+
}
61+
]
62+
}
63+
}
64+
},
65+
{
66+
"entityType": "dataset",
67+
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:bigquery,proj.dataset.table,PROD)",
68+
"changeType": "UPSERT",
69+
"aspectName": "institutionalMemory",
70+
"aspect": {
71+
"json": {
72+
"elements": [
73+
{
74+
"url": "https://example.com/doc2",
75+
"description": "Documentation 2",
76+
"createStamp": {
77+
"time": 0,
78+
"actor": "urn:li:corpuser:__ingestion"
79+
}
80+
},
81+
{
82+
"url": "https://example.com/doc3",
83+
"description": "Documentation 3",
84+
"createStamp": {
85+
"time": 0,
86+
"actor": "urn:li:corpuser:__ingestion"
87+
}
88+
}
89+
]
90+
}
91+
}
92+
}
93+
]

0 commit comments

Comments
 (0)