Skip to content

Commit 874c683

Browse files
authored Jan 31, 2024
fix(ingest/transformer): generate ownership aspect from handle_end_of_stream (#9720)
1 parent f3afdf9 commit 874c683

File tree

3 files changed

+58
-17
lines changed

3 files changed

+58
-17
lines changed
 

‎metadata-ingestion/src/datahub/ingestion/transformer/base_transformer.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@
2020
def _update_work_unit_id(
2121
envelope: RecordEnvelope, urn: str, aspect_name: str
2222
) -> Dict[Any, Any]:
23-
structured_urn = Urn.create_from_string(urn)
24-
simple_name = "-".join(structured_urn.get_entity_id())
23+
structured_urn = Urn.from_string(urn)
24+
simple_name = "-".join(structured_urn.entity_ids)
2525
record_metadata = envelope.metadata.copy()
2626
record_metadata.update({"workunit_id": f"txform-{simple_name}-{aspect_name}"})
2727
return record_metadata

‎metadata-ingestion/src/datahub/ingestion/transformer/extract_ownership_from_tags.py

+35 -11
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,14 @@
1+
import logging
12
import re
23
from functools import lru_cache
3-
from typing import List, Optional, cast
4+
from typing import List, Optional, Sequence, Union, cast
45

56
from datahub.configuration.common import TransformerSemanticsConfigModel
67
from datahub.emitter.mce_builder import Aspect
8+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
79
from datahub.ingestion.api.common import PipelineContext
810
from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer
11+
from datahub.metadata._schema_classes import MetadataChangeProposalClass
912
from datahub.metadata.schema_classes import (
1013
GlobalTagsClass,
1114
OwnerClass,
@@ -16,6 +19,8 @@
1619
from datahub.utilities.urns.corpuser_urn import CorpuserUrn
1720
from datahub.utilities.urns.tag_urn import TagUrn
1821

22+
logger = logging.getLogger(__name__)
23+
1924

2025
class ExtractOwnersFromTagsConfig(TransformerSemanticsConfigModel):
2126
tag_prefix: str
@@ -38,11 +43,13 @@ class ExtractOwnersFromTagsTransformer(DatasetTagsTransformer):
3843

3944
ctx: PipelineContext
4045
config: ExtractOwnersFromTagsConfig
46+
owner_mcps: List[MetadataChangeProposalWrapper]
4147

4248
def __init__(self, config: ExtractOwnersFromTagsConfig, ctx: PipelineContext):
4349
super().__init__()
4450
self.ctx = ctx
4551
self.config = config
52+
self.owner_mcps = []
4653

4754
@classmethod
4855
def create(
@@ -56,6 +63,12 @@ def get_owner_urn(self, owner_str: str) -> str:
5663
return owner_str + "@" + self.config.email_domain
5764
return owner_str
5865

66+
def handle_end_of_stream(
67+
self,
68+
) -> Sequence[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]:
69+
70+
return self.owner_mcps
71+
5972
def transform_aspect(
6073
self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect]
6174
) -> Optional[Aspect]:
@@ -64,28 +77,39 @@ def transform_aspect(
6477
return None
6578
tags = in_tags_aspect.tags
6679
owners: List[OwnerClass] = []
80+
6781
for tag_class in tags:
6882
tag_urn = TagUrn.from_string(tag_class.tag)
69-
tag_str = tag_urn.get_entity_id()[0]
83+
tag_str = tag_urn.entity_ids[0]
7084
re_match = re.search(self.config.tag_prefix, tag_str)
7185
if re_match:
7286
owner_str = tag_str[re_match.end() :].strip()
7387
owner_urn_str = self.get_owner_urn(owner_str)
7488
if self.config.is_user:
75-
owner_urn = str(CorpuserUrn.create_from_id(owner_urn_str))
89+
owner_urn = str(CorpuserUrn(owner_urn_str))
7690
else:
77-
owner_urn = str(CorpGroupUrn.create_from_id(owner_urn_str))
91+
owner_urn = str(CorpGroupUrn(owner_urn_str))
7892
owner_type = get_owner_type(self.config.owner_type)
7993
if owner_type == OwnershipTypeClass.CUSTOM:
8094
assert (
8195
self.config.owner_type_urn is not None
8296
), "owner_type_urn must be set if owner_type is CUSTOM"
83-
owner = OwnerClass(
84-
owner=owner_urn,
85-
type=owner_type,
86-
typeUrn=self.config.owner_type_urn,
97+
98+
owners.append(
99+
OwnerClass(
100+
owner=owner_urn,
101+
type=owner_type,
102+
typeUrn=self.config.owner_type_urn,
103+
)
87104
)
88-
owners.append(owner)
89105

90-
owner_aspect = OwnershipClass(owners=owners)
91-
return cast(Aspect, owner_aspect)
106+
self.owner_mcps.append(
107+
MetadataChangeProposalWrapper(
108+
entityUrn=entity_urn,
109+
aspect=OwnershipClass(
110+
owners=owners,
111+
),
112+
)
113+
)
114+
115+
return None

‎metadata-ingestion/tests/unit/test_transform_dataset.py

+21 -4
Original file line number | Diff line number | Diff line change
@@ -648,22 +648,35 @@ def _test_owner(
648648
)
649649
]
650650
)
651+
651652
transformer = ExtractOwnersFromTagsTransformer.create(
652653
config,
653654
PipelineContext(run_id="test"),
654655
)
655-
transformed = list(
656+
657+
record_envelops: List[RecordEnvelope] = list(
656658
transformer.transform(
657659
[
658660
RecordEnvelope(dataset, metadata={}),
661+
RecordEnvelope(record=EndOfStream(), metadata={}),
659662
]
660663
)
661664
)
662-
owners_aspect = transformed[0].record.proposedSnapshot.aspects[0]
665+
666+
assert len(record_envelops) == 3
667+
668+
mcp: MetadataChangeProposalWrapper = record_envelops[1].record
669+
670+
owners_aspect = cast(OwnershipClass, mcp.aspect)
671+
663672
owners = owners_aspect.owners
673+
664674
owner = owners[0]
665-
if expected_owner_type is not None:
666-
assert owner.type == expected_owner_type
675+
676+
assert expected_owner_type is not None
677+
678+
assert owner.type == expected_owner_type
679+
667680
assert owner.owner == expected_owner
668681

669682
_test_owner(
@@ -672,13 +685,15 @@ def _test_owner(
672685
"tag_prefix": "owner:",
673686
},
674687
expected_owner="urn:li:corpuser:foo",
688+
expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER,
675689
)
676690
_test_owner(
677691
tag="abcdef-owner:foo",
678692
config={
679693
"tag_prefix": ".*owner:",
680694
},
681695
expected_owner="urn:li:corpuser:foo",
696+
expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER,
682697
)
683698
_test_owner(
684699
tag="owner:foo",
@@ -687,6 +702,7 @@ def _test_owner(
687702
"is_user": False,
688703
},
689704
expected_owner="urn:li:corpGroup:foo",
705+
expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER,
690706
)
691707
_test_owner(
692708
tag="owner:foo",
@@ -695,6 +711,7 @@ def _test_owner(
695711
"email_domain": "example.com",
696712
},
697713
expected_owner="urn:li:corpuser:foo@example.com",
714+
expected_owner_type=OwnershipTypeClass.TECHNICAL_OWNER,
698715
)
699716
_test_owner(
700717
tag="owner:foo",

0 commit comments

Comments
 (0)
Please sign in to comment.