Skip to content

Commit 8e18be9

Browse files
committed
initial commit
1 parent 4f8c9bb commit 8e18be9

File tree

2 files changed

+433
-91
lines changed

2 files changed

+433
-91
lines changed

metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py

+90-1
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,21 @@
1515
)
1616
from datahub.metadata.schema_classes import (
1717
AuditStampClass,
18+
DataPlatformInstanceClass,
1819
DataProcessInstanceRunEventClass,
1920
DataProcessInstanceRunResultClass,
2021
DataProcessRunStatusClass,
2122
DataProcessTypeClass,
23+
SubTypesClass,
24+
ContainerClass,
2225
)
26+
from datahub.metadata.urns import DataPlatformInstanceUrn, DataPlatformUrn, ContainerUrn
2327
from datahub.utilities.str_enum import StrEnum
2428
from datahub.utilities.urns.data_flow_urn import DataFlowUrn
2529
from datahub.utilities.urns.data_job_urn import DataJobUrn
2630
from datahub.utilities.urns.data_process_instance_urn import DataProcessInstanceUrn
2731
from datahub.utilities.urns.dataset_urn import DatasetUrn
32+
from datahub.emitter.mcp_builder import ContainerKey
2833

2934

3035
class DataProcessInstanceKey(DatahubKey):
@@ -61,7 +66,9 @@ class DataProcessInstance:
6166
orchestrator: str
6267
cluster: Optional[str] = None
6368
type: str = DataProcessTypeClass.BATCH_SCHEDULED
64-
template_urn: Optional[Union[DataJobUrn, DataFlowUrn, DatasetUrn]] = None
69+
template_urn: Optional[
70+
Union[DataJobUrn, DataFlowUrn, DatasetUrn, ContainerUrn]
71+
] = None
6572
parent_instance: Optional[DataProcessInstanceUrn] = None
6673
properties: Dict[str, str] = field(default_factory=dict)
6774
url: Optional[str] = None
@@ -71,6 +78,10 @@ class DataProcessInstance:
7178
_template_object: Optional[Union[DataJob, DataFlow]] = field(
7279
init=False, default=None, repr=False
7380
)
81+
data_platform: Optional[str] = None
82+
data_plaform_instance: Optional[str] = None
83+
subtype: Optional[str] = None
84+
container_urn: Optional[str] = None
7485

7586
def __post_init__(self):
7687
self.urn = DataProcessInstanceUrn(
@@ -80,6 +91,36 @@ def __post_init__(self):
8091
id=self.id,
8192
).guid()
8293
)
94+
if self.data_platform is None:
95+
self.data_platform = self.orchestrator
96+
97+
try:
98+
# First try to parse the value as a string, assuming it's a URN
99+
self.data_platform = str(
100+
DataPlatformUrn.create_from_string(self.data_platform)
101+
)
102+
except Exception:
103+
# If that fails, we assume it's an id
104+
self.data_platform = str(DataPlatformUrn.create_from_id(self.data_platform))
105+
106+
if self.data_plaform_instance is None and self.cluster is not None:
107+
self.data_plaform_instance = self.cluster
108+
109+
if self.data_plaform_instance is not None:
110+
try:
111+
# First try to parse the value as a string, assuming it's a URN
112+
self.data_plaform_instance = str(
113+
DataPlatformInstanceUrn.create_from_string(
114+
self.data_plaform_instance
115+
)
116+
)
117+
except Exception:
118+
# If that fails, we assume it's an id
119+
self.data_plaform_instance = str(
120+
DataPlatformInstanceUrn(
121+
platform=self.data_platform, instance=self.data_plaform_instance
122+
)
123+
)
83124

84125
def start_event_mcp(
85126
self, start_timestamp_millis: int, attempt: Optional[int] = None
@@ -269,6 +310,29 @@ def generate_mcp(
269310
)
270311
yield mcp
271312

313+
assert self.data_platform
314+
315+
mcp = MetadataChangeProposalWrapper(
316+
entityUrn=str(self.urn),
317+
aspect=DataPlatformInstanceClass(
318+
platform=self.data_platform, instance=self.data_plaform_instance
319+
),
320+
)
321+
yield mcp
322+
323+
if self.subtype:
324+
mcp = MetadataChangeProposalWrapper(
325+
entityUrn=str(self.urn), aspect=SubTypesClass(typeNames=[self.subtype])
326+
)
327+
yield mcp
328+
329+
if self.container_urn:
330+
mcp = MetadataChangeProposalWrapper(
331+
entityUrn=str(self.urn),
332+
aspect=ContainerClass(container=self.container_urn),
333+
)
334+
yield mcp
335+
272336
yield from self.generate_inlet_outlet_mcp(materialize_iolets=materialize_iolets)
273337

274338
@staticmethod
@@ -331,6 +395,31 @@ def from_datajob(
331395
dpi.outlets = datajob.outlets
332396
return dpi
333397

398+
@staticmethod
def from_container(
    container_key: ContainerKey,
    id: str,
) -> "DataProcessInstance":
    """
    Generates DataProcessInstance from a Container

    The instance's orchestrator is derived from the container key's platform,
    and the instance is linked to the container via ``container_urn``. No
    template urn is set.

    :param container_key: (ContainerKey) the key of the container to attach the DataProcessInstance to
    :param id: (str) the id for the DataProcessInstance
    :return: DataProcessInstance
    """
    platform = container_key.platform
    # The platform may be given either as a full data platform urn or as a bare
    # platform id. Only urn-shaped values are parsed; a bare id is already the
    # orchestrator name and would make DataPlatformUrn.from_string fail.
    if platform.startswith("urn:li:"):
        platform = DataPlatformUrn.from_string(platform).platform_name

    return DataProcessInstance(
        id=id,
        orchestrator=platform,
        template_urn=None,
        container_urn=container_key.as_urn(),
    )
422+
334423
@staticmethod
335424
def from_dataflow(dataflow: DataFlow, id: str) -> "DataProcessInstance":
336425
"""

0 commit comments

Comments
 (0)