15
15
)
16
16
from datahub .metadata .schema_classes import (
17
17
AuditStampClass ,
18
+ DataPlatformInstanceClass ,
18
19
DataProcessInstanceRunEventClass ,
19
20
DataProcessInstanceRunResultClass ,
20
21
DataProcessRunStatusClass ,
21
22
DataProcessTypeClass ,
23
+ SubTypesClass ,
24
+ ContainerClass ,
22
25
)
26
+ from datahub .metadata .urns import DataPlatformInstanceUrn , DataPlatformUrn , ContainerUrn
23
27
from datahub .utilities .str_enum import StrEnum
24
28
from datahub .utilities .urns .data_flow_urn import DataFlowUrn
25
29
from datahub .utilities .urns .data_job_urn import DataJobUrn
26
30
from datahub .utilities .urns .data_process_instance_urn import DataProcessInstanceUrn
27
31
from datahub .utilities .urns .dataset_urn import DatasetUrn
32
+ from datahub .emitter .mcp_builder import ContainerKey
28
33
29
34
30
35
class DataProcessInstanceKey (DatahubKey ):
@@ -61,7 +66,9 @@ class DataProcessInstance:
61
66
orchestrator : str
62
67
cluster : Optional [str ] = None
63
68
type : str = DataProcessTypeClass .BATCH_SCHEDULED
64
- template_urn : Optional [Union [DataJobUrn , DataFlowUrn , DatasetUrn ]] = None
69
+ template_urn : Optional [
70
+ Union [DataJobUrn , DataFlowUrn , DatasetUrn , ContainerUrn ]
71
+ ] = None
65
72
parent_instance : Optional [DataProcessInstanceUrn ] = None
66
73
properties : Dict [str , str ] = field (default_factory = dict )
67
74
url : Optional [str ] = None
@@ -71,6 +78,10 @@ class DataProcessInstance:
71
78
_template_object : Optional [Union [DataJob , DataFlow ]] = field (
72
79
init = False , default = None , repr = False
73
80
)
81
+ data_platform : Optional [str ] = None
82
+ data_plaform_instance : Optional [str ] = None
83
+ subtype : Optional [str ] = None
84
+ container_urn : Optional [str ] = None
74
85
75
86
def __post_init__ (self ):
76
87
self .urn = DataProcessInstanceUrn (
@@ -80,6 +91,36 @@ def __post_init__(self):
80
91
id = self .id ,
81
92
).guid ()
82
93
)
94
+ if self .data_platform is None :
95
+ self .data_platform = self .orchestrator
96
+
97
+ try :
98
+ # We first try to create from string, assuming it's an urn
99
+ self .data_platform = str (
100
+ DataPlatformUrn .create_from_string (self .data_platform )
101
+ )
102
+ except Exception :
103
+ # If it fails, we assume it's an id
104
+ self .data_platform = str (DataPlatformUrn .create_from_id (self .data_platform ))
105
+
106
+ if self .data_plaform_instance is None and self .cluster is not None :
107
+ self .data_plaform_instance = self .cluster
108
+
109
+ if self .data_plaform_instance is not None :
110
+ try :
111
+ # We first try to create from string, assuming it's an urn
112
+ self .data_plaform_instance = str (
113
+ DataPlatformInstanceUrn .create_from_string (
114
+ self .data_plaform_instance
115
+ )
116
+ )
117
+ except Exception :
118
+ # If it fails, we assume it's an id
119
+ self .data_plaform_instance = str (
120
+ DataPlatformInstanceUrn (
121
+ platform = self .data_platform , instance = self .data_plaform_instance
122
+ )
123
+ )
83
124
84
125
def start_event_mcp (
85
126
self , start_timestamp_millis : int , attempt : Optional [int ] = None
@@ -269,6 +310,29 @@ def generate_mcp(
269
310
)
270
311
yield mcp
271
312
313
+ assert self .data_platform
314
+
315
+ mcp = MetadataChangeProposalWrapper (
316
+ entityUrn = str (self .urn ),
317
+ aspect = DataPlatformInstanceClass (
318
+ platform = self .data_platform , instance = self .data_plaform_instance
319
+ ),
320
+ )
321
+ yield mcp
322
+
323
+ if self .subtype :
324
+ mcp = MetadataChangeProposalWrapper (
325
+ entityUrn = str (self .urn ), aspect = SubTypesClass (typeNames = [self .subtype ])
326
+ )
327
+ yield mcp
328
+
329
+ if self .container_urn :
330
+ mcp = MetadataChangeProposalWrapper (
331
+ entityUrn = str (self .urn ),
332
+ aspect = ContainerClass (container = self .container_urn ),
333
+ )
334
+ yield mcp
335
+
272
336
yield from self .generate_inlet_outlet_mcp (materialize_iolets = materialize_iolets )
273
337
274
338
@staticmethod
@@ -331,6 +395,31 @@ def from_datajob(
331
395
dpi .outlets = datajob .outlets
332
396
return dpi
333
397
398
@staticmethod
def from_container(
    container_key: ContainerKey,
    id: str,
) -> "DataProcessInstance":
    """
    Generates a DataProcessInstance that is attached to a Container.

    The resulting instance has no template (``template_urn`` is None); it is
    linked to the container only through ``container_urn``.

    :param container_key: (ContainerKey) key of the container the instance belongs to.
        Its ``platform`` is parsed with ``DataPlatformUrn.from_string`` and the resulting
        platform name is used as the orchestrator; ``as_urn()`` supplies the container urn.
        NOTE(review): this presumably requires ``platform`` to be a full data platform urn
        rather than a bare platform id -- confirm against callers.
    :param id: (str) the id for the DataProcessInstance
    :return: DataProcessInstance
    """
    dpi: DataProcessInstance = DataProcessInstance(
        id=id,
        # The orchestrator is the bare platform name, not the full urn.
        orchestrator=DataPlatformUrn.from_string(
            container_key.platform
        ).platform_name,
        template_urn=None,
        container_urn=container_key.as_urn(),
    )

    return dpi
422
+
334
423
@staticmethod
335
424
def from_dataflow (dataflow : DataFlow , id : str ) -> "DataProcessInstance" :
336
425
"""
0 commit comments