
Commit 5d5682f: fix linting
1 parent: f94a8aa

File tree: 5 files changed, +221 −124 lines
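
The diffs below are pure formatting changes: long single-line calls and literals are wrapped, trailing commas are added, and spacing around keyword arguments and operators is normalized. The commit message does not name the tool, so the following is only a hedged sketch of how the fix might be reproduced locally, assuming Black is the formatter configured for metadata-ingestion:

# Hypothetical reproduction of the lint fix. The commit does not confirm which
# formatter is configured, so invoking "black" on these paths is an assumption.
import subprocess

subprocess.run(
    [
        "black",
        "metadata-ingestion/examples/ai",
        "metadata-ingestion/src/datahub/api/entities/dataprocess",
        "metadata-ingestion/src/datahub/emitter",
    ],
    check=True,
)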

metadata-ingestion/examples/ai/data_job_instance.py

+1 −1

@@ -152,4 +152,4 @@ def generate_pipeline(
     for mcp in generate_pipeline(
         "training_pipeline_airflow", orchestrator=ORCHESTRATOR_AIRFLOW
     ):
-        graph.emit(mcp)
+        graph.emit(mcp)

metadata-ingestion/examples/ai/demo_script.py

+74 −22

@@ -13,7 +13,12 @@
 from datahub.api.entities.dataset.dataset import Dataset
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.graph.client import get_default_graph
-from datahub.metadata.urns import DatasetUrn, DataPlatformUrn, MlModelGroupUrn, MlModelUrn
+from datahub.metadata.urns import (
+    DatasetUrn,
+    DataPlatformUrn,
+    MlModelGroupUrn,
+    MlModelUrn,
+)
 from datahub.emitter.mcp_builder import ContainerKey

 ORCHESTRATOR_MLFLOW = "mlflow"
@@ -133,7 +138,9 @@ def generate_pipeline(

     yield from experiment.generate_mcp()

-    model_group_urn = MlModelGroupUrn(platform="mlflow", name="airline_forecast_models")
+    model_group_urn = MlModelGroupUrn(
+        platform="mlflow", name="airline_forecast_models"
+    )
     current_time = int(time.time() * 1000)
     model_group_info = models.MLModelGroupPropertiesClass(
         description="ML models for airline passenger forecasting",
@@ -153,8 +160,20 @@ def generate_pipeline(

     print("model_group_urn: ", model_group_urn)

-    model_aliases = ["challenger", "champion", "production", "experimental", "deprecated"]
-    model_tags = ["stage:production", "stage:development", "team:data_science", "team:ml_engineering", "team:analytics"]
+    model_aliases = [
+        "challenger",
+        "champion",
+        "production",
+        "experimental",
+        "deprecated",
+    ]
+    model_tags = [
+        "stage:production",
+        "stage:development",
+        "team:data_science",
+        "team:ml_engineering",
+        "team:analytics",
+    ]

     model_dict = {
         "arima_model_1": "ARIMA model for airline passenger forecasting",
@@ -166,21 +185,45 @@ def generate_pipeline(

     # Generate run timestamps within the last month
     end_time = int(time.time() * 1000)  # Current timestamp in milliseconds
-    start_time = end_time - (30 * 24 * 60 * 60 * 1000)  # 30 days ago in milliseconds
+    start_time = end_time - (
+        30 * 24 * 60 * 60 * 1000
+    )  # 30 days ago in milliseconds
     run_timestamps = [
         start_time + (i * 5 * 24 * 60 * 60 * 1000)  # 5-day intervals
         for i in range(5)
     ]

     run_dict = {
-        "run_1": {"start_time": run_timestamps[0], "duration": 45, "result": InstanceRunResult.SUCCESS},
-        "run_2": {"start_time": run_timestamps[1], "duration": 60, "result": InstanceRunResult.FAILURE},
-        "run_3": {"start_time": run_timestamps[2], "duration": 55, "result": InstanceRunResult.SUCCESS},
-        "run_4": {"start_time": run_timestamps[3], "duration": 70, "result": InstanceRunResult.SUCCESS},
-        "run_5": {"start_time": run_timestamps[4], "duration": 50, "result": InstanceRunResult.FAILURE},
+        "run_1": {
+            "start_time": run_timestamps[0],
+            "duration": 45,
+            "result": InstanceRunResult.SUCCESS,
+        },
+        "run_2": {
+            "start_time": run_timestamps[1],
+            "duration": 60,
+            "result": InstanceRunResult.FAILURE,
+        },
+        "run_3": {
+            "start_time": run_timestamps[2],
+            "duration": 55,
+            "result": InstanceRunResult.SUCCESS,
+        },
+        "run_4": {
+            "start_time": run_timestamps[3],
+            "duration": 70,
+            "result": InstanceRunResult.SUCCESS,
+        },
+        "run_5": {
+            "start_time": run_timestamps[4],
+            "duration": 50,
+            "result": InstanceRunResult.FAILURE,
+        },
     }

-    for i, (model_name, model_description) in enumerate(model_dict.items(), start=1):
+    for i, (model_name, model_description) in enumerate(
+        model_dict.items(), start=1
+    ):
         run_id = f"run_{i}"
         data_process_instance = DataProcessInstance.from_container(
             container_key=experiment.key, id=run_id
@@ -206,34 +249,36 @@ def generate_pipeline(
             models.MLMetricClass(
                 name="accuracy",
                 value=str(random.uniform(0.7, 0.99)),
-                description="Test accuracy"
+                description="Test accuracy",
             ),
             models.MLMetricClass(
                 name="f1_score",
                 value=str(random.uniform(0.7, 0.99)),
-                description="Test F1 score"
-            )
+                description="Test F1 score",
+            ),
         ]
         hyper_params = [
             models.MLHyperParamClass(
                 name="n_estimators",
                 value=str(random.randint(50, 200)),
-                description="Number of trees"
+                description="Number of trees",
             ),
             models.MLHyperParamClass(
                 name="max_depth",
                 value=str(random.randint(5, 15)),
-                description="Maximum tree depth"
-            )
+                description="Maximum tree depth",
+            ),
         ]

         # DPI properties
         created_at = int(time.time() * 1000)
         print(start_time)
         dpi_props = models.DataProcessInstancePropertiesClass(
             name=f"Training {run_id}",
-            created=models.AuditStampClass(time=created_at, actor="urn:li:corpuser:datahub"),
-            createdAt=int(created_at/1000),
+            created=models.AuditStampClass(
+                time=created_at, actor="urn:li:corpuser:datahub"
+            ),
+            createdAt=int(created_at / 1000),
             createdBy="jane_doe",
             loggedModels=["sklearn"],
             artifactsLocation="s3://mlflow/artifacts",
@@ -261,7 +306,9 @@ def generate_pipeline(
         duration_minutes = run_dict[run_id]["duration"]
         end_time_millis = start_time_millis + duration_minutes * 60000
         result = run_dict[run_id]["result"]
-        result_type = "SUCCESS" if result == InstanceRunResult.SUCCESS else "FAILURE"
+        result_type = (
+            "SUCCESS" if result == InstanceRunResult.SUCCESS else "FAILURE"
+        )

         yield from data_process_instance.start_event_mcp(
             start_timestamp_millis=start_time_millis
@@ -275,7 +322,12 @@ def generate_pipeline(

         print("data_process_instance.urn: ", data_process_instance.urn)
         print("start Time:", start_time_millis)
-        print("start Time:", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time_millis/1000)))
+        print(
+            "start Time:",
+            time.strftime(
+                "%Y-%m-%d %H:%M:%S", time.localtime(start_time_millis / 1000)
+            ),
+        )

         # Model
         selected_aliases = random.sample(model_aliases, k=random.randint(1, 2))
@@ -309,4 +361,4 @@ def generate_pipeline(
     for mcp in generate_pipeline(
         "airline_forecast_pipeline_airflow", orchestrator=ORCHESTRATOR_AIRFLOW
     ):
-        graph.emit(mcp)
+        graph.emit(mcp)
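
For context, the pattern demo_script.py follows (and that this lint fix only rewraps) is: build URNs and aspect classes, wrap each aspect in a MetadataChangeProposalWrapper, and emit the wrappers through a graph client. A minimal sketch of that flow, reusing only calls visible in this commit and assuming a DataHub instance reachable by get_default_graph():

# Minimal sketch of the emit flow rewrapped above. Assumes a reachable DataHub
# instance and that the "models" alias maps to datahub.metadata.schema_classes,
# as the demo script appears to do.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.graph.client import get_default_graph
from datahub.metadata import schema_classes as models
from datahub.metadata.urns import MlModelGroupUrn

model_group_urn = MlModelGroupUrn(
    platform="mlflow", name="airline_forecast_models"
)
model_group_info = models.MLModelGroupPropertiesClass(
    description="ML models for airline passenger forecasting",
)

mcp = MetadataChangeProposalWrapper(
    entityUrn=str(model_group_urn), aspect=model_group_info
)

graph = get_default_graph()
graph.emit(mcp)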

metadata-ingestion/src/datahub/api/entities/dataprocess/dataprocess_instance.py

+9 −5

@@ -21,7 +21,7 @@
     DataProcessTypeClass,
     TimeStampClass,
     SubTypesClass,
-    ContainerClass
+    ContainerClass,
 )
 from datahub.metadata.urns import DataPlatformInstanceUrn, DataPlatformUrn, ContainerUrn
 from datahub.utilities.str_enum import StrEnum
@@ -66,7 +66,9 @@ class DataProcessInstance:
     orchestrator: str
     cluster: Optional[str] = None
     type: str = DataProcessTypeClass.BATCH_SCHEDULED
-    template_urn: Optional[Union[DataJobUrn, DataFlowUrn, DatasetUrn, ContainerUrn]] = None
+    template_urn: Optional[
+        Union[DataJobUrn, DataFlowUrn, DatasetUrn, ContainerUrn]
+    ] = None
     parent_instance: Optional[DataProcessInstanceUrn] = None
     properties: Dict[str, str] = field(default_factory=dict)
     url: Optional[str] = None
@@ -409,9 +411,11 @@ def from_container(
         """
         dpi: DataProcessInstance = DataProcessInstance(
             id=id,
-            orchestrator=DataPlatformUrn.from_string(container_key.platform).platform_name,
+            orchestrator=DataPlatformUrn.from_string(
+                container_key.platform
+            ).platform_name,
             template_urn=None,
-            container_urn = container_key.as_urn(),
+            container_urn=container_key.as_urn(),
         )

         return dpi
@@ -461,4 +465,4 @@ def generate_inlet_outlet_mcp(
         yield MetadataChangeProposalWrapper(
             entityUrn=str(iolet),
             aspect=iolet.to_key_aspect(),
-        )
+        )
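
The from_container hunk only rewraps the expression that derives the orchestrator from the container key's platform URN. A small illustration of that derivation, using a hypothetical URN value rather than anything taken from the commit:

# Shows the derivation rewrapped in from_container above; the URN literal is a
# made-up example.
from datahub.metadata.urns import DataPlatformUrn

platform = "urn:li:dataPlatform:mlflow"
orchestrator = DataPlatformUrn.from_string(platform).platform_name
print(orchestrator)  # expected: "mlflow"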

metadata-ingestion/src/datahub/emitter/mcp.py

+5 −2

@@ -63,7 +63,10 @@ class MetadataChangeProposalWrapper:

     entityType: str = _ENTITY_TYPE_UNSET
     changeType: Union[str, ChangeTypeClass] = ChangeTypeClass.UPSERT
-    entityUrn: Union[None, str,] = None
+    entityUrn: Union[
+        None,
+        str,
+    ] = None
     entityKeyAspect: Union[None, _Aspect] = None
     auditHeader: Union[None, KafkaAuditHeaderClass] = None
     aspectName: Union[None, str] = None
@@ -108,7 +111,7 @@ def construct_many(
     ) -> List["MetadataChangeProposalWrapper"]:
         return [cls(entityUrn=entityUrn, aspect=aspect) for aspect in aspects if aspect]

-    def _make_mcp_without_aspects(self) -> MetadataChangeProposalClass:
+    def _make_mcp_without_aspects(self) -> MetadataChangeProposalClass:
         assert self.entityUrn is None or isinstance(self.entityUrn, str)
         return MetadataChangeProposalClass(
             entityType=self.entityType,
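
The construct_many hunk above leaves behavior unchanged: it still builds one wrapper per non-None aspect. A hedged usage sketch, taking the entityUrn and aspects names from the return expression shown above and using a hypothetical dataset URN:

# Hedged sketch of construct_many usage; the dataset URN below is invented for
# illustration, and StatusClass is just a convenient small aspect.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata import schema_classes as models

mcps = MetadataChangeProposalWrapper.construct_many(
    entityUrn="urn:li:dataset:(urn:li:dataPlatform:mlflow,example.table,PROD)",
    aspects=[
        models.StatusClass(removed=False),
        None,  # dropped by the "if aspect" guard in the comprehension
    ],
)
assert len(mcps) == 1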
