Skip to content

Commit 2e50b0d

Browse files
committed
fix mlflow connector
1 parent 08044cc commit 2e50b0d

File tree

1 file changed

+10
-24
lines changed
  • metadata-ingestion/src/datahub/ingestion/source

1 file changed

+10
-24
lines changed

metadata-ingestion/src/datahub/ingestion/source/mlflow.py

+10-24
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@
3535
DataProcessInstancePropertiesClass,
3636
DataProcessInstanceRelationshipsClass,
3737
ContainerPropertiesClass,
38-
AuditStampClass,
3938
TimeStampClass,
4039
DataProcessRunStatusClass,
4140
SubTypesClass,
@@ -161,9 +160,10 @@ def get_report(self) -> SourceReport:
161160
return self.report
162161

163162
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
163+
print("get_workunits_internal")
164164
yield from self._get_tags_workunits()
165-
yield from self._get_experiment_workunits()
166165
yield from self._get_ml_model_workunits()
166+
# yield from self._get_experiment_workunits()
167167

168168
def _get_tags_workunits(self) -> Iterable[MetadataWorkUnit]:
169169
for stage_info in self.registered_model_stages_info:
@@ -218,7 +218,7 @@ def _get_experiment_container_workunit(self, experiment: Experiment) -> List[Met
218218
subtype="ML Experiment",
219219
name=experiment.name,
220220
description=experiment.tags.get('mlflow.note.content')
221-
)
221+
) # TODO: urn should be experiment id
222222

223223
print("experiment.key.id:", experiment.key.id) # this should be same as container key as urn
224224
print("experiment.key.as_urn(): ", experiment.key.as_urn())
@@ -247,7 +247,6 @@ def _convert_run_result_type(self, status: str) -> DataProcessInstanceRunResultC
247247
return DataProcessInstanceRunResultClass(type="SKIPPED", nativeResultType="mlflow")
248248

249249
def _get_run_workunits(self, experiment: Experiment, run: Run) -> List[MetadataWorkUnit]:
250-
# TODO: this does not map to the correct experiment
251250
experiment_key = ContainerKeyWithId(
252251
platform=str(DataPlatformUrn.create_from_id("mlflow")),
253252
id=experiment.name
@@ -258,6 +257,7 @@ def _get_run_workunits(self, experiment: Experiment, run: Run) -> List[MetadataW
258257
id=run.info.run_name
259258
)
260259

260+
# TODO: urn should be run id
261261

262262
print("dpi id", run.info.run_name)
263263
print("experiment_key.id:", experiment_key.id)
@@ -398,6 +398,7 @@ def _get_ml_group_workunit(
398398
versionTag=self._get_latest_version(registered_model)
399399
),
400400
)
401+
print("ml_model_group_properties: ", ml_model_group_properties)
401402
wu = self._create_workunit(
402403
urn=ml_model_group_urn,
403404
aspect=ml_model_group_properties,
@@ -452,30 +453,15 @@ def _get_ml_model_properties_workunit(
452453
ml_model_group_urn = self._make_ml_model_group_urn(registered_model)
453454
ml_model_urn = self._make_ml_model_urn(model_version)
454455

456+
hyperparams = None
457+
training_metrics = None
458+
training_jobs = None
459+
455460
if run:
456461
# Use the same metrics and hyperparams from the run
457462
hyperparams = self._get_run_params(run)
458463
training_metrics = self._get_run_metrics(run)
459-
460-
# Create proper relationship with the run
461-
462-
# get experiment name from experiment id
463-
experiment_id = run.info.experiment_id
464-
experiment = self.client.get_experiment(experiment_id)
465-
experiment_key = ContainerKeyWithId(
466-
platform=str(DataPlatformUrn.create_from_id("mlflow")),
467-
id=experiment.name
468-
)
469-
470-
data_process_instance = DataProcessInstance.from_container(
471-
container_key=experiment_key,
472-
id=run.info.run_name
473-
)
474-
training_jobs = [str(data_process_instance.urn)]
475-
else:
476-
hyperparams = None
477-
training_metrics = None
478-
training_jobs = None
464+
# training_jobs = [str(builder.make_data_process_instance_urn(run.info.run_id))]
479465

480466
created_time = model_version.creation_timestamp
481467
created_actor = f"urn:li:platformResource:{model_version.user_id}" if model_version.user_id else None

0 commit comments

Comments
 (0)