From fa5b6216123777b3adb84817a0330a7d6d53df7c Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 3 Jan 2025 13:51:02 +0900 Subject: [PATCH 01/19] update mappers --- .../graphql/resolvers/search/SearchUtils.java | 3 +- .../DataPlatformInstanceAspectMapper.java | 6 + .../mappers/TimeStampToAuditStampMapper.java | 23 ++++ .../common/mappers/UrnToEntityMapper.java | 6 + .../mappers/DataProcessInstanceMapper.java | 124 ++++++++++++++++-- .../DataProcessInstanceType.java | 96 ++++++++++++++ .../types/entitytype/EntityTypeUrnMapper.java | 3 + .../mappers/MLModelPropertiesMapper.java | 9 ++ .../graphql/featureflags/FeatureFlags.java | 1 + 9 files changed, 257 insertions(+), 14 deletions(-) create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java create mode 100644 datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java index a01b3aaec9c982..6aeb7c7a3a94a5 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java @@ -75,7 +75,8 @@ private SearchUtils() {} EntityType.DATA_PRODUCT, EntityType.NOTEBOOK, EntityType.BUSINESS_ATTRIBUTE, - EntityType.SCHEMA_FIELD); + EntityType.SCHEMA_FIELD, + EntityType.DATA_PROCESS_INSTANCE); /** Entities that are part of autocomplete by default in Auto Complete Across Entities */ public static final List AUTO_COMPLETE_ENTITY_TYPES = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java index 4345819867617b..32b55ebaab8e1c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.common.mappers; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; import com.linkedin.datahub.graphql.generated.EntityType; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; @@ -28,6 +29,11 @@ public DataPlatformInstance apply( result.setType(EntityType.DATA_PLATFORM_INSTANCE); result.setUrn(input.getInstance().toString()); } + result.setPlatform( + DataPlatform.builder() + .setUrn(input.getPlatform().toString()) + .setType(EntityType.DATA_PLATFORM) + .build()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java new file mode 100644 index 00000000000000..55e33bc3f2655d --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java @@ -0,0 +1,23 @@ +package 
com.linkedin.datahub.graphql.types.common.mappers; + +import com.linkedin.common.TimeStamp; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import javax.annotation.Nullable; + +public class TimeStampToAuditStampMapper { + + public static final TimeStampToAuditStampMapper INSTANCE = new TimeStampToAuditStampMapper(); + + public static AuditStamp map(@Nullable final QueryContext context, @Nullable final TimeStamp input) { + if (input == null) { + return null; + } + final AuditStamp result = new AuditStamp(); + result.setTime(input.getTime()); + if (input.hasActor()) { + result.setActor(input.getActor().toString()); + } + return result; + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java index 1988cafc486c18..eae33e6da2e56d 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java @@ -18,6 +18,7 @@ import com.linkedin.datahub.graphql.generated.DataJob; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.DataProduct; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.Domain; @@ -225,6 +226,11 @@ public Entity apply(@Nullable QueryContext context, Urn input) { ((BusinessAttribute) partialEntity).setUrn(input.toString()); ((BusinessAttribute) partialEntity).setType(EntityType.BUSINESS_ATTRIBUTE); } + if (input.getEntityType().equals(DATA_PROCESS_INSTANCE_ENTITY_NAME)) { + partialEntity = new DataProcessInstance(); + ((DataProcessInstance) partialEntity).setUrn(input.toString()); + ((DataProcessInstance) partialEntity).setType(EntityType.DATA_PROCESS_INSTANCE); + } return partialEntity; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java index 7a4d342281fe54..51cdb6209fc2fe 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java @@ -1,26 +1,39 @@ package com.linkedin.datahub.graphql.types.dataprocessinst.mappers; import static com.linkedin.metadata.Constants.*; - +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.SubTypes; +import com.linkedin.common.urn.Urn; import com.linkedin.data.DataMap; import com.linkedin.data.template.RecordTemplate; import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; +import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; +import 
com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper; import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; import com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLHyperParamMapper; +import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLMetricMapper; +import com.linkedin.ml.metadata.MLTrainingRunProperties; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; import javax.annotation.Nonnull; import javax.annotation.Nullable; +import java.util.stream.Collectors; +import lombok.extern.slf4j.Slf4j; /** * Maps Pegasus {@link RecordTemplate} objects to objects conforming to the GQL schema. * *
<p>
To be replaced by auto-generated mappers implementations */ + +@Slf4j public class DataProcessInstanceMapper implements ModelMapper { public static final DataProcessInstanceMapper INSTANCE = new DataProcessInstanceMapper(); @@ -30,6 +43,18 @@ public static DataProcessInstance map( return INSTANCE.apply(context, entityResponse); } +private void mapContainers( + @Nullable final QueryContext context, + @Nonnull DataProcessInstance dataProcessInstance, + @Nonnull DataMap dataMap) { + final com.linkedin.container.Container gmsContainer = + new com.linkedin.container.Container(dataMap); + dataProcessInstance.setContainer( + com.linkedin.datahub.graphql.generated.Container.builder() + .setType(EntityType.CONTAINER) + .setUrn(gmsContainer.getContainer().toString()) + .build()); +} @Override public DataProcessInstance apply( @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { @@ -37,24 +62,97 @@ public DataProcessInstance apply( result.setUrn(entityResponse.getUrn().toString()); result.setType(EntityType.DATA_PROCESS_INSTANCE); + Urn entityUrn = entityResponse.getUrn(); EnvelopedAspectMap aspectMap = entityResponse.getAspects(); MappingHelper mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper = new MappingHelper<>(aspectMap, result); + mappingHelper.mapToResult( + DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> { + try { + mapTrainingRunProperties(context, dataProcessInstance, dataMap, entityUrn); + } catch (Exception e) { + mapDataProcessProperties(context, dataProcessInstance, dataMap, entityUrn); + } + }); mappingHelper.mapToResult( - context, DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, this::mapDataProcessProperties); + DATA_PLATFORM_INSTANCE_ASPECT_NAME, + (dataProcessInstance, dataMap) -> { + DataPlatformInstance dataPlatformInstance = new DataPlatformInstance(dataMap); + dataProcessInstance.setDataPlatformInstance( + DataPlatformInstanceAspectMapper.map(context, dataPlatformInstance)); + DataPlatform dataPlatform = new DataPlatform(); + dataPlatform.setUrn(dataPlatformInstance.getPlatform().toString()); + dataPlatform.setType(EntityType.DATA_PLATFORM); + dataProcessInstance.setPlatform(dataPlatform); + }); + mappingHelper.mapToResult( + SUB_TYPES_ASPECT_NAME, + (dataProcessInstance, dataMap) -> + dataProcessInstance.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + mappingHelper.mapToResult( + CONTAINER_ASPECT_NAME, + (dataProcessInstance, dataMap) -> mapContainers(context, dataProcessInstance, dataMap)); return mappingHelper.getResult(); } - private void mapDataProcessProperties( - @Nonnull QueryContext context, @Nonnull DataProcessInstance dpi, @Nonnull DataMap dataMap) { - DataProcessInstanceProperties dataProcessInstanceProperties = - new DataProcessInstanceProperties(dataMap); - dpi.setName(dataProcessInstanceProperties.getName()); - if (dataProcessInstanceProperties.hasCreated()) { - dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); - } - if (dataProcessInstanceProperties.hasExternalUrl()) { - dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); - } + private void mapTrainingRunProperties( + @Nonnull QueryContext context, + @Nonnull DataProcessInstance dpi, + @Nonnull DataMap dataMap, + @Nonnull Urn entityUrn) { + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(dataMap); + + com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties = + new 
com.linkedin.datahub.graphql.generated.MLTrainingRunProperties(); + if (trainingProperties.hasOutputUrls()) { + properties.setOutputUrls( + trainingProperties.getOutputUrls() + .stream() + .map(url -> url.toString()) + .collect(Collectors.toList()) + ); + } + if (trainingProperties.getHyperParams() != null) { + properties.setHyperParams( + trainingProperties.getHyperParams().stream() + .map(param -> MLHyperParamMapper.map(context, param)) + .collect(Collectors.toList())); } + if (trainingProperties.getTrainingMetrics() != null) { + properties.setTrainingMetrics( + trainingProperties.getTrainingMetrics().stream() + .map(metric -> MLMetricMapper.map(context, metric)) + .collect(Collectors.toList())); + } + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } +} + +private void mapDataProcessProperties( + @Nonnull QueryContext context, + @Nonnull DataProcessInstance dpi, + @Nonnull DataMap dataMap, + @Nonnull Urn entityUrn) { +DataProcessInstanceProperties dataProcessInstanceProperties = + new DataProcessInstanceProperties(dataMap); +dpi.setName(dataProcessInstanceProperties.getName()); +com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties properties = + new com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties(); +if (dataProcessInstanceProperties.hasExternalUrl()) { + dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + properties.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); +} +if (dataProcessInstanceProperties.hasCustomProperties()) { + properties.setCustomProperties( + CustomPropertiesMapper.map( + dataProcessInstanceProperties.getCustomProperties(), entityUrn)); +} +if (dataProcessInstanceProperties.hasCreated()) { + dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); +} +dpi.setProperties(properties); } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java new file mode 100644 index 00000000000000..5a3c4bc31be0c9 --- /dev/null +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java @@ -0,0 +1,96 @@ +package com.linkedin.datahub.graphql.types.dataprocessinstance; + +import static com.linkedin.metadata.Constants.*; + +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.UrnUtils; +import com.linkedin.datahub.graphql.QueryContext; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.Entity; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceMapper; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.client.EntityClient; +import graphql.execution.DataFetcherResult; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; +import javax.annotation.Nonnull; +import lombok.RequiredArgsConstructor; + +@RequiredArgsConstructor +public class DataProcessInstanceType + implements 
com.linkedin.datahub.graphql.types.EntityType { + + public static final Set ASPECTS_TO_FETCH = + ImmutableSet.of( + DATA_PLATFORM_INSTANCE_ASPECT_NAME, + DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, + SUB_TYPES_ASPECT_NAME, + CONTAINER_ASPECT_NAME); + + private final EntityClient _entityClient; + private final FeatureFlags _featureFlags; + + @Override + public EntityType type() { + return EntityType.DATA_PROCESS_INSTANCE; + } + + @Override + public Function getKeyProvider() { + return Entity::getUrn; + } + + @Override + public Class objectClass() { + return DataProcessInstance.class; + } + + @Override + public List> batchLoad( + @Nonnull List urns, @Nonnull QueryContext context) throws Exception { + final List dataProcessInstanceUrns = + urns.stream().map(UrnUtils::getUrn).collect(Collectors.toList()); + + try { + Map entities = new HashMap<>(); + if (_featureFlags.isDataProcessInstanceEntityEnabled()) { + entities = + _entityClient.batchGetV2( + context.getOperationContext(), + DATA_PROCESS_INSTANCE_ENTITY_NAME, + new HashSet<>(dataProcessInstanceUrns), + ASPECTS_TO_FETCH); + } + + final List gmsResults = new ArrayList<>(); + for (Urn urn : dataProcessInstanceUrns) { + if (_featureFlags.isDataProcessInstanceEntityEnabled()) { + gmsResults.add(entities.getOrDefault(urn, null)); + } + } + + return gmsResults.stream() + .map( + gmsResult -> + gmsResult == null + ? null + : DataFetcherResult.newResult() + .data(DataProcessInstanceMapper.map(context, gmsResult)) + .build()) + .collect(Collectors.toList()); + + } catch (Exception e) { + throw new RuntimeException("Failed to load schemaField entity", e); + } + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java index 334faf753cb8b5..953b55ad82fe5e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java @@ -77,6 +77,9 @@ public class EntityTypeUrnMapper { .put( Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME, "urn:li:entityType:datahub.businessAttribute") + .put( + Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, + "urn:li:entityType:datahub.dataProcessInstance") .build(); private static final Map ENTITY_TYPE_URN_TO_NAME = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index 265005c2caa9ee..7f0654b8fbe5ec 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -8,6 +8,7 @@ import com.linkedin.datahub.graphql.generated.MLModelProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; +import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -31,6 +32,14 @@ public MLModelProperties apply( final 
MLModelProperties result = new MLModelProperties(); result.setDate(mlModelProperties.getDate()); + if (mlModelProperties.getName() != null) { + result.setName(mlModelProperties.getName()); + } else { + // backfill name from URN for backwards compatibility + result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here + } + result.setCreated(TimeStampToAuditStampMapper.map(context, mlModelProperties.getCreated())); + result.setLastModified(TimeStampToAuditStampMapper.map(context, mlModelProperties.getLastModified())); result.setDescription(mlModelProperties.getDescription()); if (mlModelProperties.getExternalUrl() != null) { result.setExternalUrl(mlModelProperties.getExternalUrl().toString()); diff --git a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java index 28abb26be1f524..97ca0dcabea9f3 100644 --- a/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/datahub/graphql/featureflags/FeatureFlags.java @@ -25,4 +25,5 @@ public class FeatureFlags { private boolean showSeparateSiblings = false; private boolean alternateMCPValidation = false; private boolean showManageStructuredProperties = false; + private boolean dataProcessInstanceEntityEnabled = true; } From b34fd90086b45f7e52b5737a33528d2333528b28 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 3 Jan 2025 14:07:20 +0900 Subject: [PATCH 02/19] fix format violation --- .../DataPlatformInstanceAspectMapper.java | 8 +- .../mappers/TimeStampToAuditStampMapper.java | 27 ++-- .../mappers/DataProcessInstanceMapper.java | 141 +++++++++--------- .../DataProcessInstanceType.java | 2 +- .../types/entitytype/EntityTypeUrnMapper.java | 4 +- .../mappers/MLModelPropertiesMapper.java | 5 +- 6 files changed, 94 insertions(+), 93 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java index 32b55ebaab8e1c..ab3127a3ae232b 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/DataPlatformInstanceAspectMapper.java @@ -30,10 +30,10 @@ public DataPlatformInstance apply( result.setUrn(input.getInstance().toString()); } result.setPlatform( - DataPlatform.builder() - .setUrn(input.getPlatform().toString()) - .setType(EntityType.DATA_PLATFORM) - .build()); + DataPlatform.builder() + .setUrn(input.getPlatform().toString()) + .setType(EntityType.DATA_PLATFORM) + .build()); return result; } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java index 55e33bc3f2655d..58f78b146b406c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapper.java @@ -6,18 +6,19 @@ import 
javax.annotation.Nullable; public class TimeStampToAuditStampMapper { - - public static final TimeStampToAuditStampMapper INSTANCE = new TimeStampToAuditStampMapper(); - public static AuditStamp map(@Nullable final QueryContext context, @Nullable final TimeStamp input) { - if (input == null) { - return null; - } - final AuditStamp result = new AuditStamp(); - result.setTime(input.getTime()); - if (input.hasActor()) { - result.setActor(input.getActor().toString()); - } - return result; + public static final TimeStampToAuditStampMapper INSTANCE = new TimeStampToAuditStampMapper(); + + public static AuditStamp map( + @Nullable final QueryContext context, @Nullable final TimeStamp input) { + if (input == null) { + return null; + } + final AuditStamp result = new AuditStamp(); + result.setTime(input.getTime()); + if (input.hasActor()) { + result.setActor(input.getActor().toString()); } -} \ No newline at end of file + return result; + } +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java index 51cdb6209fc2fe..ff278e551c31ee 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.types.dataprocessinst.mappers; import static com.linkedin.metadata.Constants.*; + import com.linkedin.common.DataPlatformInstance; import com.linkedin.common.SubTypes; import com.linkedin.common.urn.Urn; @@ -10,21 +11,21 @@ import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.types.common.mappers.SubTypesMapper; -import com.linkedin.datahub.graphql.types.common.mappers.AuditStampMapper; import com.linkedin.datahub.graphql.types.common.mappers.util.MappingHelper; import com.linkedin.datahub.graphql.types.mappers.ModelMapper; -import com.linkedin.dataprocess.DataProcessInstanceProperties; import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLHyperParamMapper; import com.linkedin.datahub.graphql.types.mlmodel.mappers.MLMetricMapper; -import com.linkedin.ml.metadata.MLTrainingRunProperties; +import com.linkedin.dataprocess.DataProcessInstanceProperties; import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; -import java.util.stream.Collectors; import lombok.extern.slf4j.Slf4j; /** @@ -32,7 +33,6 @@ * *
<p>
To be replaced by auto-generated mappers implementations */ - @Slf4j public class DataProcessInstanceMapper implements ModelMapper { @@ -43,18 +43,19 @@ public static DataProcessInstance map( return INSTANCE.apply(context, entityResponse); } -private void mapContainers( - @Nullable final QueryContext context, - @Nonnull DataProcessInstance dataProcessInstance, - @Nonnull DataMap dataMap) { - final com.linkedin.container.Container gmsContainer = - new com.linkedin.container.Container(dataMap); - dataProcessInstance.setContainer( - com.linkedin.datahub.graphql.generated.Container.builder() - .setType(EntityType.CONTAINER) - .setUrn(gmsContainer.getContainer().toString()) - .build()); -} + private void mapContainers( + @Nullable final QueryContext context, + @Nonnull DataProcessInstance dataProcessInstance, + @Nonnull DataMap dataMap) { + final com.linkedin.container.Container gmsContainer = + new com.linkedin.container.Container(dataMap); + dataProcessInstance.setContainer( + com.linkedin.datahub.graphql.generated.Container.builder() + .setType(EntityType.CONTAINER) + .setUrn(gmsContainer.getContainer().toString()) + .build()); + } + @Override public DataProcessInstance apply( @Nullable QueryContext context, @Nonnull final EntityResponse entityResponse) { @@ -89,7 +90,7 @@ public DataProcessInstance apply( mappingHelper.mapToResult( SUB_TYPES_ASPECT_NAME, (dataProcessInstance, dataMap) -> - dataProcessInstance.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); + dataProcessInstance.setSubTypes(SubTypesMapper.map(context, new SubTypes(dataMap)))); mappingHelper.mapToResult( CONTAINER_ASPECT_NAME, (dataProcessInstance, dataMap) -> mapContainers(context, dataProcessInstance, dataMap)); @@ -98,61 +99,59 @@ public DataProcessInstance apply( } private void mapTrainingRunProperties( - @Nonnull QueryContext context, - @Nonnull DataProcessInstance dpi, - @Nonnull DataMap dataMap, - @Nonnull Urn entityUrn) { - MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(dataMap); + @Nonnull QueryContext context, + @Nonnull DataProcessInstance dpi, + @Nonnull DataMap dataMap, + @Nonnull Urn entityUrn) { + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(dataMap); - com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties = - new com.linkedin.datahub.graphql.generated.MLTrainingRunProperties(); - if (trainingProperties.hasOutputUrls()) { + com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties = + new com.linkedin.datahub.graphql.generated.MLTrainingRunProperties(); + if (trainingProperties.hasOutputUrls()) { properties.setOutputUrls( - trainingProperties.getOutputUrls() - .stream() - .map(url -> url.toString()) - .collect(Collectors.toList()) - ); - } - if (trainingProperties.getHyperParams() != null) { - properties.setHyperParams( - trainingProperties.getHyperParams().stream() - .map(param -> MLHyperParamMapper.map(context, param)) - .collect(Collectors.toList())); - } - if (trainingProperties.getTrainingMetrics() != null) { - properties.setTrainingMetrics( - trainingProperties.getTrainingMetrics().stream() - .map(metric -> MLMetricMapper.map(context, metric)) - .collect(Collectors.toList())); - } - if (trainingProperties.hasId()) { - properties.setId(trainingProperties.getId()); + trainingProperties.getOutputUrls().stream() + .map(url -> url.toString()) + .collect(Collectors.toList())); + } + if (trainingProperties.getHyperParams() != null) { + properties.setHyperParams( + 
trainingProperties.getHyperParams().stream() + .map(param -> MLHyperParamMapper.map(context, param)) + .collect(Collectors.toList())); + } + if (trainingProperties.getTrainingMetrics() != null) { + properties.setTrainingMetrics( + trainingProperties.getTrainingMetrics().stream() + .map(metric -> MLMetricMapper.map(context, metric)) + .collect(Collectors.toList())); + } + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } } -} -private void mapDataProcessProperties( - @Nonnull QueryContext context, - @Nonnull DataProcessInstance dpi, - @Nonnull DataMap dataMap, - @Nonnull Urn entityUrn) { -DataProcessInstanceProperties dataProcessInstanceProperties = - new DataProcessInstanceProperties(dataMap); -dpi.setName(dataProcessInstanceProperties.getName()); -com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties properties = - new com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties(); -if (dataProcessInstanceProperties.hasExternalUrl()) { - dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); - properties.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); -} -if (dataProcessInstanceProperties.hasCustomProperties()) { - properties.setCustomProperties( - CustomPropertiesMapper.map( - dataProcessInstanceProperties.getCustomProperties(), entityUrn)); -} -if (dataProcessInstanceProperties.hasCreated()) { - dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); -} -dpi.setProperties(properties); + private void mapDataProcessProperties( + @Nonnull QueryContext context, + @Nonnull DataProcessInstance dpi, + @Nonnull DataMap dataMap, + @Nonnull Urn entityUrn) { + DataProcessInstanceProperties dataProcessInstanceProperties = + new DataProcessInstanceProperties(dataMap); + dpi.setName(dataProcessInstanceProperties.getName()); + com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties properties = + new com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties(); + if (dataProcessInstanceProperties.hasExternalUrl()) { + dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + properties.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); + } + if (dataProcessInstanceProperties.hasCustomProperties()) { + properties.setCustomProperties( + CustomPropertiesMapper.map( + dataProcessInstanceProperties.getCustomProperties(), entityUrn)); + } + if (dataProcessInstanceProperties.hasCreated()) { + dpi.setCreated(AuditStampMapper.map(context, dataProcessInstanceProperties.getCreated())); + } + dpi.setProperties(properties); + } } -} \ No newline at end of file diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java index 5a3c4bc31be0c9..0d300678f61414 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java @@ -93,4 +93,4 @@ public List> batchLoad( throw new RuntimeException("Failed to load schemaField entity", e); } } -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java index 953b55ad82fe5e..5b72c2b3c11c5e 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/entitytype/EntityTypeUrnMapper.java @@ -78,8 +78,8 @@ public class EntityTypeUrnMapper { Constants.BUSINESS_ATTRIBUTE_ENTITY_NAME, "urn:li:entityType:datahub.businessAttribute") .put( - Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, - "urn:li:entityType:datahub.dataProcessInstance") + Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME, + "urn:li:entityType:datahub.dataProcessInstance") .build(); private static final Map ENTITY_TYPE_URN_TO_NAME = diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java index 7f0654b8fbe5ec..7b00fe88f2d683 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/mlmodel/mappers/MLModelPropertiesMapper.java @@ -7,8 +7,8 @@ import com.linkedin.datahub.graphql.generated.MLModelGroup; import com.linkedin.datahub.graphql.generated.MLModelProperties; import com.linkedin.datahub.graphql.types.common.mappers.CustomPropertiesMapper; -import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import com.linkedin.datahub.graphql.types.common.mappers.TimeStampToAuditStampMapper; +import com.linkedin.datahub.graphql.types.mappers.EmbeddedModelMapper; import java.util.stream.Collectors; import javax.annotation.Nonnull; import javax.annotation.Nullable; @@ -39,7 +39,8 @@ public MLModelProperties apply( result.setName(entityUrn.getEntityKey().get(1)); // indexed access is safe here } result.setCreated(TimeStampToAuditStampMapper.map(context, mlModelProperties.getCreated())); - result.setLastModified(TimeStampToAuditStampMapper.map(context, mlModelProperties.getLastModified())); + result.setLastModified( + TimeStampToAuditStampMapper.map(context, mlModelProperties.getLastModified())); result.setDescription(mlModelProperties.getDescription()); if (mlModelProperties.getExternalUrl() != null) { result.setExternalUrl(mlModelProperties.getExternalUrl().toString()); From 2824ddd704c4804469efea6876f8690fdd8ee9c9 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 3 Jan 2025 15:27:20 +0900 Subject: [PATCH 03/19] update gmsgraphqlengine.java --- .../datahub/graphql/GmsGraphQLEngine.java | 37 ++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 59335ba605a741..fd736b7cd9cd51 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -56,6 +56,7 @@ import com.linkedin.datahub.graphql.generated.DataJobInputOutput; import com.linkedin.datahub.graphql.generated.DataPlatform; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; import 
com.linkedin.datahub.graphql.generated.DataQualityContract; import com.linkedin.datahub.graphql.generated.Dataset; import com.linkedin.datahub.graphql.generated.DatasetStatsSummary; @@ -347,6 +348,7 @@ import com.linkedin.datahub.graphql.types.dataplatform.DataPlatformType; import com.linkedin.datahub.graphql.types.dataplatforminstance.DataPlatformInstanceType; import com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceRunEventMapper; +import com.linkedin.datahub.graphql.types.dataprocessinstance.DataProcessInstanceType; import com.linkedin.datahub.graphql.types.dataproduct.DataProductType; import com.linkedin.datahub.graphql.types.dataset.DatasetType; import com.linkedin.datahub.graphql.types.dataset.VersionedDatasetType; @@ -530,6 +532,7 @@ public class GmsGraphQLEngine { private final FormType formType; private final IncidentType incidentType; private final RestrictedType restrictedType; + private final DataProcessInstanceType dataProcessInstanceType; private final int graphQLQueryComplexityLimit; private final int graphQLQueryDepthLimit; @@ -649,6 +652,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { this.formType = new FormType(entityClient); this.incidentType = new IncidentType(entityClient); this.restrictedType = new RestrictedType(entityClient, restrictedService); + this.dataProcessInstanceType = new DataProcessInstanceType(entityClient, featureFlags); this.graphQLQueryComplexityLimit = args.graphQLQueryComplexityLimit; this.graphQLQueryDepthLimit = args.graphQLQueryDepthLimit; @@ -699,7 +703,8 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { formType, incidentType, restrictedType, - businessAttributeType)); + businessAttributeType, + dataProcessInstanceType)); this.loadableTypes = new ArrayList<>(entityTypes); // Extend loadable types with types from the plugins // This allows us to offer search and browse capabilities out of the box for @@ -1024,6 +1029,7 @@ private void configureQueryResolvers(final RuntimeWiring.Builder builder) { .dataFetcher("tag", getResolver(tagType)) .dataFetcher("dataFlow", getResolver(dataFlowType)) .dataFetcher("dataJob", getResolver(dataJobType)) + .dataFetcher("dataProcessInstance", getResolver(dataProcessInstanceType)) .dataFetcher("glossaryTerm", getResolver(glossaryTermType)) .dataFetcher("glossaryNode", getResolver(glossaryNodeType)) .dataFetcher("domain", getResolver((domainType))) @@ -3058,6 +3064,35 @@ private void configureDataProcessInstanceResolvers(final RuntimeWiring.Builder b "DataProcessInstance", typeWiring -> typeWiring + .dataFetcher( + "dataPlatformInstance", + new LoadableTypeResolver<>( + dataPlatformInstanceType, + (env) -> { + final DataProcessInstance dataProcessInstance = env.getSource(); + return dataProcessInstance.getDataPlatformInstance() != null + ? dataProcessInstance.getDataPlatformInstance().getUrn() + : null; + })) + .dataFetcher( + "platform", + new LoadableTypeResolver<>( + dataPlatformType, + (env) -> { + final DataProcessInstance dataProcessInstance = env.getSource(); + return dataProcessInstance.getPlatform() != null + ? dataProcessInstance.getPlatform().getUrn() + : null; + })) + .dataFetcher("parentContainers", new ParentContainersResolver(entityClient)) + .dataFetcher( + "container", + new LoadableTypeResolver<>( + containerType, + (env) -> { + final DataProcessInstance dpi = env.getSource(); + return dpi.getContainer() != null ? 
dpi.getContainer().getUrn() : null; + })) .dataFetcher("relationships", new EntityRelationshipsResultResolver(graphClient)) .dataFetcher( "lineage", From 25e78986a55b34c1c9b80178d05cd367a7dba855 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 3 Jan 2025 15:49:50 +0900 Subject: [PATCH 04/19] update build.gradle --- datahub-graphql-core/build.gradle | 36 ++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index 47ada8e9929dd3..a0d52f8ff3a10f 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -33,7 +33,10 @@ dependencies { graphqlCodegen { // For options: https://github.com/kobylynskyi/graphql-java-codegen/blob/master/docs/codegen-options.md - graphqlSchemaPaths = fileTree(dir: "${projectDir}/src/main/resources", include: '**/*.graphql').collect { it.absolutePath } + graphqlSchemaPaths = ( + fileTree(dir: "${projectDir}/src/main/resources", include: "*.graphql").files + + fileTree(dir: "${projectDir}/src/main/resources/graphql/schema", include: "**/*.graphql").files + ).collect { it.absolutePath } outputDir = new File("${projectDir}/src/mainGeneratedGraphQL/java") packageName = "com.linkedin.datahub.graphql.generated" generateToString = true @@ -46,6 +49,37 @@ graphqlCodegen { ] } +task debugSchemaLoading { + doLast { + + def extensions = fileTree(dir: "${projectDir}/src/main/resources/graphql/schema") + .include("**/*.graphql") + .files + + println "\nExtensions found:" + extensions.each { file -> + println "- ${file.absolutePath}" + } + } +} + +task listGraphQLFiles { + doLast { + def baseFiles = fileTree(dir: "${projectDir}/src/main/resources", include: "*.graphql") + def extensionFiles = fileTree(dir: "${projectDir}/src/main/resources/graphql/schema", include: "**/*.graphql") + + println "\nBase GraphQL files found:" + baseFiles.each { file -> + println "- ${file.absolutePath}" + } + + println "\nExtension GraphQL files found:" + extensionFiles.each { file -> + println "- ${file.absolutePath}" + } + } +} + clean { delete 'src/mainGeneratedGraphQL' } From 0891b86a17288701a05d2ed5623c9b4f2c51bc25 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 7 Jan 2025 15:40:02 +0900 Subject: [PATCH 05/19] update searchRequestHandlerTest.java --- .../search/query/request/SearchRequestHandlerTest.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index e51511699e345a..eceb09b2b99058 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -772,6 +772,10 @@ public void testQueryByDefault() { EntityType.SCHEMA_FIELD, Stream.concat(COMMON.stream(), Stream.of("schemaFieldAliases", "parent")) .collect(Collectors.toSet())) + .put( + EntityType.DATA_PROCESS_INSTANCE, + Stream.concat(COMMON.stream(), Stream.of("parentInstance", "parentTemplate", "status")) + .collect(Collectors.toSet())) .build(); for (EntityType entityType : SEARCHABLE_ENTITY_TYPES) { From c78c902c4adc349857e3c48d530d1fdb91db67ac Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 7 Jan 2025 16:48:27 +0900 Subject: [PATCH 06/19] add dataprocessinstance to 
buildentityregistry --- .../src/app/buildEntityRegistry.ts | 2 + .../DataProcessInstanceEntity.tsx | 251 ++++++++++++++++++ .../dataProcessInstance/preview/Preview.tsx | 110 ++++++++ .../src/graphql/dataProcessInstance.graphql | 181 +++++++++++++ 4 files changed, 544 insertions(+) create mode 100644 datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx create mode 100644 datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx create mode 100644 datahub-web-react/src/graphql/dataProcessInstance.graphql diff --git a/datahub-web-react/src/app/buildEntityRegistry.ts b/datahub-web-react/src/app/buildEntityRegistry.ts index 181ec7d328a587..b7ff97b3a07469 100644 --- a/datahub-web-react/src/app/buildEntityRegistry.ts +++ b/datahub-web-react/src/app/buildEntityRegistry.ts @@ -25,6 +25,7 @@ import { RestrictedEntity } from './entity/restricted/RestrictedEntity'; import { BusinessAttributeEntity } from './entity/businessAttribute/BusinessAttributeEntity'; import { SchemaFieldPropertiesEntity } from './entity/schemaField/SchemaFieldPropertiesEntity'; import { StructuredPropertyEntity } from './entity/structuredProperty/StructuredPropertyEntity'; +import { DataProcessInstanceEntity } from './entity/dataProcessInstance/DataProcessInstanceEntity'; export default function buildEntityRegistry() { const registry = new EntityRegistry(); @@ -54,5 +55,6 @@ export default function buildEntityRegistry() { registry.register(new BusinessAttributeEntity()); registry.register(new SchemaFieldPropertiesEntity()); registry.register(new StructuredPropertyEntity()); + registry.register(new DataProcessInstanceEntity()); return registry; } diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx new file mode 100644 index 00000000000000..a80c2f2b5ca367 --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -0,0 +1,251 @@ +import { ApiOutlined } from '@ant-design/icons'; +import { DataJob, DataProcessInstance, EntityType, OwnershipType, SearchResult } from '../../../types.generated'; +import { Preview } from './preview/Preview'; +import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; +import { EntityProfile } from '../shared/containers/profile/EntityProfile'; +import { useGetDataProcessInstanceQuery } from '../../../graphql/dataProcessInstance.generated'; +import { PropertiesTab } from '../shared/tabs/Properties/PropertiesTab'; +import { LineageTab } from '../shared/tabs/Lineage/LineageTab'; +import { SidebarAboutSection } from '../shared/containers/profile/sidebar/AboutSection/SidebarAboutSection'; +import { SidebarTagsSection } from '../shared/containers/profile/sidebar/SidebarTagsSection'; +import { SidebarOwnerSection } from '../shared/containers/profile/sidebar/Ownership/sidebar/SidebarOwnerSection'; +import { GenericEntityProperties } from '../shared/types'; +import { getDataForEntityType } from '../shared/containers/profile/utils'; +import { SidebarDomainSection } from '../shared/containers/profile/sidebar/Domain/SidebarDomainSection'; +import { EntityMenuItems } from '../shared/EntityDropdown/EntityDropdown'; +import { capitalizeFirstLetterOnly } from '../../shared/textUtil'; +import DataProductSection from '../shared/containers/profile/sidebar/DataProduct/DataProductSection'; +import { getDataProduct } from '../shared/utils'; +// import SummaryTab from 
'./profile/DataProcessInstaceSummary'; + +const getProcessPlatformName = (data?: DataProcessInstance): string => { + return ( + data?.dataPlatformInstance?.platform?.properties?.displayName || + capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || + '' + ); +}; + +const getParentEntities = (data: DataProcessInstance): Entity[] => { + const parentEntity = data?.relationships?.relationships?.find( + (rel) => rel.type === 'InstanceOf' && rel.entity?.type === EntityType.DataJob, + ); + + const containerEntity = data?.container?.entity; + + return parentEntity ? [parentEntity.entity as Entity] : []; // TODO: HACK +}; + +/** + * Definition of the DataHub DataProcessInstance entity. + */ +export class DataProcessInstanceEntity implements Entity { + type: EntityType = EntityType.DataProcessInstance; + + icon = (fontSize: number, styleType: IconStyleType, color?: string) => { + if (styleType === IconStyleType.TAB_VIEW) { + return ; + } + + if (styleType === IconStyleType.HIGHLIGHT) { + return ; + } + + return ( + + ); + }; + + isSearchEnabled = () => true; + + isBrowseEnabled = () => true; + + isLineageEnabled = () => true; + + getAutoCompleteFieldName = () => 'name'; + + getPathName = () => 'dataProcessInstance'; + + getEntityName = () => 'Process Instance'; + + getGraphName = () => 'dataProcessInstance'; + + getCollectionName = () => 'Process Instances'; + + useEntityQuery = useGetDataProcessInstanceQuery; + + renderProfile = (urn: string) => ( + { + // const activeIncidentCount = processInstance?.dataProcessInstance?.activeIncidents.total; + // return `Incidents${(activeIncidentCount && ` (${activeIncidentCount})`) || ''}`; + // }, + // }, + ]} + sidebarSections={this.getSidebarSections()} + /> + ); + + getSidebarSections = () => [ + { + component: SidebarAboutSection, + }, + { + component: SidebarOwnerSection, + properties: { + defaultOwnerType: OwnershipType.TechnicalOwner, + }, + }, + { + component: SidebarTagsSection, + properties: { + hasTags: true, + hasTerms: true, + }, + }, + { + component: SidebarDomainSection, + }, + { + component: DataProductSection, + }, + ]; + + getOverridePropertiesFromEntity = (processInstance?: DataProcessInstance | null): GenericEntityProperties => { + const name = processInstance?.name; + const externalUrl = processInstance?.externalUrl; + return { + name, + externalUrl, + }; + }; + + renderPreview = (_: PreviewType, data: DataProcessInstance) => { + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + renderSearch = (result: SearchResult) => { + const data = result.entity as DataProcessInstance; + const genericProperties = this.getGenericEntityProperties(data); + const parentEntities = getParentEntities(data); + return ( + + ); + }; + + getLineageVizConfig = (entity: DataProcessInstance) => { + return { + urn: entity?.urn, + name: this.displayName(entity), + type: EntityType.DataProcessInstance, + subtype: entity?.subTypes?.typeNames?.[0], + icon: entity?.platform?.properties?.logoUrl || undefined, + platform: entity?.platform, + container: entity?.container, + // health: entity?.health || undefined, + }; + }; + + displayName = (data: DataProcessInstance) => { + return data.properties?.name || data.urn; + }; + + getGenericEntityProperties = (data: DataProcessInstance) => { + return getDataForEntityType({ + data, + entityType: this.type, + getOverrideProperties: this.getOverridePropertiesFromEntity, + }); + }; + + supportedCapabilities = () => { + return 
new Set([ + EntityCapabilityType.OWNERS, + EntityCapabilityType.GLOSSARY_TERMS, + EntityCapabilityType.TAGS, + EntityCapabilityType.DOMAINS, + EntityCapabilityType.DEPRECATION, + EntityCapabilityType.SOFT_DELETE, + EntityCapabilityType.DATA_PRODUCTS, + ]); + }; +} \ No newline at end of file diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx new file mode 100644 index 00000000000000..e8e506ebe4692f --- /dev/null +++ b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx @@ -0,0 +1,110 @@ +import React from 'react'; +import styled from 'styled-components'; +import { Typography } from 'antd'; +import { + DataProduct, + Deprecation, + Domain, + Entity, + EntityPath, + EntityType, + GlobalTags, + Health, + Owner, + SearchInsight, + Container, + ParentContainersResult, +} from '../../../../types.generated'; +import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; +import { useEntityRegistry } from '../../../useEntityRegistry'; +import { IconStyleType } from '../../Entity'; +import { ANTD_GRAY } from '../../shared/constants'; + +const StatText = styled(Typography.Text)` + color: ${ANTD_GRAY[8]}; +`; + +export const Preview = ({ + urn, + name, + subType, + description, + platformName, + platformLogo, + platformInstanceId, + container, + owners, + domain, + dataProduct, + deprecation, + globalTags, + snippet, + insights, + externalUrl, + degree, + paths, + health, + parentEntities, + parentContainers, + duration, + status, + startTime, +}: { + urn: string; + name: string; + subType?: string | null; + description?: string | null; + platformName?: string; + platformLogo?: string | null; + platformInstanceId?: string; + container?: Container | null; + owners?: Array | null; + domain?: Domain | null; + dataProduct?: DataProduct | null; + deprecation?: Deprecation | null; + globalTags?: GlobalTags | null; + snippet?: React.ReactNode | null; + insights?: Array | null; + externalUrl?: string | null; + degree?: number; + paths?: EntityPath[]; + health?: Health[] | null; + parentEntities?: Entity[] | null; + parentContainers?: ParentContainersResult | null; + duration?: number | null; + status?: string | null; + startTime?: number | null; +}): JSX.Element => { + const entityRegistry = useEntityRegistry(); + return ( + + ); +}; \ No newline at end of file diff --git a/datahub-web-react/src/graphql/dataProcessInstance.graphql b/datahub-web-react/src/graphql/dataProcessInstance.graphql new file mode 100644 index 00000000000000..efe4a9165553d2 --- /dev/null +++ b/datahub-web-react/src/graphql/dataProcessInstance.graphql @@ -0,0 +1,181 @@ +fragment processInstanceRelationshipResults on EntityRelationshipsResult { + start + count + total + relationships { + type + direction + entity { + urn + type + ... on Dataset { + name + properties { + name + description + qualifiedName + } + editableProperties { + description + } + platform { + ...platformFields + } + subTypes { + typeNames + } + status { + removed + } + } + ... 
on DataJob { + urn + type + dataFlow { + ...nonRecursiveDataFlowFields + } + jobId + properties { + name + description + externalUrl + customProperties { + key + value + } + } + deprecation { + ...deprecationFields + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + subTypes { + typeNames + } + editableProperties { + description + } + status { + removed + } + } + } + } +} + +fragment dataProcessInstanceFields on DataProcessInstance { + urn + type + platform { + ...platformFields + } + parentContainers { + ...parentContainersFields + } + container { + ...entityContainer + } + subTypes { + typeNames + } + properties { + name + createdTS: created { + time + actor + } + customProperties { + key + value + } + } + mlTrainingRunProperties { + outputUrls + trainingMetrics { + name + description + value + } + hyperParams { + name + description + value + } + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + state(startTimeMillis: null, endTimeMillis: null, limit: 1) { + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis + } + relationships(input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 }) { + ...processInstanceRelationshipResults + } +} + +query getDataProcessInstance($urn: String!) { + dataProcessInstance(urn: $urn) { + urn + type + platform { + ...platformFields + } + parentContainers { + ...parentContainersFields + } + subTypes { + typeNames + } + container { + ...entityContainer + } + name + properties { + name + created { + time + actor + } + } + mlTrainingRunProperties { + id + outputUrls + trainingMetrics { + name + description + value + } + hyperParams { + name + description + value + } + } + relationships( + input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 } + ) { + ...processInstanceRelationshipResults + } + dataPlatformInstance { + ...dataPlatformInstanceFields + } + state(startTimeMillis: null, endTimeMillis: null, limit: 1) { + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis + } + } +} \ No newline at end of file From ad751295142e6c25363a170eea3fe57296b1069f Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 7 Jan 2025 17:45:36 +0900 Subject: [PATCH 07/19] fix yarn lint --- .../DataProcessInstanceEntity.tsx | 45 ++++++++++++------- .../dataProcessInstance/preview/Preview.tsx | 35 ++++++--------- 2 files changed, 43 insertions(+), 37 deletions(-) diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx index a80c2f2b5ca367..4834a026ad94a3 100644 --- a/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx +++ b/datahub-web-react/src/app/entity/dataProcessInstance/DataProcessInstanceEntity.tsx @@ -1,5 +1,12 @@ +import React from 'react'; import { ApiOutlined } from '@ant-design/icons'; -import { DataJob, DataProcessInstance, EntityType, OwnershipType, SearchResult } from '../../../types.generated'; +import { + DataProcessInstance, + Entity as GeneratedEntity, + EntityType, + OwnershipType, + SearchResult, +} from '../../../types.generated'; import { Preview } from './preview/Preview'; import { Entity, EntityCapabilityType, IconStyleType, PreviewType } from '../Entity'; import { EntityProfile } from '../shared/containers/profile/EntityProfile'; @@ -18,24 +25,30 @@ import DataProductSection from 
'../shared/containers/profile/sidebar/DataProduct import { getDataProduct } from '../shared/utils'; // import SummaryTab from './profile/DataProcessInstaceSummary'; -const getProcessPlatformName = (data?: DataProcessInstance): string => { - return ( - data?.dataPlatformInstance?.platform?.properties?.displayName || - capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || - '' - ); -}; +// const getProcessPlatformName = (data?: DataProcessInstance): string => { +// return ( +// data?.dataPlatformInstance?.platform?.properties?.displayName || +// capitalizeFirstLetterOnly(data?.dataPlatformInstance?.platform?.name) || +// '' +// ); +// }; -const getParentEntities = (data: DataProcessInstance): Entity[] => { +const getParentEntities = (data: DataProcessInstance): GeneratedEntity[] => { const parentEntity = data?.relationships?.relationships?.find( (rel) => rel.type === 'InstanceOf' && rel.entity?.type === EntityType.DataJob, ); - const containerEntity = data?.container?.entity; + if (!parentEntity?.entity) return []; - return parentEntity ? [parentEntity.entity as Entity] : []; // TODO: HACK + // Convert to GeneratedEntity + return [ + { + type: parentEntity.entity.type, + urn: (parentEntity.entity as any).urn, // Make sure urn exists + relationships: (parentEntity.entity as any).relationships, + }, + ]; }; - /** * Definition of the DataHub DataProcessInstance entity. */ @@ -204,9 +217,9 @@ export class DataProcessInstanceEntity implements Entity { parentContainers={data.parentContainers} parentEntities={parentEntities} container={data.container || undefined} - duration={data?.state[0]?.durationMillis} - status={data?.state[0]?.result?.resultType} - startTime={data?.state[0]?.timestampMillis} + // duration={data?.state?.[0]?.durationMillis} + // status={data?.state?.[0]?.result?.resultType} + // startTime={data?.state?.[0]?.timestampMillis} // health={data.health} /> ); @@ -248,4 +261,4 @@ export class DataProcessInstanceEntity implements Entity { EntityCapabilityType.DATA_PRODUCTS, ]); }; -} \ No newline at end of file +} diff --git a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx index e8e506ebe4692f..3a3b0340695d96 100644 --- a/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx +++ b/datahub-web-react/src/app/entity/dataProcessInstance/preview/Preview.tsx @@ -1,11 +1,9 @@ import React from 'react'; -import styled from 'styled-components'; -import { Typography } from 'antd'; import { DataProduct, Deprecation, Domain, - Entity, + Entity as GeneratedEntity, EntityPath, EntityType, GlobalTags, @@ -18,11 +16,6 @@ import { import DefaultPreviewCard from '../../../preview/DefaultPreviewCard'; import { useEntityRegistry } from '../../../useEntityRegistry'; import { IconStyleType } from '../../Entity'; -import { ANTD_GRAY } from '../../shared/constants'; - -const StatText = styled(Typography.Text)` - color: ${ANTD_GRAY[8]}; -`; export const Preview = ({ urn, @@ -46,10 +39,10 @@ export const Preview = ({ health, parentEntities, parentContainers, - duration, - status, - startTime, -}: { +}: // duration, +// status, +// startTime, +{ urn: string; name: string; subType?: string | null; @@ -57,7 +50,7 @@ export const Preview = ({ platformName?: string; platformLogo?: string | null; platformInstanceId?: string; - container?: Container | null; + container?: Container; owners?: Array | null; domain?: Domain | null; dataProduct?: DataProduct | null; @@ -69,11 +62,11 @@ 
export const Preview = ({ degree?: number; paths?: EntityPath[]; health?: Health[] | null; - parentEntities?: Entity[] | null; + parentEntities?: Array | null; parentContainers?: ParentContainersResult | null; - duration?: number | null; - status?: string | null; - startTime?: number | null; + // duration?: number | null; + // status?: string | null; + // startTime?: number | null; }): JSX.Element => { const entityRegistry = useEntityRegistry(); return ( @@ -102,9 +95,9 @@ export const Preview = ({ paths={paths} health={health || undefined} parentEntities={parentEntities} - duration={duration} - status={status} - startTime={startTime} + // duration={duration} + // status={status} + // startTime={startTime} /> ); -}; \ No newline at end of file +}; From 5b02baffb6dc83b69485e7c6785a1b3657c8cec6 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 7 Jan 2025 19:58:31 +0900 Subject: [PATCH 08/19] fix formatting searchrequesthandlertest --- .../search/query/request/SearchRequestHandlerTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java index eceb09b2b99058..1a91ae35c6595b 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/query/request/SearchRequestHandlerTest.java @@ -774,7 +774,8 @@ public void testQueryByDefault() { .collect(Collectors.toSet())) .put( EntityType.DATA_PROCESS_INSTANCE, - Stream.concat(COMMON.stream(), Stream.of("parentInstance", "parentTemplate", "status")) + Stream.concat( + COMMON.stream(), Stream.of("parentInstance", "parentTemplate", "status")) .collect(Collectors.toSet())) .build(); From 350cb88a1aa35d36589e153e4ba9abceac9a14e4 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Tue, 7 Jan 2025 20:48:33 +0900 Subject: [PATCH 09/19] add graphql files --- .../src/graphql/dataProcessInstance.graphql | 18 +++++++++--------- .../src/graphql/fragments.graphql | 14 ++++++++++++++ datahub-web-react/src/graphql/lineage.graphql | 19 +++++++++++++++++++ .../src/graphql/mlModelGroup.graphql | 12 ++++++++++++ 4 files changed, 54 insertions(+), 9 deletions(-) diff --git a/datahub-web-react/src/graphql/dataProcessInstance.graphql b/datahub-web-react/src/graphql/dataProcessInstance.graphql index efe4a9165553d2..8f55ca4903d527 100644 --- a/datahub-web-react/src/graphql/dataProcessInstance.graphql +++ b/datahub-web-react/src/graphql/dataProcessInstance.graphql @@ -107,14 +107,14 @@ fragment dataProcessInstanceFields on DataProcessInstance { ...dataPlatformInstanceFields } state(startTimeMillis: null, endTimeMillis: null, limit: 1) { - status - attempt - result { - resultType - nativeResultType - } - timestampMillis - durationMillis + status + attempt + result { + resultType + nativeResultType + } + timestampMillis + durationMillis } relationships(input: { types: ["InstanceOf", "Consumes", "Produces"], direction: OUTGOING, start: 0, count: 50 }) { ...processInstanceRelationshipResults @@ -178,4 +178,4 @@ query getDataProcessInstance($urn: String!) 
{ durationMillis } } -} \ No newline at end of file +} diff --git a/datahub-web-react/src/graphql/fragments.graphql b/datahub-web-react/src/graphql/fragments.graphql index 68c57c5cb5db55..ecac2997489354 100644 --- a/datahub-web-react/src/graphql/fragments.graphql +++ b/datahub-web-react/src/graphql/fragments.graphql @@ -863,8 +863,17 @@ fragment nonRecursiveMLModel on MLModel { ...ownershipFields } properties { + name description date + created { + time + actor + } + lastModified { + time + actor + } externalUrl version type @@ -956,7 +965,12 @@ fragment nonRecursiveMLModelGroupFields on MLModelGroup { ...deprecationFields } properties { + name description + created { + time + actor + } } browsePathV2 { ...browsePathV2Fields diff --git a/datahub-web-react/src/graphql/lineage.graphql b/datahub-web-react/src/graphql/lineage.graphql index ee05811cbb72de..457936ed62cd2e 100644 --- a/datahub-web-react/src/graphql/lineage.graphql +++ b/datahub-web-react/src/graphql/lineage.graphql @@ -259,6 +259,9 @@ fragment lineageNodeProperties on EntityWithRelationships { name description origin + tags { + ...globalTagsFields + } platform { ...platformFields } @@ -268,6 +271,19 @@ fragment lineageNodeProperties on EntityWithRelationships { status { removed } + properties { + createdTS: created { + time + actor + } + customProperties { + key + value + } + } + editableProperties { + description + } structuredProperties { properties { ...structuredPropertiesFields @@ -328,6 +344,9 @@ fragment lineageNodeProperties on EntityWithRelationships { urn type } + ... on DataProcessInstance { + ...dataProcessInstanceFields + } } fragment lineageFields on EntityWithRelationships { diff --git a/datahub-web-react/src/graphql/mlModelGroup.graphql b/datahub-web-react/src/graphql/mlModelGroup.graphql index 81ab65d0b9a08d..4f11ed4984d37a 100644 --- a/datahub-web-react/src/graphql/mlModelGroup.graphql +++ b/datahub-web-react/src/graphql/mlModelGroup.graphql @@ -2,6 +2,18 @@ query getMLModelGroup($urn: String!) 
{ mlModelGroup(urn: $urn) { urn type + properties { + name + description + created { + time + actor + } + lastModified { + time + actor + } + } ...nonRecursiveMLModelGroupFields incoming: relationships( input: { From b25d58b7c66d2dc6cdeb4619f7413619fe669b1e Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 9 Jan 2025 00:03:59 +0900 Subject: [PATCH 10/19] add unit test for timestamp to auditstamp mapper test --- .../TimeStampToAuditStampMapperTest.java | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java new file mode 100644 index 00000000000000..f25b4849c8bd7a --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java @@ -0,0 +1,46 @@ +package com.linkedin.datahub.graphql.types.common.mappers; + +import static org.testng.Assert.*; + +import com.linkedin.common.TimeStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.AuditStamp; +import org.testng.annotations.Test; + +public class TimeStampToAuditStampMapperTest { + + private static final String TEST_ACTOR_URN = "urn:li:corpuser:testUser"; + private static final long TEST_TIME = 1234567890L; + + @Test + public void testMapWithActor() throws Exception { + TimeStamp input = new TimeStamp(); + input.setTime(TEST_TIME); + input.setActor(Urn.createFromString(TEST_ACTOR_URN)); + + AuditStamp result = TimeStampToAuditStampMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getTime().longValue(), TEST_TIME); + assertEquals(result.getActor(), TEST_ACTOR_URN); + } + + @Test + public void testMapWithoutActor() { + TimeStamp input = new TimeStamp(); + input.setTime(TEST_TIME); + + AuditStamp result = TimeStampToAuditStampMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getTime().longValue(), TEST_TIME); + assertNull(result.getActor()); + } + + @Test + public void testMapNull() { + AuditStamp result = TimeStampToAuditStampMapper.map(null, null); + + assertNull(result); + } +} \ No newline at end of file From 2ea9da502434385c66080f115a76d3cac81fc1b4 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Thu, 9 Jan 2025 16:43:56 +0900 Subject: [PATCH 11/19] add unit test for dataprocess instance & rename dir --- .../datahub/graphql/GmsGraphQLEngine.java | 2 +- .../DataProcessInstanceType.java | 2 +- .../DataPlatformInstanceAspectMapperTest.java | 73 +++++++++++++ .../DataProcessInstanceTypeTest.java | 102 ++++++++++++++++++ .../DataProcessInstanceMapperTest.java | 29 +++++ 5 files changed, 206 insertions(+), 2 deletions(-) rename datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/{dataprocessinstance => dataprocessinst}/DataProcessInstanceType.java (98%) create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java create mode 100644 datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java create mode 100644 
datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index fd736b7cd9cd51..3c46c1a8dce35c 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -347,8 +347,8 @@ import com.linkedin.datahub.graphql.types.datajob.DataJobType; import com.linkedin.datahub.graphql.types.dataplatform.DataPlatformType; import com.linkedin.datahub.graphql.types.dataplatforminstance.DataPlatformInstanceType; +import com.linkedin.datahub.graphql.types.dataprocessinst.DataProcessInstanceType; import com.linkedin.datahub.graphql.types.dataprocessinst.mappers.DataProcessInstanceRunEventMapper; -import com.linkedin.datahub.graphql.types.dataprocessinstance.DataProcessInstanceType; import com.linkedin.datahub.graphql.types.dataproduct.DataProductType; import com.linkedin.datahub.graphql.types.dataset.DatasetType; import com.linkedin.datahub.graphql.types.dataset.VersionedDatasetType; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java similarity index 98% rename from datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java rename to datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java index 0d300678f61414..c6cede662fa9c2 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinstance/DataProcessInstanceType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java @@ -1,4 +1,4 @@ -package com.linkedin.datahub.graphql.types.dataprocessinstance; +package com.linkedin.datahub.graphql.types.dataprocessinst; import static com.linkedin.metadata.Constants.*; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java new file mode 100644 index 00000000000000..2009428556f601 --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java @@ -0,0 +1,73 @@ +package com.linkedin.datahub.graphql.types.dataplatforminstance.mapper; + +import static org.testng.Assert.*; + +import com.linkedin.common.urn.DataPlatformUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataPlatformInstance; +import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; +import com.linkedin.datahub.graphql.generated.EntityType; +import org.testng.annotations.Test; + +public class DataPlatformInstanceAspectMapperTest { + + private static final String TEST_PLATFORM = "hive"; + private static final String TEST_INSTANCE = "prod"; + private static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:" + TEST_PLATFORM; + private static final String 
TEST_INSTANCE_URN = + String.format("urn:li:dataPlatformInstance:(urn:li:dataPlatform:%s,%s)", TEST_PLATFORM, TEST_INSTANCE); + + @Test + public void testMapWithInstance() throws Exception { + // Create test input + com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); + DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); + Urn instanceUrn = Urn.createFromString(TEST_INSTANCE_URN); + + input.setPlatform(platformUrn); + input.setInstance(instanceUrn); + + // Map and verify + DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); + + assertNotNull(result); + assertEquals(result.getType(), EntityType.DATA_PLATFORM_INSTANCE); + assertEquals(result.getUrn(), TEST_INSTANCE_URN); + + // Verify platform mapping + assertNotNull(result.getPlatform()); + assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertEquals(result.getPlatform().getUrn(), TEST_PLATFORM_URN); + } + + @Test + public void testMapWithoutInstance() throws Exception { + // Create test input with only platform + com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); + DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); + input.setPlatform(platformUrn); + + // Map and verify + DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); + + assertNotNull(result); + assertNull(result.getType()); // Type should be null when no instance + assertNull(result.getUrn()); // URN should be null when no instance + + // Verify platform is still mapped correctly + assertNotNull(result.getPlatform()); + assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); + assertEquals(result.getPlatform().getUrn(), TEST_PLATFORM_URN); + } + + @Test(expectedExceptions = NullPointerException.class) + public void testMapNull() { + DataPlatformInstanceAspectMapper.map(null, null); + } + + @Test + public void testSingleton() { + assertNotNull(DataPlatformInstanceAspectMapper.INSTANCE); + assertSame(DataPlatformInstanceAspectMapper.INSTANCE, DataPlatformInstanceAspectMapper.INSTANCE); + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java new file mode 100644 index 00000000000000..850ee6b3a3213d --- /dev/null +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java @@ -0,0 +1,102 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst; + +import static com.linkedin.datahub.graphql.TestUtils.getMockAllowContext; +import static org.mockito.ArgumentMatchers.any; +import static org.testng.Assert.*; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.entity.Aspect; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.Constants; +import 
com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.common.AuditStamp; +import com.linkedin.data.template.StringMap; +import com.linkedin.r2.RemoteInvocationException; +import graphql.execution.DataFetcherResult; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import org.mockito.Mockito; +import org.testng.annotations.Test; + +public class DataProcessInstanceTypeTest { + + private static final String TEST_INSTANCE_URN = "urn:li:dataProcessInstance:(test-workflow,test-instance-1)"; + + @Test + public void testBatchLoad() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + + assertEquals(result.size(), 1); + } + + @Test + public void testBatchLoadFeatureFlagDisabled() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(false); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + + assertEquals(result.size(), 0); + + Mockito.verify(mockClient, Mockito.never()) + .batchGetV2( + any(), + Mockito.anyString(), + Mockito.anySet(), + Mockito.anySet()); + } + + @Test(expectedExceptions = RuntimeException.class) + public void testBatchLoadClientException() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + + Mockito.doThrow(RemoteInvocationException.class) + .when(mockClient) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + } + + @Test + public void testGetType() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.type(), EntityType.DATA_PROCESS_INSTANCE); + } + + @Test + public void testObjectClass() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.objectClass(), DataProcessInstance.class); + } +} \ No newline at end of file diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java new file mode 100644 index 00000000000000..7b8b8ef247d65e --- /dev/null +++ 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java @@ -0,0 +1,29 @@ +package com.linkedin.datahub.graphql.types.dataprocessinst.mappers; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNotNull; + +import com.google.common.collect.ImmutableMap; +import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.generated.DataProcessInstance; +import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspectMap; +import org.testng.annotations.Test; + +public class DataProcessInstanceMapperTest { + + @Test + public void testMap() throws Exception { + EntityResponse entityResponse = new EntityResponse(); + Urn urn = Urn.createFromString("urn:li:dataProcessInstance:(test-workflow,test-instance)"); + entityResponse.setUrn(urn); + entityResponse.setAspects(new EnvelopedAspectMap(ImmutableMap.of())); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance); + assertEquals(instance.getUrn(), urn.toString()); + assertEquals(instance.getType(), EntityType.DATA_PROCESS_INSTANCE); + } +} \ No newline at end of file From 8fe7725ca795d315aa1815749de1670c08be685c Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 10 Jan 2025 09:30:51 +0900 Subject: [PATCH 12/19] fix spotlessjavacheck --- .../TimeStampToAuditStampMapperTest.java | 4 +- .../DataPlatformInstanceAspectMapperTest.java | 22 +-- .../DataProcessInstanceTypeTest.java | 127 ++++++++---------- .../DataProcessInstanceMapperTest.java | 24 ++-- 4 files changed, 81 insertions(+), 96 deletions(-) diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java index f25b4849c8bd7a..4e0dbd7b1733b4 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/common/mappers/TimeStampToAuditStampMapperTest.java @@ -40,7 +40,7 @@ public void testMapWithoutActor() { @Test public void testMapNull() { AuditStamp result = TimeStampToAuditStampMapper.map(null, null); - + assertNull(result); } -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java index 2009428556f601..479d7340fef945 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataplatforminstance/mapper/DataPlatformInstanceAspectMapperTest.java @@ -5,8 +5,8 @@ import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.Urn; import com.linkedin.datahub.graphql.generated.DataPlatformInstance; -import com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import com.linkedin.datahub.graphql.generated.EntityType; +import 
com.linkedin.datahub.graphql.types.common.mappers.DataPlatformInstanceAspectMapper; import org.testng.annotations.Test; public class DataPlatformInstanceAspectMapperTest { @@ -14,8 +14,9 @@ public class DataPlatformInstanceAspectMapperTest { private static final String TEST_PLATFORM = "hive"; private static final String TEST_INSTANCE = "prod"; private static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:" + TEST_PLATFORM; - private static final String TEST_INSTANCE_URN = - String.format("urn:li:dataPlatformInstance:(urn:li:dataPlatform:%s,%s)", TEST_PLATFORM, TEST_INSTANCE); + private static final String TEST_INSTANCE_URN = + String.format( + "urn:li:dataPlatformInstance:(urn:li:dataPlatform:%s,%s)", TEST_PLATFORM, TEST_INSTANCE); @Test public void testMapWithInstance() throws Exception { @@ -23,7 +24,7 @@ public void testMapWithInstance() throws Exception { com.linkedin.common.DataPlatformInstance input = new com.linkedin.common.DataPlatformInstance(); DataPlatformUrn platformUrn = new DataPlatformUrn(TEST_PLATFORM); Urn instanceUrn = Urn.createFromString(TEST_INSTANCE_URN); - + input.setPlatform(platformUrn); input.setInstance(instanceUrn); @@ -33,7 +34,7 @@ public void testMapWithInstance() throws Exception { assertNotNull(result); assertEquals(result.getType(), EntityType.DATA_PLATFORM_INSTANCE); assertEquals(result.getUrn(), TEST_INSTANCE_URN); - + // Verify platform mapping assertNotNull(result.getPlatform()); assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); @@ -51,9 +52,9 @@ public void testMapWithoutInstance() throws Exception { DataPlatformInstance result = DataPlatformInstanceAspectMapper.map(null, input); assertNotNull(result); - assertNull(result.getType()); // Type should be null when no instance - assertNull(result.getUrn()); // URN should be null when no instance - + assertNull(result.getType()); // Type should be null when no instance + assertNull(result.getUrn()); // URN should be null when no instance + // Verify platform is still mapped correctly assertNotNull(result.getPlatform()); assertEquals(result.getPlatform().getType(), EntityType.DATA_PLATFORM); @@ -68,6 +69,7 @@ public void testMapNull() { @Test public void testSingleton() { assertNotNull(DataPlatformInstanceAspectMapper.INSTANCE); - assertSame(DataPlatformInstanceAspectMapper.INSTANCE, DataPlatformInstanceAspectMapper.INSTANCE); + assertSame( + DataPlatformInstanceAspectMapper.INSTANCE, DataPlatformInstanceAspectMapper.INSTANCE); } -} \ No newline at end of file +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java index 850ee6b3a3213d..60849c50cbaa7b 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java @@ -5,98 +5,81 @@ import static org.testng.Assert.*; import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.linkedin.common.urn.Urn; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.EntityType; -import com.linkedin.datahub.graphql.featureflags.FeatureFlags; 
-import com.linkedin.entity.Aspect; -import com.linkedin.entity.EntityResponse; -import com.linkedin.entity.EnvelopedAspect; -import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.Constants; -import com.linkedin.dataprocess.DataProcessInstanceProperties; -import com.linkedin.common.AuditStamp; -import com.linkedin.data.template.StringMap; import com.linkedin.r2.RemoteInvocationException; import graphql.execution.DataFetcherResult; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; import org.mockito.Mockito; import org.testng.annotations.Test; public class DataProcessInstanceTypeTest { - private static final String TEST_INSTANCE_URN = "urn:li:dataProcessInstance:(test-workflow,test-instance-1)"; + private static final String TEST_INSTANCE_URN = + "urn:li:dataProcessInstance:(test-workflow,test-instance-1)"; - @Test - public void testBatchLoad() throws Exception { - EntityClient mockClient = Mockito.mock(EntityClient.class); - FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); - Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + @Test + public void testBatchLoad() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); - DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); - List> result = - type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); - assertEquals(result.size(), 1); - } + assertEquals(result.size(), 1); + } - @Test - public void testBatchLoadFeatureFlagDisabled() throws Exception { - EntityClient mockClient = Mockito.mock(EntityClient.class); - FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); - Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(false); + @Test + public void testBatchLoadFeatureFlagDisabled() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(false); - DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); - List> result = - type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + List> result = + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); - assertEquals(result.size(), 0); + assertEquals(result.size(), 0); - Mockito.verify(mockClient, Mockito.never()) - .batchGetV2( - any(), - Mockito.anyString(), - Mockito.anySet(), - Mockito.anySet()); - } + Mockito.verify(mockClient, Mockito.never()) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + } - @Test(expectedExceptions = RuntimeException.class) - public void testBatchLoadClientException() throws Exception { - EntityClient mockClient = Mockito.mock(EntityClient.class); - FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); - 
Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); + @Test(expectedExceptions = RuntimeException.class) + public void testBatchLoadClientException() throws Exception { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true); - Mockito.doThrow(RemoteInvocationException.class) - .when(mockClient) - .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); + Mockito.doThrow(RemoteInvocationException.class) + .when(mockClient) + .batchGetV2(any(), Mockito.anyString(), Mockito.anySet(), Mockito.anySet()); - DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); - type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); - } - - @Test - public void testGetType() { - EntityClient mockClient = Mockito.mock(EntityClient.class); - FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); - DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); - - assertEquals(type.type(), EntityType.DATA_PROCESS_INSTANCE); - } - - @Test - public void testObjectClass() { - EntityClient mockClient = Mockito.mock(EntityClient.class); - FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); - DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); - - assertEquals(type.objectClass(), DataProcessInstance.class); - } -} \ No newline at end of file + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + type.batchLoad(ImmutableList.of(TEST_INSTANCE_URN), getMockAllowContext()); + } + + @Test + public void testGetType() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.type(), EntityType.DATA_PROCESS_INSTANCE); + } + + @Test + public void testObjectClass() { + EntityClient mockClient = Mockito.mock(EntityClient.class); + FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class); + DataProcessInstanceType type = new DataProcessInstanceType(mockClient, mockFeatureFlags); + + assertEquals(type.objectClass(), DataProcessInstance.class); + } +} diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java index 7b8b8ef247d65e..0c28b8b04c3fa8 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java @@ -13,17 +13,17 @@ public class DataProcessInstanceMapperTest { - @Test - public void testMap() throws Exception { - EntityResponse entityResponse = new EntityResponse(); - Urn urn = Urn.createFromString("urn:li:dataProcessInstance:(test-workflow,test-instance)"); - entityResponse.setUrn(urn); - entityResponse.setAspects(new EnvelopedAspectMap(ImmutableMap.of())); + @Test + public void testMap() throws Exception { + EntityResponse entityResponse = new EntityResponse(); + Urn urn = 
Urn.createFromString("urn:li:dataProcessInstance:(test-workflow,test-instance)");
+    entityResponse.setUrn(urn);
+    entityResponse.setAspects(new EnvelopedAspectMap(ImmutableMap.of()));
 
-    DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse);
+    DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse);
 
-    assertNotNull(instance);
-    assertEquals(instance.getUrn(), urn.toString());
-    assertEquals(instance.getType(), EntityType.DATA_PROCESS_INSTANCE);
-  }
-}
\ No newline at end of file
+    assertNotNull(instance);
+    assertEquals(instance.getUrn(), urn.toString());
+    assertEquals(instance.getType(), EntityType.DATA_PROCESS_INSTANCE);
+  }
+}

From d024daa5a136aef06161b2c314d233e630478ebf Mon Sep 17 00:00:00 2001
From: Shirshanka Das
Date: Thu, 9 Jan 2025 16:44:47 -0800
Subject: [PATCH 13/19] refactor lineage fields out to a shared class

---
 .../ml/metadata/MLModelGroupProperties.pdl    | 14 +-------
 .../ml/metadata/MLModelLineageInfo.pdl        | 35 +++++++++++++++++++
 .../ml/metadata/MLModelProperties.pdl         | 27 +-------------
 3 files changed, 37 insertions(+), 39 deletions(-)
 create mode 100644 metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl

diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
index 81c5e7a240f618..b9e364bee8c65a 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
@@ -12,7 +12,7 @@ import com.linkedin.common.TimeStamp
 @Aspect = {
   "name": "mlModelGroupProperties"
 }
-record MLModelGroupProperties includes CustomProperties {
+record MLModelGroupProperties includes CustomProperties, MLModelLineageInfo {
 
   /**
    * Display name of the MLModelGroup
@@ -50,18 +50,6 @@ record MLModelGroupProperties includes CustomProperties {
    */
   lastModified: optional TimeStamp
 
-  /**
-   * List of jobs (if any) used to train the model group. Visible in Lineage.
-   */
-  @Relationship = {
-    "/*": {
-      "name": "TrainedBy",
-      "entityTypes": [ "dataJob" ],
-      "isLineage": true
-    }
-  }
-  trainingJobs: optional array[Urn]
-
   /**
    * Version of the MLModelGroup
    */
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl
new file mode 100644
index 00000000000000..4c17d6e6ab1a00
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelLineageInfo.pdl
@@ -0,0 +1,35 @@
+namespace com.linkedin.ml.metadata
+import com.linkedin.common.Urn
+
+
+/**
+* A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups
+*/
+record MLModelLineageInfo {
+
+  /**
+   * List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.
+   */
+  @Relationship = {
+    "/*": {
+      "name": "TrainedBy",
+      "entityTypes": [ "dataJob", "dataProcessInstance" ],
+      "isLineage": true
+    }
+  }
+  trainingJobs: optional array[Urn]
+
+  /**
+   * List of jobs or process instances (if any) that use the model or group.
+   */
+  @Relationship = {
+    "/*": {
+      "name": "UsedBy",
+      "entityTypes": [ "dataJob", "dataProcessInstance" ],
+      "isLineage": true,
+      "isUpstream": false
+    }
+  }
+  downstreamJobs: optional array[Urn]
+
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
index d89d07384bba1d..ac10e0add13a1c 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
@@ -14,7 +14,7 @@ import com.linkedin.common.TimeStamp
 @Aspect = {
   "name": "mlModelProperties"
 }
-record MLModelProperties includes CustomProperties, ExternalReference {
+record MLModelProperties includes CustomProperties, ExternalReference, MLModelLineageInfo {
 
   /**
    * Display name of the MLModel
@@ -116,31 +116,6 @@ record MLModelProperties includes CustomProperties, ExternalReference {
   }
   deployments: optional array[Urn]
 
-  /**
-   * List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.
-   */
-  @Relationship = {
-    "/*": {
-      "name": "TrainedBy",
-      "entityTypes": [ "dataJob", "dataProcessInstance" ],
-      "isLineage": true
-    }
-  }
-  trainingJobs: optional array[Urn]
-
-  /**
-   * List of jobs (if any) that use the model
-   */
-  @Relationship = {
-    "/*": {
-      "name": "UsedBy",
-      "entityTypes": [ "dataJob" ],
-      "isLineage": true,
-      "isUpstream": false
-    }
-  }
-  downstreamJobs: optional array[Urn]
-
   /**
    * Groups the model belongs to
    */

From 6e110c00bf24ef149f6ab9d750276e53f895f50f Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Thu, 9 Jan 2025 19:29:13 -0600
Subject: [PATCH 14/19] improve tests and fixup mapper

---
 .../graphql/resolvers/search/SearchUtils.java |   3 +-
 .../mappers/DataProcessInstanceMapper.java    |  20 +--
 .../DataProcessInstanceTypeTest.java          | 159 ++++++++++++++++++
 .../DataProcessInstanceMapperTest.java        | 110 +++++++++++-
 .../java/com/linkedin/metadata/Constants.java |   4 +
 5 files changed, 276 insertions(+), 20 deletions(-)

diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
index 6aeb7c7a3a94a5..a01b3aaec9c982 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/SearchUtils.java
@@ -75,8 +75,7 @@ private SearchUtils() {}
           EntityType.DATA_PRODUCT,
           EntityType.NOTEBOOK,
           EntityType.BUSINESS_ATTRIBUTE,
-          EntityType.SCHEMA_FIELD,
-          EntityType.DATA_PROCESS_INSTANCE);
+          EntityType.SCHEMA_FIELD);
 
   /** Entities that are part of autocomplete by default in Auto Complete Across Entities */
   public static final List<EntityType> AUTO_COMPLETE_ENTITY_TYPES =
diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
index ff278e551c31ee..e3cbcdf709326b 100644
--- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
+++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java
@@ -66,16 +66,14 @@ public DataProcessInstance apply(
     Urn entityUrn = entityResponse.getUrn();
     EnvelopedAspectMap aspectMap = entityResponse.getAspects();
     MappingHelper<DataProcessInstance> mappingHelper = new MappingHelper<>(aspectMap, result);
-    mappingHelper = new MappingHelper<>(aspectMap, result);
     mappingHelper.mapToResult(
         DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME,
-        (dataProcessInstance, dataMap) -> {
-          try {
-            mapTrainingRunProperties(context, dataProcessInstance, dataMap, entityUrn);
-          } catch (Exception e) {
-            mapDataProcessProperties(context, dataProcessInstance, dataMap, entityUrn);
-          }
-        });
+        (dataProcessInstance, dataMap) ->
+            mapDataProcessProperties(context, dataProcessInstance, dataMap, entityUrn));
+    mappingHelper.mapToResult(
+        ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME,
+        (dataProcessInstance, dataMap) ->
+            mapTrainingRunProperties(context, dataProcessInstance, dataMap));
     mappingHelper.mapToResult(
         DATA_PLATFORM_INSTANCE_ASPECT_NAME,
         (dataProcessInstance, dataMap) -> {
@@ -99,10 +97,7 @@ public DataProcessInstance apply(
   }
 
   private void mapTrainingRunProperties(
-      @Nonnull QueryContext context,
-      @Nonnull DataProcessInstance dpi,
-      @Nonnull DataMap dataMap,
-      @Nonnull Urn entityUrn) {
+      @Nonnull QueryContext context, @Nonnull DataProcessInstance dpi, @Nonnull DataMap dataMap) {
     MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(dataMap);
 
     com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties =
@@ -128,6 +123,7 @@ private void mapTrainingRunProperties(
     if (trainingProperties.hasId()) {
       properties.setId(trainingProperties.getId());
     }
+    dpi.setMlTrainingRunProperties(properties);
   }
 
   private void mapDataProcessProperties(
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java
index 60849c50cbaa7b..38556261c71a01 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java
@@ -5,13 +5,48 @@
 import static org.testng.Assert.*;
 
 import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSet;
+import com.linkedin.common.DataPlatformInstance;
+import com.linkedin.common.FabricType;
+import com.linkedin.common.Status;
+import com.linkedin.common.SubTypes;
+import com.linkedin.common.UrnArray;
+import com.linkedin.common.urn.DataPlatformUrn;
+import com.linkedin.common.urn.DatasetUrn;
+import com.linkedin.common.urn.Urn;
+import com.linkedin.common.urn.UrnUtils;
+import com.linkedin.container.Container;
+import com.linkedin.data.template.StringArray;
+import com.linkedin.datahub.graphql.QueryContext;
 import com.linkedin.datahub.graphql.featureflags.FeatureFlags;
 import com.linkedin.datahub.graphql.generated.DataProcessInstance;
 import com.linkedin.datahub.graphql.generated.EntityType;
+import com.linkedin.dataprocess.DataProcessInstanceInput;
+import com.linkedin.dataprocess.DataProcessInstanceOutput;
+import com.linkedin.dataprocess.DataProcessInstanceProperties;
+import com.linkedin.dataprocess.DataProcessInstanceRelationships;
+import com.linkedin.dataprocess.DataProcessInstanceRunEvent;
+import com.linkedin.dataprocess.DataProcessRunStatus;
+import com.linkedin.dataprocess.DataProcessType;
+import com.linkedin.entity.Aspect;
+import com.linkedin.entity.EntityResponse;
+import com.linkedin.entity.EnvelopedAspect;
+import com.linkedin.entity.EnvelopedAspectMap;
 import com.linkedin.entity.client.EntityClient;
+import com.linkedin.metadata.Constants;
+import com.linkedin.metadata.key.DataProcessInstanceKey;
+import com.linkedin.ml.metadata.MLTrainingRunProperties;
 import com.linkedin.r2.RemoteInvocationException;
+import com.linkedin.test.TestResult;
+import com.linkedin.test.TestResultArray;
+import com.linkedin.test.TestResultType;
+import com.linkedin.test.TestResults;
 import graphql.execution.DataFetcherResult;
+import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Map;
 import org.mockito.Mockito;
 import org.testng.annotations.Test;
@@ -19,6 +54,130 @@ public class DataProcessInstanceTypeTest {
 
   private static final String TEST_INSTANCE_URN =
       "urn:li:dataProcessInstance:(test-workflow,test-instance-1)";
+  private static final String TEST_DPI_1_URN = "urn:li:dataProcessInstance:id-1";
+  private static final DatasetUrn DATASET_URN =
+      new DatasetUrn(new DataPlatformUrn("kafka"), "dataset1", FabricType.TEST);
+  private static final Urn DPI_URN_REL = UrnUtils.getUrn("urn:li:dataProcessInstance:id-2");
+  private static final DataProcessInstanceKey TEST_DPI_1_KEY =
+      new DataProcessInstanceKey().setId("id-1");
+  private static final DataProcessInstanceProperties TEST_DPI_1_PROPERTIES =
+      new DataProcessInstanceProperties().setName("Test DPI").setType(DataProcessType.STREAMING);
+  private static final DataProcessInstanceInput TEST_DPI_1_DPI_INPUT =
+      new DataProcessInstanceInput().setInputs(new UrnArray(ImmutableList.of(DATASET_URN)));
+  private static final DataProcessInstanceOutput TEST_DPI_1_DPI_OUTPUT =
+      new DataProcessInstanceOutput().setOutputs(new UrnArray(ImmutableList.of(DATASET_URN)));
+  private static final DataProcessInstanceRelationships TEST_DPI_1_DPI_RELATIONSHIPS =
+      new DataProcessInstanceRelationships()
+          .setParentInstance(DPI_URN_REL)
+          .setUpstreamInstances(new UrnArray(ImmutableList.of(DPI_URN_REL)))
+          .setParentTemplate(DPI_URN_REL);
+  private static final DataProcessInstanceRunEvent TEST_DPI_1_DPI_RUN_EVENT =
+      new DataProcessInstanceRunEvent().setStatus(DataProcessRunStatus.COMPLETE);
+  private static final DataPlatformInstance TEST_DPI_1_DATA_PLATFORM_INSTANCE =
+      new DataPlatformInstance().setPlatform(new DataPlatformUrn("kafka"));
+  private static final Status TEST_DPI_1_STATUS = new Status().setRemoved(false);
+  private static final TestResults TEST_DPI_1_TEST_RESULTS =
+      new TestResults()
+          .setPassing(
+              new TestResultArray(
+                  ImmutableList.of(
+                      new TestResult()
+                          .setTest(UrnUtils.getUrn("urn:li:test:123"))
+                          .setType(TestResultType.SUCCESS))))
+          .setFailing(new TestResultArray());
+  private static final SubTypes TEST_DPI_1_SUB_TYPES =
+      new SubTypes().setTypeNames(new StringArray("subtype1"));
+  private static final Container TEST_DPI_1_CONTAINER =
+      new Container().setContainer(UrnUtils.getUrn("urn:li:container:123"));
+  private static final MLTrainingRunProperties ML_TRAINING_RUN_PROPERTIES =
+      new MLTrainingRunProperties().setId("mytrainingrun");
+
+  private static final String TEST_DPI_2_URN = "urn:li:dataProcessInstance:id-2";
+
+  @Test
+  public void testBatchLoadFull() throws Exception {
+    EntityClient client = Mockito.mock(EntityClient.class);
+
+    Urn dpiUrn1 = Urn.createFromString(TEST_DPI_1_URN);
+    Urn dpiUrn2 = Urn.createFromString(TEST_DPI_2_URN);
+
+    Map<String, EnvelopedAspect> aspectMap = new HashMap<>();
+    aspectMap.put(
+        Constants.DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_KEY.data())));
+    aspectMap.put(
+        Constants.DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_PROPERTIES.data())));
+    aspectMap.put(
+        Constants.DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_INPUT.data())));
+    aspectMap.put(
+        Constants.DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_OUTPUT.data())));
+    aspectMap.put(
+        Constants.DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_RELATIONSHIPS.data())));
+    aspectMap.put(
+        Constants.DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DPI_RUN_EVENT.data())));
+    aspectMap.put(
+        Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_DATA_PLATFORM_INSTANCE.data())));
+    aspectMap.put(
+        Constants.STATUS_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_STATUS.data())));
+    aspectMap.put(
+        Constants.TEST_RESULTS_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_TEST_RESULTS.data())));
+    aspectMap.put(
+        Constants.SUB_TYPES_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_SUB_TYPES.data())));
+    aspectMap.put(
+        Constants.CONTAINER_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(TEST_DPI_1_CONTAINER.data())));
+    aspectMap.put(
+        Constants.ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME,
+        new EnvelopedAspect().setValue(new Aspect(ML_TRAINING_RUN_PROPERTIES.data())));
+
+    Mockito.when(
+            client.batchGetV2(
+                any(),
+                Mockito.eq(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME),
+                Mockito.eq(new HashSet<>(ImmutableSet.of(dpiUrn1, dpiUrn2))),
+                Mockito.eq(DataProcessInstanceType.ASPECTS_TO_FETCH)))
+        .thenReturn(
+            ImmutableMap.of(
+                dpiUrn1,
+                new EntityResponse()
+                    .setEntityName(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME)
+                    .setUrn(dpiUrn1)
+                    .setAspects(new EnvelopedAspectMap(aspectMap))));
+
+    DataProcessInstanceType type =
+        new DataProcessInstanceType(client, Mockito.mock(FeatureFlags.class));
+
+    QueryContext mockContext = getMockAllowContext();
+    List<DataFetcherResult<DataProcessInstance>> result =
+        type.batchLoad(ImmutableList.of(TEST_DPI_1_URN, TEST_DPI_2_URN), mockContext);
+
+    // Verify response
+    Mockito.verify(client, Mockito.times(1))
+        .batchGetV2(
+            any(),
+            Mockito.eq(Constants.DATA_PROCESS_INSTANCE_ENTITY_NAME),
+            Mockito.eq(ImmutableSet.of(dpiUrn1, dpiUrn2)),
+            Mockito.eq(DataProcessInstanceType.ASPECTS_TO_FETCH));
+
+    assertEquals(result.size(), 2);
+
+    DataProcessInstance dpi1 = result.get(0).getData();
+    assertEquals(dpi1.getUrn(), TEST_DPI_1_URN);
+    assertEquals(dpi1.getName(), "Test DPI");
+    assertEquals(dpi1.getType(), EntityType.DATA_PROCESS_INSTANCE);
+
+    // Assert second element is null
+    assertNull(result.get(1));
+  }
 
   @Test
   public void testBatchLoad() throws Exception {
diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java
index 0c28b8b04c3fa8..dc1ce935ad5ecd 100644
--- 
a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapperTest.java @@ -3,27 +3,125 @@ import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertNotNull; -import com.google.common.collect.ImmutableMap; +import com.linkedin.common.DataPlatformInstance; +import com.linkedin.common.url.Url; import com.linkedin.common.urn.Urn; +import com.linkedin.container.Container; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.data.template.StringArray; import com.linkedin.datahub.graphql.generated.DataProcessInstance; import com.linkedin.datahub.graphql.generated.EntityType; +import com.linkedin.dataprocess.DataProcessInstanceProperties; +import com.linkedin.entity.Aspect; import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.metadata.Constants; +import com.linkedin.ml.metadata.MLTrainingRunProperties; +import java.util.HashMap; +import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; public class DataProcessInstanceMapperTest { - @Test - public void testMap() throws Exception { - EntityResponse entityResponse = new EntityResponse(); - Urn urn = Urn.createFromString("urn:li:dataProcessInstance:(test-workflow,test-instance)"); + private static final String TEST_PLATFORM_URN = "urn:li:dataPlatform:kafka"; + private static final String TEST_INSTANCE_URN = + "urn:li:dataProcessInstance:(test-workflow,test-instance)"; + private static final String TEST_CONTAINER_URN = "urn:li:container:testContainer"; + private static final String TEST_EXTERNAL_URL = "https://example.com/process"; + private static final String TEST_NAME = "Test Process Instance"; + + private EntityResponse entityResponse; + private Urn urn; + + @BeforeMethod + public void setup() throws Exception { + urn = Urn.createFromString(TEST_INSTANCE_URN); + entityResponse = new EntityResponse(); entityResponse.setUrn(urn); - entityResponse.setAspects(new EnvelopedAspectMap(ImmutableMap.of())); + entityResponse.setAspects(new EnvelopedAspectMap(new HashMap<>())); + } + @Test + public void testMapBasicFields() throws Exception { DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); assertNotNull(instance); assertEquals(instance.getUrn(), urn.toString()); assertEquals(instance.getType(), EntityType.DATA_PROCESS_INSTANCE); } + + @Test + public void testMapDataProcessProperties() throws Exception { + // Create DataProcessInstanceProperties + DataProcessInstanceProperties properties = new DataProcessInstanceProperties(); + properties.setName(TEST_NAME); + properties.setExternalUrl(new Url(TEST_EXTERNAL_URL)); + + // Add properties aspect + addAspect(Constants.DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, properties); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getProperties()); + assertEquals(instance.getName(), TEST_NAME); + assertEquals(instance.getExternalUrl(), TEST_EXTERNAL_URL); + } + + @Test + public void testMapPlatformInstance() throws Exception { + // Create DataPlatformInstance + DataPlatformInstance platformInstance = new DataPlatformInstance(); + platformInstance.setPlatform(Urn.createFromString(TEST_PLATFORM_URN)); + + // Add platform instance aspect + 
addAspect(Constants.DATA_PLATFORM_INSTANCE_ASPECT_NAME, platformInstance); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getDataPlatformInstance()); + assertNotNull(instance.getPlatform()); + assertEquals(instance.getPlatform().getUrn(), TEST_PLATFORM_URN); + assertEquals(instance.getPlatform().getType(), EntityType.DATA_PLATFORM); + } + + @Test + public void testMapContainer() throws Exception { + // Create Container aspect + Container container = new Container(); + container.setContainer(Urn.createFromString(TEST_CONTAINER_URN)); + + // Add container aspect + addAspect(Constants.CONTAINER_ASPECT_NAME, container); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance.getContainer()); + assertEquals(instance.getContainer().getUrn(), TEST_CONTAINER_URN); + assertEquals(instance.getContainer().getType(), EntityType.CONTAINER); + } + + @Test + public void testMapMLTrainingProperties() throws Exception { + // Create MLTrainingRunProperties + MLTrainingRunProperties trainingProperties = new MLTrainingRunProperties(); + trainingProperties.setId("test-run-id"); + trainingProperties.setOutputUrls(new StringArray("s3://test-bucket/model")); + + // Add ML training properties aspect + addAspect(Constants.ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, trainingProperties); + + DataProcessInstance instance = DataProcessInstanceMapper.map(null, entityResponse); + + assertNotNull(instance); + assertEquals(instance.getMlTrainingRunProperties().getId(), "test-run-id"); + assertEquals( + instance.getMlTrainingRunProperties().getOutputUrls().get(0), "s3://test-bucket/model"); + } + + private void addAspect(String aspectName, RecordTemplate aspect) { + EnvelopedAspect envelopedAspect = new EnvelopedAspect(); + envelopedAspect.setValue(new Aspect(aspect.data())); + entityResponse.getAspects().put(aspectName, envelopedAspect); + } } diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index 42080e4e17596e..01c33a2530efb5 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -421,6 +421,10 @@ public class Constants { "dataProcessInstanceRunEvent"; public static final String DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME = "dataProcessInstanceRelationships"; + public static final String DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME = "dataProcessInstanceInput"; + public static final String DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME = "dataProcessInstanceOutput"; + public static final String DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME = "dataProcessInstanceKey"; + public static final String ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME = "mlTrainingRunProperties"; // Business Attribute public static final String BUSINESS_ATTRIBUTE_KEY_ASPECT_NAME = "businessAttributeKey"; From 6990d65cb8989d23d069fb779068d4efc2043ceb Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 10 Jan 2025 15:14:53 +0900 Subject: [PATCH 15/19] revert build.gradle & make mlmodel.properties.name optional --- datahub-graphql-core/build.gradle | 35 -------- .../src/main/resources/entity.graphql | 2 +- .../com.linkedin.entity.aspects.snapshot.json | 71 ++++++++------- ...com.linkedin.entity.entities.snapshot.json | 88 ++++++++----------- .../com.linkedin.entity.runs.snapshot.json | 71 ++++++++------- ...nkedin.operations.operations.snapshot.json | 71 
++++++++------- ...m.linkedin.platform.platform.snapshot.json | 88 ++++++++----------- 7 files changed, 193 insertions(+), 233 deletions(-) diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index a0d52f8ff3a10f..ed9cf19a023154 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -33,10 +33,6 @@ dependencies { graphqlCodegen { // For options: https://github.com/kobylynskyi/graphql-java-codegen/blob/master/docs/codegen-options.md - graphqlSchemaPaths = ( - fileTree(dir: "${projectDir}/src/main/resources", include: "*.graphql").files + - fileTree(dir: "${projectDir}/src/main/resources/graphql/schema", include: "**/*.graphql").files - ).collect { it.absolutePath } outputDir = new File("${projectDir}/src/mainGeneratedGraphQL/java") packageName = "com.linkedin.datahub.graphql.generated" generateToString = true @@ -49,37 +45,6 @@ graphqlCodegen { ] } -task debugSchemaLoading { - doLast { - - def extensions = fileTree(dir: "${projectDir}/src/main/resources/graphql/schema") - .include("**/*.graphql") - .files - - println "\nExtensions found:" - extensions.each { file -> - println "- ${file.absolutePath}" - } - } -} - -task listGraphQLFiles { - doLast { - def baseFiles = fileTree(dir: "${projectDir}/src/main/resources", include: "*.graphql") - def extensionFiles = fileTree(dir: "${projectDir}/src/main/resources/graphql/schema", include: "**/*.graphql") - - println "\nBase GraphQL files found:" - baseFiles.each { file -> - println "- ${file.absolutePath}" - } - - println "\nExtension GraphQL files found:" - extensionFiles.each { file -> - println "- ${file.absolutePath}" - } - } -} - clean { delete 'src/mainGeneratedGraphQL' } diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql index adb24d92587b58..9dd1948e18e042 100644 --- a/datahub-graphql-core/src/main/resources/entity.graphql +++ b/datahub-graphql-core/src/main/resources/entity.graphql @@ -10098,7 +10098,7 @@ type MLModelProperties { """ The display name of the model used in the UI """ - name: String! + name: String """ Detailed description of the model's purpose and characteristics diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json index 1c713fd33884b5..432c4a9ddcb73f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json @@ -3827,7 +3827,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4005,37 +4041,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4213,7 +4218,7 @@ }, "doc" : "The order to sort the results i.e. ASCENDING or DESCENDING" } ] - }, "com.linkedin.metadata.query.filter.SortOrder", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.mxe.GenericAspect", { + }, "com.linkedin.metadata.query.filter.SortOrder", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", 
"com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.mxe.GenericAspect", { "type" : "record", "name" : "MetadataChangeProposal", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json index 77d4644f3c121a..45e91873de10ff 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json @@ -3985,7 +3985,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4163,37 +4199,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -5004,7 +5009,7 @@ "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with an ML Model Group\r", - "include" : [ "com.linkedin.common.CustomProperties" ], + "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", @@ -5041,21 +5046,6 @@ "type" : "com.linkedin.common.TimeStamp", "doc" : "Date when the MLModelGroup was last modified\r", "optional" : true - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model group. Visible in Lineage.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", @@ -6700,7 +6690,7 @@ "type" : "int", "doc" : "The total number of entities directly under searched path" } ] - }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.search.SearchSuggestion", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", 
"com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.metadata.search.SearchResultMetadata", "com.linkedin.metadata.search.SearchSuggestion", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : "SystemMetadata", "namespace" : "com.linkedin.mxe", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json index 8b6def75f7a665..9061cbff188135 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json +++ 
b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json @@ -3551,7 +3551,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -3729,37 +3765,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4002,7 +4007,7 @@ } } } ] - }, "com.linkedin.metadata.run.UnsafeEntityInfo", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], + }, "com.linkedin.metadata.run.UnsafeEntityInfo", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", 
"com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties" ], "schema" : { "name" : "runs", "namespace" : "com.linkedin.entity", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json index e4cc5c42303ee2..e6be4e828c976f 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json @@ -3545,7 +3545,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. 
Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -3723,37 +3759,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -3908,7 +3913,7 @@ "name" : "version", "type" : "long" } ] - }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", 
"com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties", { + }, "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", "com.linkedin.schema.ArrayType", "com.linkedin.schema.BinaryJsonSchema", "com.linkedin.schema.BooleanType", "com.linkedin.schema.BytesType", "com.linkedin.schema.DatasetFieldForeignKey", "com.linkedin.schema.DateType", "com.linkedin.schema.EditableSchemaFieldInfo", "com.linkedin.schema.EditableSchemaMetadata", "com.linkedin.schema.EnumType", "com.linkedin.schema.EspressoSchema", "com.linkedin.schema.FixedType", "com.linkedin.schema.ForeignKeyConstraint", "com.linkedin.schema.ForeignKeySpec", "com.linkedin.schema.KafkaSchema", "com.linkedin.schema.KeyValueSchema", "com.linkedin.schema.MapType", "com.linkedin.schema.MySqlDDL", "com.linkedin.schema.NullType", "com.linkedin.schema.NumberType", "com.linkedin.schema.OracleDDL", "com.linkedin.schema.OrcSchema", "com.linkedin.schema.OtherSchema", "com.linkedin.schema.PrestoDDL", "com.linkedin.schema.RecordType", "com.linkedin.schema.SchemaField", "com.linkedin.schema.SchemaFieldDataType", "com.linkedin.schema.SchemaMetadata", "com.linkedin.schema.SchemaMetadataKey", "com.linkedin.schema.Schemaless", "com.linkedin.schema.StringType", "com.linkedin.schema.TimeType", "com.linkedin.schema.UnionType", "com.linkedin.schema.UrnForeignKey", "com.linkedin.tag.TagProperties", { "type" : "record", "name" : "TimeseriesIndexSizeResult", "namespace" : 
"com.linkedin.timeseries", diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json index e375ac698ab516..10f3218d469757 100644 --- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json +++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json @@ -3979,7 +3979,43 @@ "name" : "MLModelProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with a ML Model\r", - "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ], + "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference", { + "type" : "record", + "name" : "MLModelLineageInfo", + "doc" : "A set of re-usable fields used to capture lineage information for ML Models and ML Model Groups\r", + "fields" : [ { + "name" : "trainingJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) used to train the model or group. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "name" : "TrainedBy" + } + } + }, { + "name" : "downstreamJobs", + "type" : { + "type" : "array", + "items" : "com.linkedin.common.Urn" + }, + "doc" : "List of jobs or process instances (if any) that use the model or group.\r", + "optional" : true, + "Relationship" : { + "/*" : { + "entityTypes" : [ "dataJob", "dataProcessInstance" ], + "isLineage" : true, + "isUpstream" : false, + "name" : "UsedBy" + } + } + } ] + } ], "fields" : [ { "name" : "name", "type" : "string", @@ -4157,37 +4193,6 @@ "name" : "DeployedTo" } } - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob", "dataProcessInstance" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } - }, { - "name" : "downstreamJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) that use the model\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "isUpstream" : false, - "name" : "UsedBy" - } - } }, { "name" : "groups", "type" : { @@ -4998,7 +5003,7 @@ "name" : "MLModelGroupProperties", "namespace" : "com.linkedin.ml.metadata", "doc" : "Properties associated with an ML Model Group\r", - "include" : [ "com.linkedin.common.CustomProperties" ], + "include" : [ "com.linkedin.common.CustomProperties", "MLModelLineageInfo" ], "fields" : [ { "name" : "name", "type" : "string", @@ -5035,21 +5040,6 @@ "type" : "com.linkedin.common.TimeStamp", "doc" : "Date when the MLModelGroup was last modified\r", "optional" : true - }, { - "name" : "trainingJobs", - "type" : { - "type" : "array", - "items" : "com.linkedin.common.Urn" - }, - "doc" : "List of jobs (if any) used to train the model group. 
Visible in Lineage.\r", - "optional" : true, - "Relationship" : { - "/*" : { - "entityTypes" : [ "dataJob" ], - "isLineage" : true, - "name" : "TrainedBy" - } - } }, { "name" : "version", "type" : "com.linkedin.common.VersionTag", @@ -5844,7 +5834,7 @@ } ] } } ] - }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", "com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", 
"com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { + }, "com.linkedin.glossary.GlossaryNodeInfo", "com.linkedin.glossary.GlossaryRelatedTerms", "com.linkedin.glossary.GlossaryTermInfo", "com.linkedin.identity.CorpGroupInfo", "com.linkedin.identity.CorpUserEditableInfo", "com.linkedin.identity.CorpUserInfo", "com.linkedin.identity.CorpUserStatus", "com.linkedin.identity.GroupMembership", "com.linkedin.metadata.aspect.ChartAspect", "com.linkedin.metadata.aspect.CorpGroupAspect", "com.linkedin.metadata.aspect.CorpUserAspect", "com.linkedin.metadata.aspect.DashboardAspect", "com.linkedin.metadata.aspect.DataFlowAspect", "com.linkedin.metadata.aspect.DataHubPolicyAspect", "com.linkedin.metadata.aspect.DataHubRetentionAspect", "com.linkedin.metadata.aspect.DataJobAspect", "com.linkedin.metadata.aspect.DataPlatformAspect", "com.linkedin.metadata.aspect.DataProcessAspect", "com.linkedin.metadata.aspect.DatasetAspect", "com.linkedin.metadata.aspect.GlossaryNodeAspect", "com.linkedin.metadata.aspect.GlossaryTermAspect", "com.linkedin.metadata.aspect.MLFeatureAspect", "com.linkedin.metadata.aspect.MLFeatureTableAspect", "com.linkedin.metadata.aspect.MLModelAspect", "com.linkedin.metadata.aspect.MLModelDeploymentAspect", "com.linkedin.metadata.aspect.MLModelGroupAspect", "com.linkedin.metadata.aspect.MLPrimaryKeyAspect", "com.linkedin.metadata.aspect.SchemaFieldAspect", "com.linkedin.metadata.aspect.TagAspect", "com.linkedin.metadata.key.ChartKey", "com.linkedin.metadata.key.CorpGroupKey", "com.linkedin.metadata.key.CorpUserKey", "com.linkedin.metadata.key.DashboardKey", "com.linkedin.metadata.key.DataFlowKey", "com.linkedin.metadata.key.DataHubPolicyKey", "com.linkedin.metadata.key.DataHubRetentionKey", "com.linkedin.metadata.key.DataJobKey", "com.linkedin.metadata.key.DataPlatformKey", "com.linkedin.metadata.key.DataProcessKey", "com.linkedin.metadata.key.DatasetKey", "com.linkedin.metadata.key.GlossaryNodeKey", "com.linkedin.metadata.key.GlossaryTermKey", "com.linkedin.metadata.key.MLFeatureKey", "com.linkedin.metadata.key.MLFeatureTableKey", "com.linkedin.metadata.key.MLModelDeploymentKey", "com.linkedin.metadata.key.MLModelGroupKey", "com.linkedin.metadata.key.MLModelKey", 
"com.linkedin.metadata.key.MLPrimaryKeyKey", "com.linkedin.metadata.key.SchemaFieldKey", "com.linkedin.metadata.key.TagKey", "com.linkedin.metadata.snapshot.ChartSnapshot", "com.linkedin.metadata.snapshot.CorpGroupSnapshot", "com.linkedin.metadata.snapshot.CorpUserSnapshot", "com.linkedin.metadata.snapshot.DashboardSnapshot", "com.linkedin.metadata.snapshot.DataFlowSnapshot", "com.linkedin.metadata.snapshot.DataHubPolicySnapshot", "com.linkedin.metadata.snapshot.DataHubRetentionSnapshot", "com.linkedin.metadata.snapshot.DataJobSnapshot", "com.linkedin.metadata.snapshot.DataPlatformSnapshot", "com.linkedin.metadata.snapshot.DataProcessSnapshot", "com.linkedin.metadata.snapshot.DatasetSnapshot", "com.linkedin.metadata.snapshot.GlossaryNodeSnapshot", "com.linkedin.metadata.snapshot.GlossaryTermSnapshot", "com.linkedin.metadata.snapshot.MLFeatureSnapshot", "com.linkedin.metadata.snapshot.MLFeatureTableSnapshot", "com.linkedin.metadata.snapshot.MLModelDeploymentSnapshot", "com.linkedin.metadata.snapshot.MLModelGroupSnapshot", "com.linkedin.metadata.snapshot.MLModelSnapshot", "com.linkedin.metadata.snapshot.MLPrimaryKeySnapshot", "com.linkedin.metadata.snapshot.SchemaFieldSnapshot", "com.linkedin.metadata.snapshot.Snapshot", "com.linkedin.metadata.snapshot.TagSnapshot", "com.linkedin.ml.metadata.BaseData", "com.linkedin.ml.metadata.CaveatDetails", "com.linkedin.ml.metadata.CaveatsAndRecommendations", "com.linkedin.ml.metadata.DeploymentStatus", "com.linkedin.ml.metadata.EthicalConsiderations", "com.linkedin.ml.metadata.EvaluationData", "com.linkedin.ml.metadata.HyperParameterValueType", "com.linkedin.ml.metadata.IntendedUse", "com.linkedin.ml.metadata.IntendedUserType", "com.linkedin.ml.metadata.MLFeatureProperties", "com.linkedin.ml.metadata.MLFeatureTableProperties", "com.linkedin.ml.metadata.MLHyperParam", "com.linkedin.ml.metadata.MLMetric", "com.linkedin.ml.metadata.MLModelDeploymentProperties", "com.linkedin.ml.metadata.MLModelFactorPrompts", "com.linkedin.ml.metadata.MLModelFactors", "com.linkedin.ml.metadata.MLModelGroupProperties", "com.linkedin.ml.metadata.MLModelLineageInfo", "com.linkedin.ml.metadata.MLModelProperties", "com.linkedin.ml.metadata.MLPrimaryKeyProperties", "com.linkedin.ml.metadata.Metrics", "com.linkedin.ml.metadata.QuantitativeAnalyses", "com.linkedin.ml.metadata.ResultsType", "com.linkedin.ml.metadata.SourceCode", "com.linkedin.ml.metadata.SourceCodeUrl", "com.linkedin.ml.metadata.SourceCodeUrlType", "com.linkedin.ml.metadata.TrainingData", { "type" : "record", "name" : "GenericPayload", "namespace" : "com.linkedin.mxe", From ae35b0d6af01ae5f8e572e6cd8db353328d41f24 Mon Sep 17 00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Fri, 10 Jan 2025 15:26:54 +0900 Subject: [PATCH 16/19] revert build.gradle --- datahub-graphql-core/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index ed9cf19a023154..47ada8e9929dd3 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -33,6 +33,7 @@ dependencies { graphqlCodegen { // For options: https://github.com/kobylynskyi/graphql-java-codegen/blob/master/docs/codegen-options.md + graphqlSchemaPaths = fileTree(dir: "${projectDir}/src/main/resources", include: '**/*.graphql').collect { it.absolutePath } outputDir = new File("${projectDir}/src/mainGeneratedGraphQL/java") packageName = "com.linkedin.datahub.graphql.generated" generateToString = true From 9a60c1d72a068c03665b1e449f87b3e45478539b Mon Sep 17 
00:00:00 2001 From: Hyejin Yoon <0327jane@gmail.com> Date: Mon, 13 Jan 2025 23:06:03 +0900 Subject: [PATCH 17/19] [wip] add smoke test for dpi --- .../dataprocessinst/test_dataprocessinst.py | 220 ++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 smoke-test/tests/dataprocessinst/test_dataprocessinst.py diff --git a/smoke-test/tests/dataprocessinst/test_dataprocessinst.py b/smoke-test/tests/dataprocessinst/test_dataprocessinst.py new file mode 100644 index 00000000000000..a1fbe28769b8b5 --- /dev/null +++ b/smoke-test/tests/dataprocessinst/test_dataprocessinst.py @@ -0,0 +1,220 @@ +import logging +import os +import tempfile +import time +from random import randint +import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.metadata.schema_classes import ( + MLMetricClass, + MLHyperParamClass, + DataProcessInstancePropertiesClass, + DataProcessInstanceKeyClass, + MLTrainingRunPropertiesClass, + AuditStampClass, +) +from tests.consistency_utils import wait_for_writes_to_sync + +logger = logging.getLogger(__name__) + + +def create_sample_dpi(): + """Create a sample DataProcessInstance with realistic ML training properties""" + # Generate timestamps + current_time = int(time.time() * 1000) + run_id = "run_abcde" + dpi_urn = f"urn:li:dataProcessInstance:{run_id}" + + logger.info(f"Creating DPI with URN: {dpi_urn}") + + # Create key aspect + dpi_key = DataProcessInstanceKeyClass( + id=run_id + ) + + hyper_params = [ + MLHyperParamClass( + name="alpha", + value="0.05" + ), + MLHyperParamClass( + name="beta", + value="0.95" + ) + ] + + metrics = [ + MLMetricClass( + name="mse", + value="0.05" + ), + MLMetricClass( + name="r2", + value="0.95" + ) + ] + + # Create DPI properties + dpi_props = DataProcessInstancePropertiesClass( + name=f"Training {run_id}", + type="BATCH_SCHEDULED", + created=AuditStampClass(time=current_time, actor="urn:li:corpuser:datahub"), + externalUrl="http://mlflow:5000", + customProperties={ + "framework": "statsmodels", + "python_version": "3.8", + }, + ) + + dpi_ml_props = MLTrainingRunPropertiesClass( + hyperParams=hyper_params, + trainingMetrics=metrics, + outputUrls=["s3://my-bucket/ml/output"], + ) + + # Create the MCPs - one for the key, one for properties + mcps = [ + # Key aspect + MetadataChangeProposalWrapper( + entityUrn=dpi_urn, + entityType="dataProcessInstance", + aspectName="dataProcessInstanceKey", + changeType="UPSERT", + aspect=dpi_key + ), + # Properties aspect + MetadataChangeProposalWrapper( + entityUrn=dpi_urn, + entityType="dataProcessInstance", + aspectName="dataProcessInstanceProperties", + changeType="UPSERT", + aspect=dpi_props + ), + MetadataChangeProposalWrapper( + entityUrn=dpi_urn, + entityType="dataProcessInstance", + aspectName="mlTrainingRunProperties", + changeType="UPSERT", + aspect=dpi_ml_props + ) + ] + return mcps + + +@pytest.fixture(scope="module") +def ingest_cleanup_data(auth_session, graph_client, request): + """Fixture to handle test data setup and cleanup""" + try: + logger.info("Starting DPI test data creation") + mcps = create_sample_dpi() + + # Emit MCPs directly using graph client + for mcp in mcps: + logger.info(f"Emitting aspect: {mcp.aspect}") + graph_client.emit(mcp) + + wait_for_writes_to_sync() + + # Verify entity exists + dpi_urn = "urn:li:dataProcessInstance:run_abcde" + logger.info(f"Verifying entity exists in graph... 
{dpi_urn}") + + # Try getting aspect + dpi_props = graph_client.get_aspect( + dpi_urn, + DataProcessInstancePropertiesClass + ) + dpi_key = graph_client.get_aspect( + dpi_urn, + DataProcessInstanceKeyClass + ) + dpi_ml_props = graph_client.get_aspect( + dpi_urn, + MLTrainingRunPropertiesClass + ) + + logger.info(f"DPI properties from graph: {dpi_props}") + logger.info(f"DPI key from graph: {dpi_key}") + logger.info(f"DPI ML properties from graph: {dpi_ml_props}") + + yield + + logger.info("Cleaning up test data") + graph_client.hard_delete_entity(dpi_urn) + wait_for_writes_to_sync() + + except Exception as e: + logger.error(f"Error in test setup/cleanup: {str(e)}") + logger.error(f"Full exception: {e.__class__.__name__}") + raise + + +def test_get_dpi(auth_session, ingest_cleanup_data): + """Test getting a specific DPI entity""" + logger.info("Starting DPI query test") + + json = { + "query": """query dataProcessInstance($urn: String!) { + dataProcessInstance(urn: $urn) { + urn + type + properties { + name + created { + time + actor + } + customProperties { + key + value + } + externalUrl + } + mlTrainingRunProperties { + hyperParams { + name + value + } + trainingMetrics { + name + value + } + outputUrls + } + } + }""", + "variables": { + "urn": "urn:li:dataProcessInstance:run_abcde" + } + } + + # Send GraphQL query + logger.info("Sending GraphQL query") + response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json) + response.raise_for_status() + res_data = response.json() + + logger.info(f"Response data: {res_data}") + + # Basic response structure validation + assert res_data, "Response should not be empty" + assert "data" in res_data, "Response should contain 'data' field" + assert "dataProcessInstance" in res_data["data"], "Response should contain 'dataProcessInstance' field" + + dpi = res_data["data"]["dataProcessInstance"] + assert dpi, "DPI should not be null" + assert "urn" in dpi, "DPI should have URN" + assert dpi["urn"] == "urn:li:dataProcessInstance:run_abcde", "URN should match expected value" + + # Validate properties if present + if "properties" in dpi and dpi["properties"]: + props = dpi["properties"] + assert "name" in props, "Properties should contain name" + assert "created" in props, "Properties should contain created" + assert "customProperties" in props, "Properties should contain customProperties" + + if "mlTrainingRunProperties" in dpi and dpi["mlTrainingRunProperties"]: + ml_props = dpi["mlTrainingRunProperties"] + assert "hyperParams" in ml_props, "ML properties should contain hyperParams" + assert "trainingMetrics" in ml_props, "ML properties should contain trainingMetrics" + assert "outputUrls" in ml_props, "ML properties should contain outputUrls" \ No newline at end of file From 55a77b0f3163d4e38cbf2a8b3b7fdf2b5ac6502b Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Mon, 13 Jan 2025 14:04:15 -0600 Subject: [PATCH 18/19] smoke test + fixes --- .../DataProcessInstanceType.java | 8 +- .../mappers/DataProcessInstanceMapper.java | 8 +- .../tests/data_process_instance/__init__.py | 0 .../test_data_process_instance.py | 293 ++++++++++++++++++ .../dataprocessinst/test_dataprocessinst.py | 220 ------------- smoke-test/tests/ml_models/__init__.py | 0 smoke-test/tests/ml_models/test_ml_models.py | 133 ++++++++ 7 files changed, 440 insertions(+), 222 deletions(-) create mode 100644 smoke-test/tests/data_process_instance/__init__.py create mode 100644 smoke-test/tests/data_process_instance/test_data_process_instance.py delete mode 100644 
smoke-test/tests/dataprocessinst/test_dataprocessinst.py create mode 100644 smoke-test/tests/ml_models/__init__.py create mode 100644 smoke-test/tests/ml_models/test_ml_models.py diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java index c6cede662fa9c2..eeaaaa96f51704 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceType.java @@ -31,9 +31,15 @@ public class DataProcessInstanceType public static final Set ASPECTS_TO_FETCH = ImmutableSet.of( + DATA_PROCESS_INSTANCE_KEY_ASPECT_NAME, DATA_PLATFORM_INSTANCE_ASPECT_NAME, DATA_PROCESS_INSTANCE_PROPERTIES_ASPECT_NAME, + DATA_PROCESS_INSTANCE_INPUT_ASPECT_NAME, + DATA_PROCESS_INSTANCE_OUTPUT_ASPECT_NAME, + DATA_PROCESS_INSTANCE_RUN_EVENT_ASPECT_NAME, + TEST_RESULTS_ASPECT_NAME, DATA_PROCESS_INSTANCE_RELATIONSHIPS_ASPECT_NAME, + ML_TRAINING_RUN_PROPERTIES_ASPECT_NAME, SUB_TYPES_ASPECT_NAME, CONTAINER_ASPECT_NAME); @@ -90,7 +96,7 @@ public List> batchLoad( .collect(Collectors.toList()); } catch (Exception e) { - throw new RuntimeException("Failed to load schemaField entity", e); + throw new RuntimeException("Failed to load Data Process Instance entity", e); } } } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java index e3cbcdf709326b..28c9c8936fdbfb 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/dataprocessinst/mappers/DataProcessInstanceMapper.java @@ -102,6 +102,9 @@ private void mapTrainingRunProperties( com.linkedin.datahub.graphql.generated.MLTrainingRunProperties properties = new com.linkedin.datahub.graphql.generated.MLTrainingRunProperties(); + if (trainingProperties.hasId()) { + properties.setId(trainingProperties.getId()); + } if (trainingProperties.hasOutputUrls()) { properties.setOutputUrls( trainingProperties.getOutputUrls().stream() @@ -133,9 +136,12 @@ private void mapDataProcessProperties( @Nonnull Urn entityUrn) { DataProcessInstanceProperties dataProcessInstanceProperties = new DataProcessInstanceProperties(dataMap); - dpi.setName(dataProcessInstanceProperties.getName()); + com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties properties = new com.linkedin.datahub.graphql.generated.DataProcessInstanceProperties(); + + dpi.setName(dataProcessInstanceProperties.getName()); + properties.setName(dataProcessInstanceProperties.getName()); if (dataProcessInstanceProperties.hasExternalUrl()) { dpi.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); properties.setExternalUrl(dataProcessInstanceProperties.getExternalUrl().toString()); diff --git a/smoke-test/tests/data_process_instance/__init__.py b/smoke-test/tests/data_process_instance/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/smoke-test/tests/data_process_instance/test_data_process_instance.py b/smoke-test/tests/data_process_instance/test_data_process_instance.py 
new file mode 100644 index 00000000000000..a8aca6034d5be1 --- /dev/null +++ b/smoke-test/tests/data_process_instance/test_data_process_instance.py @@ -0,0 +1,293 @@ +import logging +import os +import tempfile +from random import randint + +import pytest +from datahub.emitter.mcp import MetadataChangeProposalWrapper +from datahub.ingestion.api.common import PipelineContext, RecordEnvelope +from datahub.ingestion.api.sink import NoopWriteCallback +from datahub.ingestion.sink.file import FileSink, FileSinkConfig +from datahub.metadata.schema_classes import ( + AuditStampClass, + ContainerClass, + ContainerPropertiesClass, + DataPlatformInstanceClass, + DataPlatformInstancePropertiesClass, + DataProcessInstanceKeyClass, + DataProcessInstancePropertiesClass, + DataProcessInstanceRunEventClass, + MLHyperParamClass, + MLMetricClass, + MLTrainingRunPropertiesClass, + SubTypesClass, + TimeWindowSizeClass, +) + +from tests.utils import ( + delete_urns_from_file, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + +logger = logging.getLogger(__name__) + +# Generate unique DPI ID +dpi_id = f"test-pipeline-run-{randint(1000, 9999)}" +dpi_urn = f"urn:li:dataProcessInstance:{dpi_id}" + + +class FileEmitter: + def __init__(self, filename: str) -> None: + self.sink: FileSink = FileSink( + ctx=PipelineContext(run_id="create_test_data"), + config=FileSinkConfig(filename=filename), + ) + + def emit(self, event): + self.sink.write_record_async( + record_envelope=RecordEnvelope(record=event, metadata={}), + write_callback=NoopWriteCallback(), + ) + + def close(self): + self.sink.close() + + +def create_test_data(filename: str): + mcps = [ + # Key aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceKey", + aspect=DataProcessInstanceKeyClass(id=dpi_id), + ), + # Properties aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceProperties", + aspect=DataProcessInstancePropertiesClass( + name="Test Pipeline Run", + type="BATCH_SCHEDULED", + created=AuditStampClass( + time=1640692800000, actor="urn:li:corpuser:datahub" + ), + ), + ), + # Run Event aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataProcessInstanceRunEvent", + aspect=DataProcessInstanceRunEventClass( + timestampMillis=1704067200000, + eventGranularity=TimeWindowSizeClass(unit="WEEK", multiple=1), + status="COMPLETE", + ), + ), + # Platform Instance aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="dataPlatformInstance", + aspect=DataPlatformInstanceClass( + platform="urn:li:dataPlatform:airflow", + instance="urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,1234567890)", + ), + ), + MetadataChangeProposalWrapper( + entityType="dataPlatformInstance", + entityUrn="urn:li:dataPlatformInstance:(urn:li:dataPlatform:airflow,1234567890)", + aspectName="dataPlatformInstanceProperties", + aspect=DataPlatformInstancePropertiesClass( + name="my process instance", + ), + ), + # SubTypes aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="subTypes", + aspect=SubTypesClass(typeNames=["TEST", "BATCH_JOB"]), + ), + # Container aspect + MetadataChangeProposalWrapper( + entityType="dataProcessInstance", + entityUrn=dpi_urn, + aspectName="container", + aspect=ContainerClass(container="urn:li:container:testGroup1"), + ), + 
+        MetadataChangeProposalWrapper(
+            entityType="container",
+            entityUrn="urn:li:container:testGroup1",
+            aspectName="containerProperties",
+            aspect=ContainerPropertiesClass(name="testGroup1"),
+        ),
+        # ML Training Run Properties aspect
+        MetadataChangeProposalWrapper(
+            entityType="dataProcessInstance",
+            entityUrn=dpi_urn,
+            aspectName="mlTrainingRunProperties",
+            aspect=MLTrainingRunPropertiesClass(
+                id="test-training-run-123",
+                trainingMetrics=[
+                    MLMetricClass(
+                        name="accuracy",
+                        description="accuracy of the model",
+                        value="0.95",
+                    ),
+                    MLMetricClass(
+                        name="loss",
+                        description="accuracy loss of the model",
+                        value="0.05",
+                    ),
+                ],
+                hyperParams=[
+                    MLHyperParamClass(
+                        name="learningRate",
+                        description="rate of learning",
+                        value="0.001",
+                    ),
+                    MLHyperParamClass(
+                        name="batchSize", description="size of the batch", value="32"
+                    ),
+                ],
+                outputUrls=["s3://my-bucket/ml/output"],
+            ),
+        ),
+    ]
+
+    file_emitter = FileEmitter(filename)
+    for mcp in mcps:
+        file_emitter.emit(mcp)
+    file_emitter.close()
+
+
+@pytest.fixture(scope="module", autouse=False)
+def ingest_cleanup_data(auth_session, graph_client, request):
+    new_file, filename = tempfile.mkstemp(suffix=".json")
+    try:
+        create_test_data(filename)
+        print("ingesting data process instance test data")
+        ingest_file_via_rest(auth_session, filename)
+        wait_for_writes_to_sync()
+        yield
+        print("removing data process instance test data")
+        delete_urns_from_file(graph_client, filename)
+        wait_for_writes_to_sync()
+    finally:
+        os.remove(filename)
+
+
+@pytest.mark.integration
+def test_search_dpi(auth_session, ingest_cleanup_data):
+    """Test DPI search and validation of returned fields using GraphQL."""
+
+    json = {
+        "query": """query scrollAcrossEntities($input: ScrollAcrossEntitiesInput!) {
+            scrollAcrossEntities(input: $input) {
+              nextScrollId
+              count
+              total
+              searchResults {
+                entity {
+                  ... on DataProcessInstance {
+                    urn
+                    properties {
+                      name
+                      externalUrl
+                    }
+                    dataPlatformInstance {
+                      platform {
+                        urn
+                        name
+                      }
+                    }
+                    subTypes {
+                      typeNames
+                    }
+                    container {
+                      urn
+                    }
+                    platform {
+                      urn
+                      name
+                      properties {
+                        type
+                      }
+                    }
+                    mlTrainingRunProperties {
+                      id
+                      trainingMetrics {
+                        name
+                        value
+                      }
+                      hyperParams {
+                        name
+                        value
+                      }
+                      outputUrls
+                    }
+                  }
+                }
+              }
+            }
+        }""",
+        "variables": {
+            "input": {"types": ["DATA_PROCESS_INSTANCE"], "query": dpi_id, "count": 10}
+        },
+    }
+
+    response = auth_session.post(
+        f"{auth_session.frontend_url()}/api/v2/graphql", json=json
+    )
+    response.raise_for_status()
+    res_data = response.json()
+
+    # Basic response structure validation
+    assert res_data, "Response should not be empty"
+    assert "data" in res_data, "Response should contain 'data' field"
+    print("RESPONSE DATA:" + str(res_data))
+    assert (
+        "scrollAcrossEntities" in res_data["data"]
+    ), "Response should contain 'scrollAcrossEntities' field"
+
+    search_results = res_data["data"]["scrollAcrossEntities"]
+    assert (
+        "searchResults" in search_results
+    ), "Response should contain 'searchResults' field"
+
+    results = search_results["searchResults"]
+    assert len(results) > 0, "Should find at least one result"
+
+    # Find our test entity
+    test_entity = None
+    for result in results:
+        if result["entity"]["urn"] == dpi_urn:
+            test_entity = result["entity"]
+            break
+
+    assert test_entity is not None, f"Should find test entity with URN {dpi_urn}"
+
+    # Validate fields
+    props = test_entity["properties"]
+    assert props["name"] == "Test Pipeline Run"
+
+    platform_instance = test_entity["dataPlatformInstance"]
+    assert platform_instance["platform"]["urn"] == "urn:li:dataPlatform:airflow"
+
+    sub_types = test_entity["subTypes"]
+    assert set(sub_types["typeNames"]) == {"TEST", "BATCH_JOB"}
+
+    container = test_entity["container"]
+    assert container["urn"] == "urn:li:container:testGroup1"
+
+    ml_props = test_entity["mlTrainingRunProperties"]
+    assert ml_props["id"] == "test-training-run-123"
+    assert ml_props["trainingMetrics"][0] == {"name": "accuracy", "value": "0.95"}
+    assert ml_props["trainingMetrics"][1] == {"name": "loss", "value": "0.05"}
+    assert ml_props["hyperParams"][0] == {"name": "learningRate", "value": "0.001"}
+    assert ml_props["hyperParams"][1] == {"name": "batchSize", "value": "32"}
+    assert ml_props["outputUrls"][0] == "s3://my-bucket/ml/output"
diff --git a/smoke-test/tests/dataprocessinst/test_dataprocessinst.py b/smoke-test/tests/dataprocessinst/test_dataprocessinst.py
deleted file mode 100644
index a1fbe28769b8b5..00000000000000
--- a/smoke-test/tests/dataprocessinst/test_dataprocessinst.py
+++ /dev/null
@@ -1,220 +0,0 @@
-import logging
-import os
-import tempfile
-import time
-from random import randint
-import pytest
-from datahub.emitter.mcp import MetadataChangeProposalWrapper
-from datahub.metadata.schema_classes import (
-    MLMetricClass,
-    MLHyperParamClass,
-    DataProcessInstancePropertiesClass,
-    DataProcessInstanceKeyClass,
-    MLTrainingRunPropertiesClass,
-    AuditStampClass,
-)
-from tests.consistency_utils import wait_for_writes_to_sync
-
-logger = logging.getLogger(__name__)
-
-
-def create_sample_dpi():
-    """Create a sample DataProcessInstance with realistic ML training properties"""
-    # Generate timestamps
-    current_time = int(time.time() * 1000)
-    run_id = "run_abcde"
-    dpi_urn = f"urn:li:dataProcessInstance:{run_id}"
-
-    logger.info(f"Creating DPI with URN: {dpi_urn}")
-
-    # Create key aspect
-    dpi_key = DataProcessInstanceKeyClass(
-        id=run_id
-    )
-    hyper_params = [
-        MLHyperParamClass(
-            name="alpha",
-            value="0.05"
-        ),
-        MLHyperParamClass(
-            name="beta",
-            value="0.95"
-        )
-    ]
-
-    metrics = [
-        MLMetricClass(
-            name="mse",
-            value="0.05"
-        ),
-        MLMetricClass(
-            name="r2",
-            value="0.95"
-        )
-    ]
-
-    # Create DPI properties
-    dpi_props = DataProcessInstancePropertiesClass(
-        name=f"Training {run_id}",
-        type="BATCH_SCHEDULED",
-        created=AuditStampClass(time=current_time, actor="urn:li:corpuser:datahub"),
-        externalUrl="http://mlflow:5000",
-        customProperties={
-            "framework": "statsmodels",
-            "python_version": "3.8",
-        },
-    )
-
-    dpi_ml_props = MLTrainingRunPropertiesClass(
-        hyperParams=hyper_params,
-        trainingMetrics=metrics,
-        outputUrls=["s3://my-bucket/ml/output"],
-    )
-
-    # Create the MCPs - one for the key, one for properties
-    mcps = [
-        # Key aspect
-        MetadataChangeProposalWrapper(
-            entityUrn=dpi_urn,
-            entityType="dataProcessInstance",
-            aspectName="dataProcessInstanceKey",
-            changeType="UPSERT",
-            aspect=dpi_key
-        ),
-        # Properties aspect
-        MetadataChangeProposalWrapper(
-            entityUrn=dpi_urn,
-            entityType="dataProcessInstance",
-            aspectName="dataProcessInstanceProperties",
-            changeType="UPSERT",
-            aspect=dpi_props
-        ),
-        MetadataChangeProposalWrapper(
-            entityUrn=dpi_urn,
-            entityType="dataProcessInstance",
-            aspectName="mlTrainingRunProperties",
-            changeType="UPSERT",
-            aspect=dpi_ml_props
-        )
-    ]
-    return mcps
-
-
-@pytest.fixture(scope="module")
-def ingest_cleanup_data(auth_session, graph_client, request):
-    """Fixture to handle test data setup and cleanup"""
-    try:
-        logger.info("Starting DPI test data creation")
-        mcps = create_sample_dpi()
-
-        # Emit MCPs directly using graph client
-        for mcp in mcps:
-            logger.info(f"Emitting aspect: {mcp.aspect}")
-            graph_client.emit(mcp)
-
-        wait_for_writes_to_sync()
-
-        # Verify entity exists
-        dpi_urn = "urn:li:dataProcessInstance:run_abcde"
-        logger.info(f"Verifying entity exists in graph... {dpi_urn}")
-
-        # Try getting aspect
-        dpi_props = graph_client.get_aspect(
-            dpi_urn,
-            DataProcessInstancePropertiesClass
-        )
-        dpi_key = graph_client.get_aspect(
-            dpi_urn,
-            DataProcessInstanceKeyClass
-        )
-        dpi_ml_props = graph_client.get_aspect(
-            dpi_urn,
-            MLTrainingRunPropertiesClass
-        )
-
-        logger.info(f"DPI properties from graph: {dpi_props}")
-        logger.info(f"DPI key from graph: {dpi_key}")
-        logger.info(f"DPI ML properties from graph: {dpi_ml_props}")
-
-        yield
-
-        logger.info("Cleaning up test data")
-        graph_client.hard_delete_entity(dpi_urn)
-        wait_for_writes_to_sync()
-
-    except Exception as e:
-        logger.error(f"Error in test setup/cleanup: {str(e)}")
-        logger.error(f"Full exception: {e.__class__.__name__}")
-        raise
-
-
-def test_get_dpi(auth_session, ingest_cleanup_data):
-    """Test getting a specific DPI entity"""
-    logger.info("Starting DPI query test")
-
-    json = {
-        "query": """query dataProcessInstance($urn: String!) {
-            dataProcessInstance(urn: $urn) {
-              urn
-              type
-              properties {
-                name
-                created {
-                  time
-                  actor
-                }
-                customProperties {
-                  key
-                  value
-                }
-                externalUrl
-              }
-              mlTrainingRunProperties {
-                hyperParams {
-                  name
-                  value
-                }
-                trainingMetrics {
-                  name
-                  value
-                }
-                outputUrls
-              }
-            }
-        }""",
-        "variables": {
-            "urn": "urn:li:dataProcessInstance:run_abcde"
-        }
-    }
-
-    # Send GraphQL query
-    logger.info("Sending GraphQL query")
-    response = auth_session.post(f"{auth_session.frontend_url()}/api/v2/graphql", json=json)
-    response.raise_for_status()
-    res_data = response.json()
-
-    logger.info(f"Response data: {res_data}")
-
-    # Basic response structure validation
-    assert res_data, "Response should not be empty"
-    assert "data" in res_data, "Response should contain 'data' field"
-    assert "dataProcessInstance" in res_data["data"], "Response should contain 'dataProcessInstance' field"
-
-    dpi = res_data["data"]["dataProcessInstance"]
-    assert dpi, "DPI should not be null"
-    assert "urn" in dpi, "DPI should have URN"
-    assert dpi["urn"] == "urn:li:dataProcessInstance:run_abcde", "URN should match expected value"
-
-    # Validate properties if present
-    if "properties" in dpi and dpi["properties"]:
-        props = dpi["properties"]
-        assert "name" in props, "Properties should contain name"
-        assert "created" in props, "Properties should contain created"
-        assert "customProperties" in props, "Properties should contain customProperties"
-
-    if "mlTrainingRunProperties" in dpi and dpi["mlTrainingRunProperties"]:
-        ml_props = dpi["mlTrainingRunProperties"]
-        assert "hyperParams" in ml_props, "ML properties should contain hyperParams"
-        assert "trainingMetrics" in ml_props, "ML properties should contain trainingMetrics"
-        assert "outputUrls" in ml_props, "ML properties should contain outputUrls"
\ No newline at end of file
diff --git a/smoke-test/tests/ml_models/__init__.py b/smoke-test/tests/ml_models/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/smoke-test/tests/ml_models/test_ml_models.py b/smoke-test/tests/ml_models/test_ml_models.py
new file mode 100644
index 00000000000000..59821ab3e3cc41
--- /dev/null
+++ b/smoke-test/tests/ml_models/test_ml_models.py
@@ -0,0 +1,133 @@
+import logging
+import os
+import tempfile
+from random import randint
+
+import pytest
+from datahub.emitter.mce_builder import make_ml_model_group_urn, make_ml_model_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
+from datahub.ingestion.api.sink import NoopWriteCallback
+from datahub.ingestion.graph.client import DataHubGraph
+from datahub.ingestion.sink.file import FileSink, FileSinkConfig
+from datahub.metadata.schema_classes import (
+    MLModelGroupPropertiesClass,
+    MLModelPropertiesClass,
+)
+
+from tests.utils import (
+    delete_urns_from_file,
+    get_sleep_info,
+    ingest_file_via_rest,
+    wait_for_writes_to_sync,
+)
+
+logger = logging.getLogger(__name__)
+
+# Generate unique model names for testing
+start_index = randint(10, 10000)
+model_names = [f"test_model_{i}" for i in range(start_index, start_index + 3)]
+model_group_urn = make_ml_model_group_urn("workbench", "test_group", "DEV")
+model_urns = [make_ml_model_urn("workbench", name, "DEV") for name in model_names]
+
+
+class FileEmitter:
+    def __init__(self, filename: str) -> None:
+        self.sink: FileSink = FileSink(
+            ctx=PipelineContext(run_id="create_test_data"),
+            config=FileSinkConfig(filename=filename),
+        )
+
+    def emit(self, event):
+        self.sink.write_record_async(
+            record_envelope=RecordEnvelope(record=event, metadata={}),
+            write_callback=NoopWriteCallback(),
+        )
+
+    def close(self):
+        self.sink.close()
+
+
+def create_test_data(filename: str):
+    # Create model group
+    model_group_mcp = MetadataChangeProposalWrapper(
+        entityUrn=str(model_group_urn),
+        aspect=MLModelGroupPropertiesClass(
+            description="Test model group for integration testing",
+            trainingJobs=["urn:li:dataProcessInstance:test_job"],
+        ),
+    )
+
+    # Create models that belong to the group
+    model_mcps = [
+        MetadataChangeProposalWrapper(
+            entityUrn=model_urn,
+            aspect=MLModelPropertiesClass(
+                name=f"Test Model ({model_urn})",
+                description=f"Test model {model_urn}",
+                groups=[str(model_group_urn)],
+                trainingJobs=["urn:li:dataProcessInstance:test_job"],
+            ),
+        )
+        for model_urn in model_urns
+    ]
+
+    file_emitter = FileEmitter(filename)
+    for mcps in [model_group_mcp] + model_mcps:
+        file_emitter.emit(mcps)
+
+    file_emitter.close()
+
+
+sleep_sec, sleep_times = get_sleep_info()
+
+
+@pytest.fixture(scope="module", autouse=False)
+def ingest_cleanup_data(auth_session, graph_client, request):
+    new_file, filename = tempfile.mkstemp(suffix=".json")
+    try:
+        create_test_data(filename)
+        print("ingesting ml model test data")
+        ingest_file_via_rest(auth_session, filename)
+        wait_for_writes_to_sync()
+        yield
+        print("removing ml model test data")
+        delete_urns_from_file(graph_client, filename)
+        wait_for_writes_to_sync()
+    finally:
+        os.remove(filename)
+
+
+@pytest.mark.integration
+def test_create_ml_models(graph_client: DataHubGraph, ingest_cleanup_data):
+    """Test creation and validation of ML models and model groups."""
+
+    # Validate model group properties
+    fetched_group_props = graph_client.get_aspect(
+        str(model_group_urn), MLModelGroupPropertiesClass
+    )
+    assert fetched_group_props is not None
+    assert fetched_group_props.description == "Test model group for integration testing"
+    assert fetched_group_props.trainingJobs == ["urn:li:dataProcessInstance:test_job"]
+
+    # Validate individual models
+    for model_urn in model_urns:
+        fetched_model_props = graph_client.get_aspect(model_urn, MLModelPropertiesClass)
+        assert fetched_model_props is not None
+        assert fetched_model_props.name == f"Test Model ({model_urn})"
+        assert fetched_model_props.description == f"Test model {model_urn}"
+        assert str(model_group_urn) in (fetched_model_props.groups or [])
+        assert fetched_model_props.trainingJobs == [
+            "urn:li:dataProcessInstance:test_job"
+        ]
+
+    # Validate relationships between models and group
+    related_models = set()
+    for e in graph_client.get_related_entities(
+        str(model_group_urn),
+        relationship_types=["MemberOf"],
+        direction=DataHubGraph.RelationshipDirection.INCOMING,
+    ):
+        related_models.add(e.urn)
+
+    assert set(model_urns) == related_models

From e0e80e9d4f9265766bdaacd8d5305cb232b9b07c Mon Sep 17 00:00:00 2001
From: RyanHolstien
Date: Mon, 13 Jan 2025 15:15:34 -0600
Subject: [PATCH 19/19] fix test

---
 .../types/dataprocessinst/DataProcessInstanceTypeTest.java | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java
index 38556261c71a01..437c74ab669146 100644
--- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java
+++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/types/dataprocessinst/DataProcessInstanceTypeTest.java
@@ -153,8 +153,10 @@ public void testBatchLoadFull() throws Exception {
                 .setUrn(dpiUrn1)
                 .setAspects(new EnvelopedAspectMap(aspectMap))));
 
-    DataProcessInstanceType type =
-        new DataProcessInstanceType(client, Mockito.mock(FeatureFlags.class));
+    FeatureFlags mockFeatureFlags = Mockito.mock(FeatureFlags.class);
+    Mockito.when(mockFeatureFlags.isDataProcessInstanceEntityEnabled()).thenReturn(true);
+
+    DataProcessInstanceType type = new DataProcessInstanceType(client, mockFeatureFlags);
 
     QueryContext mockContext = getMockAllowContext();
     List<DataFetcherResult<DataProcessInstance>> result =