Skip to content

Commit 047644b

Browse files
yoonhyejinJohn Joycejjoyce0510
authored
feat: update mlflow-related metadata models (#12174)
Co-authored-by: John Joyce <[email protected]> Co-authored-by: John Joyce <[email protected]>
1 parent 21ddb55 commit 047644b

12 files changed

+568
-95
lines changed

datahub-graphql-core/src/main/resources/entity.graphql

+193-3
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,16 @@ type Query {
262262
Fetch all Business Attributes
263263
"""
264264
listBusinessAttributes(input: ListBusinessAttributesInput!): ListBusinessAttributesResult
265+
266+
"""
267+
Fetch a Data Process Instance by primary key (urn)
268+
"""
269+
dataProcessInstance(urn: String!): DataProcessInstance
270+
271+
265272
}
266273

274+
267275
"""
268276
An ERModelRelationship is a high-level abstraction that describes which dataset fields are related to one another through an entity-relationship model.
269277
"""
@@ -9832,15 +9840,45 @@ type MLModelGroup implements EntityWithRelationships & Entity & BrowsableEntity
98329840
privileges: EntityPrivileges
98339841
}
98349842

9843+
"""
9844+
Properties describing a group of related ML models
9845+
"""
98359846
type MLModelGroupProperties {
9847+
"""
9848+
Display name of the model group
9849+
"""
9850+
name: String
98369851

9852+
"""
9853+
Detailed description of the model group's purpose and contents
9854+
"""
98379855
description: String
98389856

9839-
createdAt: Long
9857+
"""
9858+
When this model group was created
9859+
"""
9860+
created: AuditStamp
98409861

9862+
"""
9863+
When this model group was last modified
9864+
"""
9865+
lastModified: AuditStamp
9866+
9867+
"""
9868+
Version identifier for this model group
9869+
"""
98419870
version: VersionTag
98429871

9872+
"""
9873+
Custom key-value properties for the model group
9874+
"""
98439875
customProperties: [CustomPropertiesEntry!]
9876+
9877+
"""
9878+
Deprecated creation timestamp
9879+
@deprecated Use the 'created' field instead
9880+
"""
9881+
createdAt: Long @deprecated(reason: "Use `created` instead")
98449882
}
98459883

98469884
"""
@@ -9990,40 +10028,103 @@ description: String
999010028
}
999110029

999210030
type MLMetric {
10031+
"""
10032+
Name of the metric (e.g. accuracy, precision, recall)
10033+
"""
999310034
name: String
999410035

10036+
"""
10037+
Description of what this metric measures
10038+
"""
999510039
description: String
999610040

10041+
"""
10042+
The computed value of the metric
10043+
"""
999710044
value: String
999810045

10046+
"""
10047+
Timestamp when this metric was recorded
10048+
"""
999910049
createdAt: Long
1000010050
}
1000110051

1000210052
type MLModelProperties {
10053+
"""
10054+
The display name of the model used in the UI
10055+
"""
10056+
name: String!
1000310057

10058+
"""
10059+
Detailed description of the model's purpose and characteristics
10060+
"""
1000410061
description: String
1000510062

10006-
date: Long
10063+
"""
10064+
When the model was last modified
10065+
"""
10066+
lastModified: AuditStamp
1000710067

10068+
"""
10069+
Version identifier for this model
10070+
"""
1000810071
version: String
1000910072

10073+
"""
10074+
The type/category of ML model (e.g. classification, regression)
10075+
"""
1001010076
type: String
1001110077

10078+
"""
10079+
Mapping of hyperparameter configurations
10080+
"""
1001210081
hyperParameters: HyperParameterMap
1001310082

10014-
hyperParams: [MLHyperParam]
10083+
"""
10084+
List of hyperparameter settings used to train this model
10085+
"""
10086+
hyperParams: [MLHyperParam]
1001510087

10088+
"""
10089+
Performance metrics from model training
10090+
"""
1001610091
trainingMetrics: [MLMetric]
1001710092

10093+
"""
10094+
Names of ML features used by this model
10095+
"""
1001810096
mlFeatures: [String!]
1001910097

10098+
"""
10099+
Tags for categorizing and searching models
10100+
"""
1002010101
tags: [String!]
1002110102

10103+
"""
10104+
Model groups this model belongs to
10105+
"""
1002210106
groups: [MLModelGroup]
1002310107

10108+
"""
10109+
Additional custom properties specific to this model
10110+
"""
1002410111
customProperties: [CustomPropertiesEntry!]
1002510112

10113+
"""
10114+
URL to view this model in external system
10115+
"""
1002610116
externalUrl: String
10117+
10118+
"""
10119+
When this model was created
10120+
"""
10121+
created: AuditStamp
10122+
10123+
"""
10124+
Deprecated timestamp for model creation
10125+
@deprecated Use 'created' field instead
10126+
"""
10127+
date: Long @deprecated(reason: "Use `created` instead")
1002710128
}
1002810129

1002910130
type MLFeatureProperties {
@@ -12804,3 +12905,92 @@ type CronSchedule {
1280412905
"""
1280512906
timezone: String!
1280612907
}
12908+
12909+
12910+
"""
12911+
Properties describing a data process instance's execution metadata
12912+
"""
12913+
type DataProcessInstanceProperties {
12914+
"""
12915+
The display name of this process instance
12916+
"""
12917+
name: String!
12918+
12919+
"""
12920+
URL to view this process instance in the external system
12921+
"""
12922+
externalUrl: String
12923+
12924+
"""
12925+
When this process instance was created
12926+
"""
12927+
created: AuditStamp
12928+
12929+
"""
12930+
Additional custom properties specific to this process instance
12931+
"""
12932+
customProperties: [CustomPropertiesEntry!]
12933+
}
12934+
12935+
"""
12936+
Properties specific to an ML model training run instance
12937+
"""
12938+
type MLTrainingRunProperties {
12939+
"""
12940+
Unique identifier for this training run
12941+
"""
12942+
id: String
12943+
12944+
"""
12945+
List of URLs to access training run outputs (e.g. model artifacts, logs)
12946+
"""
12947+
outputUrls: [String]
12948+
12949+
"""
12950+
Hyperparameters used in this training run
12951+
"""
12952+
hyperParams: [MLHyperParam]
12953+
12954+
"""
12955+
Performance metrics recorded during this training run
12956+
"""
12957+
trainingMetrics: [MLMetric]
12958+
}
12959+
12960+
extend type DataProcessInstance {
12961+
12962+
"""
12963+
Additional read-only properties associated with the Data Process Instance
12964+
"""
12965+
properties: DataProcessInstanceProperties
12966+
12967+
"""
12968+
The specific instance of the data platform that this entity belongs to
12969+
"""
12970+
dataPlatformInstance: DataPlatformInstance
12971+
12972+
"""
12973+
Sub Types that this entity implements
12974+
"""
12975+
subTypes: SubTypes
12976+
12977+
"""
12978+
The parent container in which the entity resides
12979+
"""
12980+
container: Container
12981+
12982+
"""
12983+
Standardized platform urn where the data process instance is defined
12984+
"""
12985+
platform: DataPlatform!
12986+
12987+
"""
12988+
Recursively get the lineage of containers for this entity
12989+
"""
12990+
parentContainers: ParentContainersResult
12991+
12992+
"""
12993+
Additional properties when subtype is Training Run
12994+
"""
12995+
mlTrainingRunProperties: MLTrainingRunProperties
12996+
}

metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ record DataProcessInstanceOutput {
1515
@Relationship = {
1616
"/*": {
1717
"name": "Produces",
18-
"entityTypes": [ "dataset" ]
18+
"entityTypes": [ "dataset", "mlModel" ]
1919
}
2020
}
2121
@Searchable = {

metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,4 +52,4 @@ record DataProcessInstanceProperties includes CustomProperties, ExternalReferenc
5252
}
5353
created: AuditStamp
5454

55-
}
55+
}

metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl

+35
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import com.linkedin.common.Urn
44
import com.linkedin.common.Time
55
import com.linkedin.common.VersionTag
66
import com.linkedin.common.CustomProperties
7+
import com.linkedin.common.TimeStamp
78

89
/**
910
* Properties associated with an ML Model Group
@@ -13,6 +14,17 @@ import com.linkedin.common.CustomProperties
1314
}
1415
record MLModelGroupProperties includes CustomProperties {
1516

17+
/**
18+
* Display name of the MLModelGroup
19+
*/
20+
@Searchable = {
21+
"fieldType": "WORD_GRAM",
22+
"enableAutocomplete": true,
23+
"boostScore": 10.0,
24+
"queryByDefault": true,
25+
}
26+
name: optional string
27+
1628
/**
1729
* Documentation of the MLModelGroup
1830
*/
@@ -25,8 +37,31 @@ record MLModelGroupProperties includes CustomProperties {
2537
/**
2638
* Date when the MLModelGroup was developed
2739
*/
40+
@deprecated
2841
createdAt: optional Time
2942

43+
/**
44+
* Time and Actor who created the MLModelGroup
45+
*/
46+
created: optional TimeStamp
47+
48+
/**
49+
* Date when the MLModelGroup was last modified
50+
*/
51+
lastModified: optional TimeStamp
52+
53+
/**
54+
* List of jobs (if any) used to train the model group. Visible in Lineage.
55+
*/
56+
@Relationship = {
57+
"/*": {
58+
"name": "TrainedBy",
59+
"entityTypes": [ "dataJob" ],
60+
"isLineage": true
61+
}
62+
}
63+
trainingJobs: optional array[Urn]
64+
3065
/**
3166
* Version of the MLModelGroup
3267
*/

metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl

+26-2
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import com.linkedin.common.Time
66
import com.linkedin.common.VersionTag
77
import com.linkedin.common.CustomProperties
88
import com.linkedin.common.ExternalReference
9+
import com.linkedin.common.TimeStamp
910

1011
/**
1112
* Properties associated with a ML Model
@@ -15,6 +16,18 @@ import com.linkedin.common.ExternalReference
1516
}
1617
record MLModelProperties includes CustomProperties, ExternalReference {
1718

19+
/**
20+
* Display name of the MLModel
21+
*/
22+
@Searchable = {
23+
"fieldType": "WORD_GRAM",
24+
"enableAutocomplete": true,
25+
"boostScore": 10.0,
26+
"queryByDefault": true,
27+
}
28+
name: optional string
29+
30+
1831
/**
1932
* Documentation of the MLModel
2033
*/
@@ -27,8 +40,19 @@ record MLModelProperties includes CustomProperties, ExternalReference {
2740
/**
2841
* Date when the MLModel was developed
2942
*/
43+
@deprecated
3044
date: optional Time
3145

46+
/**
47+
* Audit stamp containing who created this and when
48+
*/
49+
created: optional TimeStamp
50+
51+
/**
52+
* Date when the MLModel was last modified
53+
*/
54+
lastModified: optional TimeStamp
55+
3256
/**
3357
* Version of the MLModel
3458
*/
@@ -93,12 +117,12 @@ record MLModelProperties includes CustomProperties, ExternalReference {
93117
deployments: optional array[Urn]
94118

95119
/**
96-
* List of jobs (if any) used to train the model
120+
* List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of specific Data Process Instances (runs) via the DataProcessInstanceOutput aspect.
97121
*/
98122
@Relationship = {
99123
"/*": {
100124
"name": "TrainedBy",
101-
"entityTypes": [ "dataJob" ],
125+
"entityTypes": [ "dataJob", "dataProcessInstance" ],
102126
"isLineage": true
103127
}
104128
}

0 commit comments

Comments
 (0)