
Commit 2a91e6d

Merge remote-tracking branch 'oss-datahub/master' into vertex_src_temp

2 parents: 8e40b7c + 9cb5886

37 files changed: +1161 -429 lines

datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java (+2)

```diff
@@ -204,6 +204,8 @@ public CompletableFuture<AppConfig> get(final DataFetchingEnvironment environmen
         .setShowNavBarRedesign(_featureFlags.isShowNavBarRedesign())
         .setShowAutoCompleteResults(_featureFlags.isShowAutoCompleteResults())
         .setEntityVersioningEnabled(_featureFlags.isEntityVersioning())
+        .setShowSearchBarAutocompleteRedesign(
+            _featureFlags.isShowSearchBarAutocompleteRedesign())
         .build();

     appConfig.setFeatureFlags(featureFlagsConfig);
```

datahub-graphql-core/src/main/resources/app.graphql (+5)

```diff
@@ -593,6 +593,11 @@ type FeatureFlagsConfig {
   If turned on, exposes the versioning feature by allowing users to link entities in the UI.
   """
   entityVersioningEnabled: Boolean!
+
+  """
+  If turned on, show the redesigned search bar's autocomplete
+  """
+  showSearchBarAutocompleteRedesign: Boolean!
 }

 """
```

datahub-web-react/src/app/entity/mlModelGroup/profile/ModelGroupModels.tsx (+2, -2)

```diff
@@ -125,8 +125,8 @@ export default function MLGroupModels() {
         },
     },
     {
-        title: 'Tags',
-        key: 'tags',
+        title: 'Properties',
+        key: 'properties',
         width: 200,
         render: (_: any, record: any) => {
             const tags = record.properties?.tags || [];
```

datahub-web-react/src/app/entityV2/mlModelGroup/profile/ModelGroupModels.tsx (+2, -2)

```diff
@@ -128,8 +128,8 @@ export default function MLGroupModels() {
         },
     },
     {
-        title: 'Tags',
-        key: 'tags',
+        title: 'Properties',
+        key: 'properties',
         width: 200,
         render: (_: any, record: any) => {
             const tags = record.properties?.tags || [];
```

datahub-web-react/src/appConfigContext.tsx (+1)

```diff
@@ -67,6 +67,7 @@ export const DEFAULT_APP_CONFIG = {
         showNavBarRedesign: false,
         showAutoCompleteResults: false,
         entityVersioningEnabled: false,
+        showSearchBarAutocompleteRedesign: false,
     },
     chromeExtensionConfig: {
         enabled: false,
```

datahub-web-react/src/graphql/app.graphql (+1)

```diff
@@ -82,6 +82,7 @@ query appConfig {
     showNavBarRedesign
     showAutoCompleteResults
     entityVersioningEnabled
+    showSearchBarAutocompleteRedesign
   }
   chromeExtensionConfig {
     enabled
```

docs/deploy/azure.md (+2, -2)

````diff
@@ -225,10 +225,10 @@ global:
   password:
     value: "${POSTGRES_ADMIN_PASSWORD}"
 ```
-Run this command helm command to update datahub configuration
+Run this helm command to update datahub configuration

 ```
 helm upgrade --install datahub datahub/datahub --values values.yaml
 ```

-And there you go! You have now installed DataHub on an Azure Kubernetes Cluster with an ingress controller set up to expose the frontend. Additionally you have utilized PostgreSQL as the storage layer of DataHub.
+And there you go! You have now installed DataHub on an Azure Kubernetes Cluster with an ingress controller set up to expose the frontend. Additionally you have utilized PostgreSQL as the storage layer of DataHub.
````

metadata-ingestion/pyproject.toml (+1, -1)

```diff
@@ -1,6 +1,6 @@
 [build-system]
 build-backend = "setuptools.build_meta"
-requires = ["setuptools>=63.0.0", "wheel"]
+requires = ["setuptools >= 71.1", "wheel"]

 [tool.ruff.lint.isort]
 section-order = ["future", "patch", "standard-library", "third-party", "first-party", "local-folder"]
```

metadata-ingestion/scripts/avro_codegen.py (+30)

```diff
@@ -526,6 +526,36 @@ def get_notebook_id(self) -> str:
 """
     ],
     "tag": [_create_from_id.format(class_name="TagUrn")],
+    "chart": [
+        """
+    @classmethod
+    def create_from_ids(
+        cls,
+        platform: str,
+        name: str,
+        platform_instance: Optional[str] = None,
+    ) -> "ChartUrn":
+        return ChartUrn(
+            dashboard_tool=platform,
+            chart_id=f"{platform_instance}.{name}" if platform_instance else name,
+        )
+"""
+    ],
+    "dashboard": [
+        """
+    @classmethod
+    def create_from_ids(
+        cls,
+        platform: str,
+        name: str,
+        platform_instance: Optional[str] = None,
+    ) -> "DashboardUrn":
+        return DashboardUrn(
+            dashboard_tool=platform,
+            dashboard_id=f"{platform_instance}.{name}" if platform_instance else name,
+        )
+"""
+    ],
 }
```
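
The codegen entries above stamp `create_from_ids` classmethods onto the generated `ChartUrn` and `DashboardUrn` classes. A minimal usage sketch, assuming code regenerated from this script; the exact urn rendering below follows the `urn:li:chart:(<tool>,<id>)` shape used elsewhere in this commit:

```python
from datahub.metadata.urns import ChartUrn, DashboardUrn

# Without a platform instance, the name is used as the id directly.
chart = ChartUrn.create_from_ids(platform="looker", name="chart_1")
assert chart.urn() == "urn:li:chart:(looker,chart_1)"

# With one, the id becomes "<platform_instance>.<name>".
dashboard = DashboardUrn.create_from_ids(
    platform="looker", name="dash_1", platform_instance="prod"
)
assert dashboard.urn() == "urn:li:dashboard:(looker,prod.dash_1)"
```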

metadata-ingestion/setup.py (-1)

```diff
@@ -556,7 +556,6 @@

 mypy_stubs = {
     "types-dataclasses",
-    "types-setuptools",
     "types-six",
     "types-python-dateutil",
     # We need to avoid 2.31.0.5 and 2.31.0.4 due to
```

metadata-ingestion/src/datahub/emitter/mce_builder.py (+28, -13)

```diff
@@ -52,7 +52,15 @@
     UpstreamLineageClass,
     _Aspect as AspectAbstract,
 )
-from datahub.metadata.urns import DataFlowUrn, DatasetUrn, TagUrn
+from datahub.metadata.urns import (
+    ChartUrn,
+    DashboardUrn,
+    DataFlowUrn,
+    DataJobUrn,
+    DataPlatformUrn,
+    DatasetUrn,
+    TagUrn,
+)
 from datahub.utilities.urn_encoder import UrnEncoder

 logger = logging.getLogger(__name__)
@@ -119,7 +127,7 @@ def parse_ts_millis(ts: Optional[float]) -> Optional[datetime]:
 def make_data_platform_urn(platform: str) -> str:
     if platform.startswith("urn:li:dataPlatform:"):
         return platform
-    return f"urn:li:dataPlatform:{platform}"
+    return DataPlatformUrn.create_from_id(platform).urn()


 def make_dataset_urn(platform: str, name: str, env: str = DEFAULT_ENV) -> str:
@@ -236,7 +244,7 @@ def make_user_urn(username: str) -> str:
     Makes a user urn if the input is not a user or group urn already
     """
     return (
-        f"urn:li:corpuser:{username}"
+        f"urn:li:corpuser:{UrnEncoder.encode_string(username)}"
         if not username.startswith(("urn:li:corpuser:", "urn:li:corpGroup:"))
         else username
     )
@@ -249,7 +257,7 @@ def make_group_urn(groupname: str) -> str:
     if groupname and groupname.startswith(("urn:li:corpGroup:", "urn:li:corpuser:")):
         return groupname
     else:
-        return f"urn:li:corpGroup:{groupname}"
+        return f"urn:li:corpGroup:{UrnEncoder.encode_string(groupname)}"


 def make_tag_urn(tag: str) -> str:
@@ -301,7 +309,12 @@


 def make_data_job_urn_with_flow(flow_urn: str, job_id: str) -> str:
-    return f"urn:li:dataJob:({flow_urn},{job_id})"
+    data_flow_urn = DataFlowUrn.from_string(flow_urn)
+    data_job_urn = DataJobUrn.create_from_ids(
+        data_flow_urn=data_flow_urn.urn(),
+        job_id=job_id,
+    )
+    return data_job_urn.urn()


 def make_data_process_instance_urn(dataProcessInstanceId: str) -> str:
@@ -324,10 +337,11 @@ def make_dashboard_urn(
     platform: str, name: str, platform_instance: Optional[str] = None
 ) -> str:
     # FIXME: dashboards don't currently include data platform urn prefixes.
-    if platform_instance:
-        return f"urn:li:dashboard:({platform},{platform_instance}.{name})"
-    else:
-        return f"urn:li:dashboard:({platform},{name})"
+    return DashboardUrn.create_from_ids(
+        platform=platform,
+        name=name,
+        platform_instance=platform_instance,
+    ).urn()


 def dashboard_urn_to_key(dashboard_urn: str) -> Optional[DashboardKeyClass]:
@@ -342,10 +356,11 @@ def make_chart_urn(
     platform: str, name: str, platform_instance: Optional[str] = None
 ) -> str:
     # FIXME: charts don't currently include data platform urn prefixes.
-    if platform_instance:
-        return f"urn:li:chart:({platform},{platform_instance}.{name})"
-    else:
-        return f"urn:li:chart:({platform},{name})"
+    return ChartUrn.create_from_ids(
+        platform=platform,
+        name=name,
+        platform_instance=platform_instance,
+    ).urn()


 def chart_urn_to_key(chart_urn: str) -> Optional[ChartKeyClass]:
```
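
A quick sketch of the resulting builder behavior. The plain-id cases follow directly from the code above; the `platform_instance` folding mirrors the `create_from_ids` logic added in `avro_codegen.py`. The encoded-output case assumes `UrnEncoder.encode_string` leaves simple identifiers untouched:

```python
from datahub.emitter.mce_builder import (
    make_chart_urn,
    make_data_platform_urn,
    make_user_urn,
)

# Simple ids are unchanged by the encoder.
assert make_user_urn("jdoe") == "urn:li:corpuser:jdoe"

# Existing urns still pass through untouched.
assert make_user_urn("urn:li:corpuser:jdoe") == "urn:li:corpuser:jdoe"

# platform_instance is folded into the chart id as a dotted prefix.
assert (
    make_chart_urn("looker", "chart_1", platform_instance="prod")
    == "urn:li:chart:(looker,prod.chart_1)"
)

# Delegating to DataPlatformUrn keeps the same string shape as before.
assert make_data_platform_urn("snowflake") == "urn:li:dataPlatform:snowflake"
```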

metadata-ingestion/src/datahub/ingestion/source/common/subtypes.py (+7)

```diff
@@ -60,8 +60,15 @@ class BIContainerSubTypes(StrEnum):
     MODE_COLLECTION = "Collection"


+class FlowContainerSubTypes(StrEnum):
+    MSSQL_JOB = "Job"
+    MSSQL_PROCEDURE_CONTAINER = "Procedures Container"
+
+
 class JobContainerSubTypes(StrEnum):
     NIFI_PROCESS_GROUP = "Process Group"
+    MSSQL_JOBSTEP = "Job Step"
+    MSSQL_STORED_PROCEDURE = "Stored Procedure"


 class BIAssetSubTypes(StrEnum):
```
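
Since these enums subclass `StrEnum`, members compare and serialize as their plain string values, which is what lets them be dropped directly into `SubTypesClass.typeNames` (see `job_models.py` below). A tiny sketch:

```python
from datahub.ingestion.source.common.subtypes import (
    FlowContainerSubTypes,
    JobContainerSubTypes,
)

# StrEnum members are themselves strings, so they can be passed
# straight into aspects that expect List[str].
assert FlowContainerSubTypes.MSSQL_JOB == "Job"
assert JobContainerSubTypes.MSSQL_STORED_PROCEDURE == "Stored Procedure"
```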

metadata-ingestion/src/datahub/ingestion/source/identity/okta.py (+22)

```diff
@@ -666,6 +666,27 @@ def _map_okta_user_profile_to_username(
             self.config.okta_profile_to_username_regex,
         )

+    def _map_okta_user_profile_custom_properties(
+        self, profile: UserProfile
+    ) -> Dict[str, str]:
+        # filter out the common fields that are already mapped to the CorpUserInfo aspect and the private ones
+        return {
+            k: str(v)
+            for k, v in profile.__dict__.items()
+            if v
+            and k
+            not in [
+                "displayName",
+                "firstName",
+                "lastName",
+                "email",
+                "title",
+                "countryCode",
+                "department",
+            ]
+            and not k.startswith("_")
+        }
+
     # Converts Okta User Profile into a CorpUserInfo.
     def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass:
         # TODO: Extract user's manager if provided.
@@ -683,6 +704,7 @@ def _map_okta_user_profile(self, profile: UserProfile) -> CorpUserInfoClass:
             title=profile.title,
             countryCode=profile.countryCode,
             departmentName=profile.department,
+            customProperties=self._map_okta_user_profile_custom_properties(profile),
         )

     def _make_corp_group_urn(self, name: str) -> str:
```
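
The new comprehension keeps only truthy, non-reserved, non-private profile attributes. A self-contained sketch of the same filter, using `SimpleNamespace` as a hypothetical stand-in for okta's `UserProfile` (only `__dict__` access is exercised):

```python
from types import SimpleNamespace

# Hypothetical stand-in for okta.models.UserProfile.
profile = SimpleNamespace(
    firstName="Ada",              # reserved: already mapped onto CorpUserInfo
    email="ada@example.com",      # reserved
    employeeNumber="E-1001",      # custom field -> kept
    costCenter=None,              # falsy -> dropped
    _links={"self": "..."},       # underscore-prefixed -> dropped
)

custom_properties = {
    k: str(v)
    for k, v in profile.__dict__.items()
    if v
    and k
    not in [
        "displayName",
        "firstName",
        "lastName",
        "email",
        "title",
        "countryCode",
        "department",
    ]
    and not k.startswith("_")
}
assert custom_properties == {"employeeNumber": "E-1001"}
```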

metadata-ingestion/src/datahub/ingestion/source/metabase.py (+3, -3)

```diff
@@ -313,7 +313,7 @@ def construct_dashboard_from_api_data(
         return None

     dashboard_urn = builder.make_dashboard_urn(
-        self.platform, dashboard_details.get("id", "")
+        self.platform, str(dashboard_details.get("id", ""))
     )
     dashboard_snapshot = DashboardSnapshot(
         urn=dashboard_urn,
@@ -337,7 +337,7 @@ def construct_dashboard_from_api_data(
         card_id = card_info.get("card").get("id", "")
         if not card_id:
             continue  # most likely a virtual card without an id (text or heading), not relevant.
-        chart_urn = builder.make_chart_urn(self.platform, card_id)
+        chart_urn = builder.make_chart_urn(self.platform, str(card_id))
         chart_urns.append(chart_urn)

     dashboard_info_class = DashboardInfoClass(
@@ -459,7 +459,7 @@ def construct_card_from_api_data(self, card_data: dict) -> Optional[ChartSnapsho
         )
         return None

-    chart_urn = builder.make_chart_urn(self.platform, card_id)
+    chart_urn = builder.make_chart_urn(self.platform, str(card_id))
     chart_snapshot = ChartSnapshot(
         urn=chart_urn,
         aspects=[],
```
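
The `str(...)` wraps matter because Metabase returns ids as integers, while the urn builders (now routed through the urn classes) expect string ids. A small sketch, with an illustrative payload:

```python
import datahub.emitter.mce_builder as builder

card = {"id": 42}  # Metabase's API returns numeric ids, not strings

# Wrapping with str() keeps the urn identical to the old f-string output.
chart_urn = builder.make_chart_urn("metabase", str(card.get("id", "")))
assert chart_urn == "urn:li:chart:(metabase,42)"
```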

metadata-ingestion/src/datahub/ingestion/source/mode.py (+1, -1)

```diff
@@ -377,7 +377,7 @@ def _browse_path_chart(
     ]

     def _dashboard_urn(self, report_info: dict) -> str:
-        return builder.make_dashboard_urn(self.platform, report_info.get("id", ""))
+        return builder.make_dashboard_urn(self.platform, str(report_info.get("id", "")))

     def _parse_last_run_at(self, report_info: dict) -> Optional[int]:
         # Mode queries are refreshed, and that timestamp is reflected correctly here.
```

metadata-ingestion/src/datahub/ingestion/source/sql/mssql/job_models.py (+29)

```diff
@@ -11,12 +11,17 @@
     DatabaseKey,
     SchemaKey,
 )
+from datahub.ingestion.source.common.subtypes import (
+    FlowContainerSubTypes,
+    JobContainerSubTypes,
+)
 from datahub.metadata.schema_classes import (
     ContainerClass,
     DataFlowInfoClass,
     DataJobInfoClass,
     DataJobInputOutputClass,
     DataPlatformInstanceClass,
+    SubTypesClass,
 )


@@ -211,6 +216,18 @@ def as_datajob_info_aspect(self) -> DataJobInfoClass:
             status=self.status,
         )

+    @property
+    def as_subtypes_aspect(self) -> SubTypesClass:
+        assert isinstance(self.entity, (JobStep, StoredProcedure))
+        type = (
+            JobContainerSubTypes.MSSQL_JOBSTEP
+            if isinstance(self.entity, JobStep)
+            else JobContainerSubTypes.MSSQL_STORED_PROCEDURE
+        )
+        return SubTypesClass(
+            typeNames=[type],
+        )
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.flow.platform_instance:
@@ -276,6 +293,18 @@ def as_dataflow_info_aspect(self) -> DataFlowInfoClass:
             externalUrl=self.external_url,
         )

+    @property
+    def as_subtypes_aspect(self) -> SubTypesClass:
+        assert isinstance(self.entity, (MSSQLJob, MSSQLProceduresContainer))
+        type = (
+            FlowContainerSubTypes.MSSQL_JOB
+            if isinstance(self.entity, MSSQLJob)
+            else FlowContainerSubTypes.MSSQL_PROCEDURE_CONTAINER
+        )
+        return SubTypesClass(
+            typeNames=[type],
+        )
+
     @property
     def as_maybe_platform_instance_aspect(self) -> Optional[DataPlatformInstanceClass]:
         if self.entity.platform_instance:
```
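
The two `as_subtypes_aspect` properties attach human-readable subtypes to MSSQL jobs and procedures. A standalone sketch of the same dispatch, with hypothetical stand-ins for the source's `JobStep` and `StoredProcedure` entities:

```python
from dataclasses import dataclass

from datahub.ingestion.source.common.subtypes import JobContainerSubTypes
from datahub.metadata.schema_classes import SubTypesClass


# Hypothetical stand-ins for the entities defined in job_models.py.
@dataclass
class JobStep:
    name: str


@dataclass
class StoredProcedure:
    name: str


def as_subtypes_aspect(entity: object) -> SubTypesClass:
    # Same dispatch as the diff: JobStep -> "Job Step", else "Stored Procedure".
    subtype = (
        JobContainerSubTypes.MSSQL_JOBSTEP
        if isinstance(entity, JobStep)
        else JobContainerSubTypes.MSSQL_STORED_PROCEDURE
    )
    return SubTypesClass(typeNames=[subtype])


assert as_subtypes_aspect(JobStep("step_1")).typeNames == ["Job Step"]
assert as_subtypes_aspect(StoredProcedure("usp_load")).typeNames == ["Stored Procedure"]
```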
