Skip to content

Commit 54dc33e

Browse files
committed
More verbosity
1 parent 3684130 commit 54dc33e

File tree

3 files changed

+28
-11
lines changed

3 files changed

+28
-11
lines changed

metadata-ingestion/src/datahub/ingestion/api/source_helpers.py

+24 -4
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import json
21
import logging
32
from datetime import datetime, timezone
43
from typing import (
@@ -25,6 +24,7 @@
2524
BrowsePathsV2Class,
2625
ChangeTypeClass,
2726
ContainerClass,
27+
DatasetProfileClass,
2828
DatasetPropertiesClass,
2929
DatasetUsageStatisticsClass,
3030
MetadataChangeEventClass,
@@ -80,20 +80,40 @@ def create_dataset_props_patch_builder(
8080

8181

8282
def check_mcp_correctness(mcp: MetadataChangeProposalClass):
83-
logger.debug(f"Processing as MCP with urn: {mcp.entityUrn} and aspect: {mcp.aspectName}, change type: {mcp.changeType}")
83+
logger.debug(
84+
f"Processing as MCP with urn: {mcp.entityUrn} and aspect: {mcp.aspectName}, change type: {mcp.changeType}"
85+
)
8486
logger.debug(f"Aspect length: {len(mcp.aspect.value)}")
8587
logger.debug(f"Full aspect:\n{mcp.aspect}")
8688

8789

8890
def check_mcpw_correctness(mcp: MetadataChangeProposalWrapper):
89-
logger.debug(f"Processing as MCPW with urn: {mcp.entityUrn} and aspect: {mcp.aspectName}, change type: {mcp.changeType}")
91+
logger.debug(
92+
f"Processing as MCPW with urn: {mcp.entityUrn} and aspect: {mcp.aspectName}, change type: {mcp.changeType}"
93+
)
9094
logger.debug(f"Full aspect:\n{mcp.aspect}")
9195
if isinstance(mcp.aspect, SchemaMetadataClass):
9296
schema: SchemaMetadataClass = mcp.aspect
9397
logger.debug(f"Schema aspect dump:\n{schema.to_obj()}")
98+
if isinstance(mcp.aspect, DatasetProfileClass):
99+
profile: DatasetProfileClass = mcp.aspect
100+
logger.debug(f"Dataset Profile aspect dump:\n{profile.to_obj()}")
101+
logger.debug(f"Length of field profiles: {len(profile.fieldProfiles)}")
102+
for field in profile.fieldProfiles:
103+
logger.debug(
104+
f"Field {field.fieldPath} has {len(field.sampleValues)} sample values"
105+
)
106+
values_len = 0
107+
for value in field.sampleValues:
108+
values_len += len(value)
109+
logger.debug(
110+
f"Field {field.fieldPath} has {len(field.sampleValues)} sample values, taking total bytes {values_len}"
111+
)
94112

95113

96-
def check_workunit_correctness(stream: Iterable[MetadataWorkUnit]) -> Iterable[MetadataWorkUnit]:
114+
def check_workunit_correctness(
115+
stream: Iterable[MetadataWorkUnit],
116+
) -> Iterable[MetadataWorkUnit]:
97117
for wu in stream:
98118
logger.debug(f"Checking correctnes for workunit: {wu.id}")
99119
if isinstance(wu.metadata, MetadataChangeProposalClass):

metadata-ingestion/src/datahub/ingestion/source/state/entity_removal_state.py

+1 -5
Original file line number | Diff line number | Diff line change
@@ -146,11 +146,7 @@ def urn_count(self) -> int:
146146
def compute_percent_entities_changed(
147147
new_entities: List[str], old_entities: List[str]
148148
) -> float:
149-
(
150-
overlap_count,
151-
old_count,
152-
_,
153-
) = _get_entity_overlap_and_cardinalities(
149+
(overlap_count, old_count, _,) = _get_entity_overlap_and_cardinalities(
154150
new_entities=new_entities, old_entities=old_entities
155151
)
156152

metadata-ingestion/src/datahub/ingestion/source/unity/source.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,9 @@
4141
TestConnectionReport,
4242
)
4343
from datahub.ingestion.api.source_helpers import (
44+
check_workunit_correctness,
4445
create_dataset_owners_patch_builder,
45-
create_dataset_props_patch_builder, check_workunit_correctness,
46+
create_dataset_props_patch_builder,
4647
)
4748
from datahub.ingestion.api.workunit import MetadataWorkUnit
4849
from datahub.ingestion.source.aws import s3_util
@@ -260,7 +261,7 @@ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
260261
StaleEntityRemovalHandler.create(
261262
self, self.config, self.ctx
262263
).workunit_processor,
263-
check_workunit_correctness
264+
check_workunit_correctness,
264265
]
265266

266267
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:

0 commit comments

Comments
 (0)