Skip to content

Commit b74ba11

Browse files
authored
fix(ingest/delta-lake): skip file count if require_files is false (#11611)
1 parent 3387110 commit b74ba11

File tree

2 files changed

+4
-7
lines changed

2 files changed

+4
-7
lines changed

metadata-ingestion/src/datahub/ingestion/source/delta_lake/source.py

+2-3
Original file line number | Diff line number | Diff line change
@@ -223,15 +223,14 @@ def ingest_table(
223223
)
224224

225225
customProperties = {
226-
"number_of_files": str(get_file_count(delta_table)),
227226
"partition_columns": str(delta_table.metadata().partition_columns),
228227
"table_creation_time": str(delta_table.metadata().created_time),
229228
"id": str(delta_table.metadata().id),
230229
"version": str(delta_table.version()),
231230
"location": self.source_config.complete_path,
232231
}
233-
if not self.source_config.require_files:
234-
del customProperties["number_of_files"] # always 0
232+
if self.source_config.require_files:
233+
customProperties["number_of_files"] = str(get_file_count(delta_table))
235234

236235
dataset_properties = DatasetPropertiesClass(
237236
description=delta_table.metadata().description,

metadata-ingestion/tests/unit/test_mlflow_source.py

+2-4
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import datetime
22
from pathlib import Path
3-
from typing import Any, TypeVar, Union
3+
from typing import Any, Union
44

55
import pytest
66
from mlflow import MlflowClient
@@ -11,8 +11,6 @@
1111
from datahub.ingestion.api.common import PipelineContext
1212
from datahub.ingestion.source.mlflow import MLflowConfig, MLflowSource
1313

14-
T = TypeVar("T")
15-
1614

1715
@pytest.fixture
1816
def tracking_uri(tmp_path: Path) -> str:
@@ -46,7 +44,7 @@ def model_version(
4644
)
4745

4846

49-
def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[T]:
47+
def dummy_search_func(page_token: Union[None, str], **kwargs: Any) -> PagedList[str]:
5048
dummy_pages = dict(
5149
page_1=PagedList(items=["a", "b"], token="page_2"),
5250
page_2=PagedList(items=["c", "d"], token="page_3"),

0 commit comments

Comments
 (0)