Skip to content

Commit cd9eac3

Browse files
committed
linting
1 parent 2af9681 commit cd9eac3

File tree

2 files changed

+304
-288
lines changed

2 files changed

+304
-288
lines changed

metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/profiler.py

+18 -15
Original file line number | Diff line number | Diff line change
@@ -252,7 +252,7 @@ def _get_time_hierarchy_values(
252252
timeout: int,
253253
) -> Dict[str, Any]:
254254
"""Get values for time hierarchy columns."""
255-
result = {}
255+
result: Dict[str, Any] = {}
256256
current_time = datetime.now(timezone.utc)
257257

258258
# Set defaults from current time
@@ -323,7 +323,7 @@ def _get_date_column_values(
323323
timeout: int,
324324
) -> Dict[str, Any]:
325325
"""Get values for date columns."""
326-
result = {}
326+
result: Dict[str, Any] = {}
327327

328328
if not date_columns:
329329
return result
@@ -418,7 +418,7 @@ def _get_remaining_column_values(
418418
timeout: int,
419419
) -> Dict[str, Any]:
420420
"""Get values for remaining partition columns."""
421-
result = {}
421+
result: Dict[str, Any] = {}
422422

423423
if not remaining_columns:
424424
return result
@@ -563,7 +563,7 @@ def _extract_partitioning_from_ddl(
563563
ddl_norm = ddl.replace("\n", " ").replace("\t", " ") # Preserve case
564564

565565
# Track found partition cols
566-
found_partition_cols = set()
566+
found_partition_cols: Set[str] = set()
567567

568568
# Case 1: Standard PARTITION BY column
569569
if "PARTITION BY" in ddl_upper:
@@ -606,7 +606,7 @@ def _extract_partitioning_from_ddl(
606606
metadata["partition_columns"]["_PARTITIONTIME"] = "TIMESTAMP"
607607

608608
def _extract_partition_by_clause(
609-
self, upper_clause: str, original_clause: str, found_partition_cols: set
609+
self, upper_clause: str, original_clause: str, found_partition_cols: Set[str]
610610
) -> None:
611611
"""Extract partition columns from a PARTITION BY clause in DDL."""
612612
try:
@@ -1184,7 +1184,7 @@ def _try_date_based_filtering_for_external(
11841184
"day",
11851185
):
11861186
# Try today, yesterday, last week, last month, etc.
1187-
test_dates = [
1187+
test_dates_datetime = [
11881188
time_now,
11891189
time_now - timedelta(days=1),
11901190
time_now - timedelta(days=7),
@@ -1194,7 +1194,7 @@ def _try_date_based_filtering_for_external(
11941194
datetime(time_now.year, 1, 1), # First of year
11951195
]
11961196

1197-
for test_date in test_dates:
1197+
for test_date in test_dates_datetime:
11981198
if col_type == "DATE":
11991199
date_str = test_date.strftime("%Y-%m-%d")
12001200
filter_value = f"DATE '{date_str}'"
@@ -1344,7 +1344,7 @@ def _get_external_table_partition_filters(
13441344

13451345
def _fetch_basic_table_metadata(self, table: BigqueryTable) -> Dict[str, Any]:
13461346
"""Get basic metadata from table object."""
1347-
metadata = {
1347+
metadata: Dict[str, Any] = {
13481348
"partition_columns": {},
13491349
"clustering_columns": {},
13501350
"row_count": table.rows_count,
@@ -1493,7 +1493,7 @@ def _try_time_hierarchy_approach(
14931493
"""Helper method to try time hierarchy partitioning approach"""
14941494
# Time hierarchy columns in order of precedence
14951495
time_hierarchy_cols = ["year", "month", "day", "hour"]
1496-
hierarchy_filters = []
1496+
hierarchy_filters: List[str] = []
14971497

14981498
# Build a filter using time hierarchy
14991499
time_now = datetime.now(timezone.utc)
@@ -2129,12 +2129,15 @@ def get_profile_request(
21292129
f"Using extended timeouts for very large table {profile_request.pretty_name}"
21302130
)
21312131
# Add extended query timeouts for very large tables
2132-
profile_request.profiler_config.update(
2133-
{
2134-
"query_timeout": 300, # 5 minutes
2135-
"chunk_size": 5000,
2136-
}
2137-
)
2132+
if hasattr(profile_request, "profiler_config"):
2133+
profile_request.profiler_config.update(
2134+
{
2135+
"query_timeout": 300, # 5 minutes
2136+
"chunk_size": 5000,
2137+
}
2138+
)
2139+
else:
2140+
logger.debug("profiler_config not available on TableProfilerRequest")
21382141

21392142
return profile_request
21402143

0 commit comments

Comments
 (0)