@@ -47,6 +47,8 @@ def __init__(
         self._queried_tables: Set[str] = set()
         # Cache for successful partition filters
         self._successful_filters_cache: Dict[str, List[str]] = {}
+        # Detect BigQuery schema version and set up column mappings
+        self._detect_bq_schema_version()

     def _execute_cached_query(
         self,
@@ -64,10 +66,8 @@ def _execute_cached_query(

         while retries <= max_retries:
             try:
-                # Apply query modifier to adjust column names based on BigQuery version
-                modified_query = self._adjust_query_for_bq_version(query)
-
-                def execute_query(query_to_execute=modified_query):
+                # Bind the query via a default argument to avoid closure late-binding issues
+                def execute_query(query_to_execute=query):
                     return list(
                         self.config.get_bigquery_client()
                         .query(query_to_execute)
@@ -88,9 +88,7 @@ def execute_query(query_to_execute=modified_query):
                 )
                 retries += 1
                 if retries > max_retries:
-                    logger.warning(
-                        f"Final timeout for query: {modified_query[:200]}..."
-                    )
+                    logger.warning(f"Final timeout for query: {query[:200]}...")
                     return []
                 # Increase timeout for retries
                 timeout = min(timeout * 2, 300)  # Max 5 minutes
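
Note on the `query_to_execute=query` default above: Python closures capture variables by reference, not by value, so a function defined inside a retry loop can otherwise see a later rebinding of the variable. A standalone sketch of the pitfall and the default-argument fix (illustrative only, not code from this PR):

```python
# Minimal sketch of the closure late-binding pitfall the diff avoids.

def make_callbacks_buggy():
    callbacks = []
    for query in ["q1", "q2", "q3"]:
        # BUG: the closure captures the *variable* `query`, so every
        # callback sees the loop's final value ("q3") when invoked later.
        callbacks.append(lambda: query)
    return callbacks

def make_callbacks_fixed():
    callbacks = []
    for query in ["q1", "q2", "q3"]:
        # FIX: a default argument is evaluated at definition time,
        # binding the current value of `query` to each callback.
        callbacks.append(lambda query_to_execute=query: query_to_execute)
    return callbacks

assert [cb() for cb in make_callbacks_buggy()] == ["q3", "q3", "q3"]
assert [cb() for cb in make_callbacks_fixed()] == ["q1", "q2", "q3"]
```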
@@ -154,37 +152,69 @@ def _adjust_query_for_bq_version(self, query: str) -> str:

        return modified_query

-    def _detect_bq_schema_version(self):
+    def _detect_bq_schema_version(self) -> None:
        """
        Detect which version of INFORMATION_SCHEMA we're working with.
-        Set self._bq_uses_new_schema to True if we have the newer schema version.
+        Sets self._column_name_mapping with the appropriate column mappings.
        """
        try:
-            # Try to execute a simple query to detect the schema version
-            detect_query = """
-                SELECT column_name
-                FROM INFORMATION_SCHEMA.COLUMNS
-                WHERE table_name = 'TABLES'
-                AND table_schema = 'INFORMATION_SCHEMA'
+            # Run a simple test query that works on all schema versions;
+            # one row is enough to check which column names are exposed
+            test_query = """
+                SELECT *
+                FROM INFORMATION_SCHEMA.TABLES
+                LIMIT 1
            """

-            results = list(
-                self.config.get_bigquery_client().query(detect_query).result()
-            )
+            results = list(self.config.get_bigquery_client().query(test_query).result())
+            if not results:
+                # If no results, default to the old schema (safer choice)
+                self._column_name_mapping = {
+                    "row_count": "row_count",
+                    "size_bytes": "size_bytes",
+                }
+                logger.info(
+                    "No test results returned. Defaulting to old schema column names."
+                )
+                return

-            # Look for the new column names in the results
-            column_names = [row.column_name for row in results]
-            self._bq_uses_new_schema = "total_rows" in column_names
+            # Check which column names are available in the result
+            row = results[0]
+
+            # Initialize mapping dict
+            self._column_name_mapping = {}
+
+            # Check for the row count column
+            if hasattr(row, "total_rows"):
+                self._column_name_mapping["row_count"] = "total_rows"
+            elif hasattr(row, "row_count"):
+                self._column_name_mapping["row_count"] = "row_count"
+            else:
+                # Neither found; default to the old-schema name
+                self._column_name_mapping["row_count"] = "row_count"
+
+            # Check for the size bytes column
+            if hasattr(row, "total_logical_bytes"):
+                self._column_name_mapping["size_bytes"] = "total_logical_bytes"
+            elif hasattr(row, "size_bytes"):
+                self._column_name_mapping["size_bytes"] = "size_bytes"
+            else:
+                # Neither found; default to the old-schema name
+                self._column_name_mapping["size_bytes"] = "size_bytes"

            logger.info(
-                f"Detected BigQuery INFORMATION_SCHEMA version. Uses new schema: {self._bq_uses_new_schema}"
+                f"Detected BigQuery INFORMATION_SCHEMA column mapping: {self._column_name_mapping}"
            )
+
        except Exception as e:
-            # If detection fails, default to the new schema (safer option)
+            # If detection fails, default to the old schema, as it's more common
+            self._column_name_mapping = {
+                "row_count": "row_count",
+                "size_bytes": "size_bytes",
+            }
            logger.warning(
-                f"Could not detect BigQuery schema version: {e}. Defaulting to new schema."
+                f"Error detecting BigQuery schema version: {e}. Defaulting to old schema column names."
            )
-            self._bq_uses_new_schema = True

    @staticmethod
    def get_partition_range_from_partition_id(
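
For context, the detection above assumes the two schema generations expose row and size metadata under different names: `total_rows`/`total_logical_bytes` in the newer layout versus `row_count`/`size_bytes` in the older one, per the mapping in this diff. A self-contained sketch of the same `hasattr`-based mapping, using `SimpleNamespace` as a stand-in for a BigQuery result row:

```python
# Standalone sketch of the hasattr-based detection; SimpleNamespace is a
# hypothetical stand-in for a real BigQuery Row, for illustration only.
from types import SimpleNamespace
from typing import Dict

def build_column_mapping(row) -> Dict[str, str]:
    """Map logical names to whichever physical columns the row exposes."""
    mapping = {}
    # Prefer the newer column names; fall back to the older ones.
    mapping["row_count"] = "total_rows" if hasattr(row, "total_rows") else "row_count"
    mapping["size_bytes"] = (
        "total_logical_bytes" if hasattr(row, "total_logical_bytes") else "size_bytes"
    )
    return mapping

# A row from the newer schema exposes total_rows / total_logical_bytes...
new_row = SimpleNamespace(total_rows=42, total_logical_bytes=1024)
assert build_column_mapping(new_row) == {
    "row_count": "total_rows",
    "size_bytes": "total_logical_bytes",
}

# ...while an old-schema row exposes row_count / size_bytes.
old_row = SimpleNamespace(row_count=42, size_bytes=1024)
assert build_column_mapping(old_row) == {
    "row_count": "row_count",
    "size_bytes": "size_bytes",
}
```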
@@ -1499,15 +1529,22 @@ def _fetch_schema_info(
        self, table: BigqueryTable, project: str, schema: str, metadata: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Fetch schema information from INFORMATION_SCHEMA."""
-        # Use column aliases to handle both schema versions
+        if not hasattr(self, "_column_name_mapping"):
+            self._detect_bq_schema_version()
+
+        # Get column name mappings
+        row_count_col = self._column_name_mapping.get("row_count", "row_count")
+        size_bytes_col = self._column_name_mapping.get("size_bytes", "size_bytes")
+
+        # Use explicit column selection and aliases to normalize names across schema versions
        combined_query = f"""
            SELECT
                c.column_name,
                c.data_type,
                c.is_partitioning_column,
                c.clustering_ordinal_position,
-                t.row_count,
-                t.size_bytes,
+                t.{row_count_col} as row_count,
+                t.{size_bytes_col} as size_bytes,
                t.ddl,
                t.creation_time,
                t.last_modified_time,
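
The `t.{row_count_col} as row_count` aliasing keeps downstream code schema-agnostic: whichever physical column the mapping selects, results always expose the logical names. A minimal sketch with hypothetical project/dataset/table names (interpolating identifiers into SQL is acceptable here only because they come from a fixed allowlist, never from user input):

```python
# Minimal sketch of the aliasing pattern; project/dataset/table names are
# hypothetical. The mapping values come from a fixed allowlist, so the
# f-string interpolation cannot inject arbitrary SQL.
column_name_mapping = {"row_count": "total_rows", "size_bytes": "total_logical_bytes"}

row_count_col = column_name_mapping.get("row_count", "row_count")
size_bytes_col = column_name_mapping.get("size_bytes", "size_bytes")

stats_query = f"""
    SELECT
        {row_count_col} AS row_count,
        {size_bytes_col} AS size_bytes
    FROM `my_project.my_dataset`.INFORMATION_SCHEMA.TABLES
    WHERE table_name = 'my_table'
"""
print(stats_query)  # downstream code always reads row.row_count / row.size_bytes
```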
@@ -1544,7 +1581,7 @@ def _fetch_schema_info(
        }

        # Update table metadata from first row (all rows have same values)
-        # Use getattr with default values to handle missing attributes safely
+        # Use hasattr checks to handle missing attributes safely
        if (
            hasattr(row, "row_count")
            and row.row_count
@@ -1588,11 +1625,18 @@ def _fetch_table_stats(
    ) -> Dict[str, Any]:
        """Fetch additional table stats if needed."""
        if not metadata.get("row_count") or not metadata.get("size_bytes"):
-            # Use basic column names - our _adjust_query_for_bq_version function will handle the conversion
+            if not hasattr(self, "_column_name_mapping"):
+                self._detect_bq_schema_version()
+
+            # Get column name mappings
+            row_count_col = self._column_name_mapping.get("row_count", "row_count")
+            size_bytes_col = self._column_name_mapping.get("size_bytes", "size_bytes")
+
+            # Use explicit column selection with aliases
            stats_query = f"""
                SELECT
-                    row_count,
-                    size_bytes,
+                    {row_count_col} as row_count,
+                    {size_bytes_col} as size_bytes,
                    creation_time,
                    last_modified_time
                FROM
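
The `hasattr` guard before touching `_column_name_mapping` is a lazy-initialization fallback: even if the `__init__` hook added in this PR is bypassed, detection still runs on first use. A compact sketch of the pattern with a hypothetical class:

```python
# Sketch of the lazy-initialization guard (hypothetical class, illustration only).
class StatsFetcher:
    def _detect(self) -> None:
        # Imagine an expensive schema probe here; it runs at most once.
        self._column_name_mapping = {"row_count": "row_count"}

    def fetch(self) -> str:
        # Guard: build the mapping on first use if __init__ never did.
        if not hasattr(self, "_column_name_mapping"):
            self._detect()
        return self._column_name_mapping.get("row_count", "row_count")

f = StatsFetcher.__new__(StatsFetcher)  # simulate an instance that skipped __init__
assert f.fetch() == "row_count"
```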
@@ -1608,13 +1652,16 @@ def _fetch_table_stats(

            if stats_results:
                row = stats_results[0]
-                if hasattr(row, "row_count") and row.row_count:
+                if hasattr(row, "row_count") and row.row_count is not None:
                    metadata["row_count"] = row.row_count
-                if hasattr(row, "size_bytes") and row.size_bytes:
+                if hasattr(row, "size_bytes") and row.size_bytes is not None:
                    metadata["size_bytes"] = row.size_bytes
-                if hasattr(row, "creation_time") and row.creation_time:
+                if hasattr(row, "creation_time") and row.creation_time is not None:
                    metadata["creation_time"] = row.creation_time
-                if hasattr(row, "last_modified_time") and row.last_modified_time:
+                if (
+                    hasattr(row, "last_modified_time")
+                    and row.last_modified_time is not None
+                ):
                    metadata["last_modified_time"] = row.last_modified_time
        except Exception as e:
            logger.warning(
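
The switch from truthiness to `is not None` matters for empty tables: a legitimate `row_count` of 0 is falsy, so the old `if row.row_count:` silently dropped the stat. A quick standalone illustration:

```python
# Why `is not None` beats truthiness here: 0 is a valid row count.
row_count = 0  # e.g. an empty table

metadata = {}
if row_count:  # BUG: 0 is falsy, so the stat is silently dropped
    metadata["row_count"] = row_count
assert "row_count" not in metadata

if row_count is not None:  # FIX: only skips genuinely missing values
    metadata["row_count"] = row_count
assert metadata["row_count"] == 0
```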