Skip to content

Commit 4abbfea

Browse files
hsheth2 authored and sleeperdeep committed
fix(ingest/powerbi): reduce type cast usage (datahub-project#12004)
1 parent eb5005a commit 4abbfea

File tree

5 files changed

+36
-56
lines changed

5 files changed

+36
-56
lines changed

metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/data_classes.py

+2-13
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import os
2-
from abc import ABC
32
from dataclasses import dataclass
43
from enum import Enum
54
from typing import Any, Dict, List, Optional
@@ -12,18 +11,8 @@
1211
TRACE_POWERBI_MQUERY_PARSER = os.getenv("DATAHUB_TRACE_POWERBI_MQUERY_PARSER", False)
1312

1413

15-
class AbstractIdentifierAccessor(ABC): # To pass lint
16-
pass
17-
18-
19-
# @dataclass
20-
# class ItemSelector:
21-
# items: Dict[str, Any]
22-
# next: Optional[AbstractIdentifierAccessor]
23-
24-
2514
@dataclass
26-
class IdentifierAccessor(AbstractIdentifierAccessor):
15+
class IdentifierAccessor:
2716
"""
2817
statement
2918
public_order_date = Source{[Schema="public",Item="order_date"]}[Data]
@@ -40,7 +29,7 @@ class IdentifierAccessor(AbstractIdentifierAccessor):
4029

4130
identifier: str
4231
items: Dict[str, Any]
43-
next: Optional[AbstractIdentifierAccessor]
32+
next: Optional["IdentifierAccessor"]
4433

4534

4635
@dataclass

metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/pattern_handler.py

+19-27
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import logging
22
from abc import ABC, abstractmethod
33
from enum import Enum
4-
from typing import Dict, List, Optional, Tuple, Type, Union, cast
4+
from typing import Dict, List, Optional, Tuple, Type, cast
55

66
from lark import Tree
77

@@ -22,7 +22,6 @@
2222
)
2323
from datahub.ingestion.source.powerbi.m_query import native_sql_parser, tree_function
2424
from datahub.ingestion.source.powerbi.m_query.data_classes import (
25-
AbstractIdentifierAccessor,
2625
DataAccessFunctionDetail,
2726
DataPlatformTable,
2827
FunctionName,
@@ -412,33 +411,25 @@ def create_lineage(
412411
)
413412
table_detail: Dict[str, str] = {}
414413
temp_accessor: Optional[
415-
Union[IdentifierAccessor, AbstractIdentifierAccessor]
414+
IdentifierAccessor
416415
] = data_access_func_detail.identifier_accessor
417416

418417
while temp_accessor:
419-
if isinstance(temp_accessor, IdentifierAccessor):
420-
# Condition to handle databricks M-query pattern where table, schema and database all are present in
421-
# the same invoke statement
422-
if all(
423-
element in temp_accessor.items
424-
for element in ["Item", "Schema", "Catalog"]
425-
):
426-
table_detail["Schema"] = temp_accessor.items["Schema"]
427-
table_detail["Table"] = temp_accessor.items["Item"]
428-
else:
429-
table_detail[temp_accessor.items["Kind"]] = temp_accessor.items[
430-
"Name"
431-
]
432-
433-
if temp_accessor.next is not None:
434-
temp_accessor = temp_accessor.next
435-
else:
436-
break
418+
# Condition to handle databricks M-query pattern where table, schema and database all are present in
419+
# the same invoke statement
420+
if all(
421+
element in temp_accessor.items
422+
for element in ["Item", "Schema", "Catalog"]
423+
):
424+
table_detail["Schema"] = temp_accessor.items["Schema"]
425+
table_detail["Table"] = temp_accessor.items["Item"]
437426
else:
438-
logger.debug(
439-
"expecting instance to be IdentifierAccessor, please check if parsing is done properly"
440-
)
441-
return Lineage.empty()
427+
table_detail[temp_accessor.items["Kind"]] = temp_accessor.items["Name"]
428+
429+
if temp_accessor.next is not None:
430+
temp_accessor = temp_accessor.next
431+
else:
432+
break
442433

443434
table_reference = self.create_reference_table(
444435
arg_list=data_access_func_detail.arg_list,
@@ -786,9 +777,10 @@ def get_db_name(self, data_access_tokens: List[str]) -> Optional[str]:
786777
def create_lineage(
787778
self, data_access_func_detail: DataAccessFunctionDetail
788779
) -> Lineage:
789-
t1: Tree = cast(
790-
Tree, tree_function.first_arg_list_func(data_access_func_detail.arg_list)
780+
t1: Optional[Tree] = tree_function.first_arg_list_func(
781+
data_access_func_detail.arg_list
791782
)
783+
assert t1 is not None
792784
flat_argument_list: List[Tree] = tree_function.flat_argument_list(t1)
793785

794786
if len(flat_argument_list) != 2:

metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/resolver.py

+8-10
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import logging
22
from abc import ABC, abstractmethod
3-
from typing import Any, Dict, List, Optional, Tuple, Union, cast
3+
from typing import Any, Dict, List, Optional, Tuple, Union
44

55
from lark import Tree
66

@@ -95,14 +95,12 @@ def get_item_selector_tokens(
9595
# remove whitespaces and quotes from token
9696
tokens: List[str] = tree_function.strip_char_from_list(
9797
tree_function.remove_whitespaces_from_list(
98-
tree_function.token_values(
99-
cast(Tree, item_selector), parameters=self.parameters
100-
)
98+
tree_function.token_values(item_selector, parameters=self.parameters)
10199
),
102100
)
103101
identifier: List[str] = tree_function.token_values(
104-
cast(Tree, identifier_tree)
105-
) # type :ignore
102+
identifier_tree, parameters={}
103+
)
106104

107105
# convert tokens to dict
108106
iterator = iter(tokens)
@@ -238,10 +236,10 @@ def _process_invoke_expression(
238236
def _process_item_selector_expression(
239237
self, rh_tree: Tree
240238
) -> Tuple[Optional[str], Optional[Dict[str, str]]]:
241-
new_identifier, key_vs_value = self.get_item_selector_tokens( # type: ignore
242-
cast(Tree, tree_function.first_expression_func(rh_tree))
243-
)
239+
first_expression: Optional[Tree] = tree_function.first_expression_func(rh_tree)
240+
assert first_expression is not None
244241

242+
new_identifier, key_vs_value = self.get_item_selector_tokens(first_expression)
245243
return new_identifier, key_vs_value
246244

247245
@staticmethod
@@ -327,7 +325,7 @@ def internal(
327325
# The first argument can be a single table argument or list of table.
328326
# For example Table.Combine({t1,t2},....), here first argument is list of table.
329327
# Table.AddColumn(t1,....), here first argument is single table.
330-
for token in cast(List[str], result):
328+
for token in result:
331329
internal(token, identifier_accessor)
332330

333331
else:

metadata-ingestion/src/datahub/ingestion/source/powerbi/m_query/tree_function.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
import logging
22
from functools import partial
3-
from typing import Any, Dict, List, Optional, Union, cast
3+
from typing import Any, Dict, List, Optional, Union
44

55
from lark import Token, Tree
66

@@ -58,7 +58,7 @@ def internal(node: Union[Tree, Token]) -> Optional[Tree]:
5858
if isinstance(node, Token):
5959
return None
6060

61-
for child in cast(Tree, node).children:
61+
for child in node.children:
6262
child_node: Optional[Tree] = internal(child)
6363
if child_node is not None:
6464
return child_node
@@ -99,7 +99,7 @@ def internal(node: Union[Tree, Token]) -> None:
9999
logger.debug(f"Unable to resolve parameter reference to {ref}")
100100
values.append(ref)
101101
elif isinstance(node, Token):
102-
values.append(cast(Token, node).value)
102+
values.append(node.value)
103103
return
104104
else:
105105
for child in node.children:

metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_config.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import logging
22
from collections import defaultdict
33
from dataclasses import dataclass
4-
from typing import Dict, List, Optional, Set, cast
4+
from typing import Dict, List, Optional, Set
55

66
import pydantic
77
from pydantic import Field, SecretStr, root_validator, validator
@@ -118,9 +118,10 @@ def validate_legacy_schema_pattern(cls, values: Dict) -> Dict:
118118
)
119119

120120
# Always exclude reporting metadata for INFORMATION_SCHEMA schema
121-
if schema_pattern is not None and schema_pattern:
121+
if schema_pattern:
122122
logger.debug("Adding deny for INFORMATION_SCHEMA to schema_pattern.")
123-
cast(AllowDenyPattern, schema_pattern).deny.append(r".*INFORMATION_SCHEMA$")
123+
assert isinstance(schema_pattern, AllowDenyPattern)
124+
schema_pattern.deny.append(r".*INFORMATION_SCHEMA$")
124125

125126
return values
126127

0 commit comments

Comments (0)