Commit dd37113

chore(ruff): enable some ignored rules (#12815)
1 parent b28291b commit dd37113

23 files changed: +39 -99 lines

metadata-ingestion/pyproject.toml (-6)

@@ -43,19 +43,13 @@ extend-ignore = [
     "RUF015", # unnecessary-iterable-allocation-for-first-element

     # Can be enabled later if someone wants to fix existing cases or an auto-fix becomes available
-    "SIM101", # Multiple isinstance calls for {name}, merge into a single call
     "SIM102", # Use a single `if` statement instead of nested `if` statements
     "SIM103", # Return the condition directly
     "SIM105", # Use `contextlib.suppress(...)` instead of `try`-`except`-`pass`
-    "SIM108", # Use ternary operator {contents} instead of if-else-block
-    "SIM110", # Use `return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)` instead of `for` loop
-    "SIM113", # Use enumerate() for index variable {index} in for loop
     "SIM115", # Use a context manager for opening files
     "SIM116", # Use a dictionary instead of consecutive `if` statements
     "SIM117", # Use a single with statement with multiple contexts instead of nested with statements
     "SIM118", # Use key {operator} dict instead of key {operator} dict.keys()
-    "SIM210", # Use `bool(...)` instead of `True if ... else False`
-    "SIM401", # Use `sample_data.get(schema_field.fieldPath, [])` instead of an `if` block
 ]

 [tool.ruff.lint.mccabe]
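For orientation, here is a minimal, self-contained sketch (not from this commit; all names such as `paths`, `values`, and `mapping` are illustrative) showing, in post-fix form, the pattern each newly enabled rule targets:

    import re
    from typing import Any, Dict, List

    def sim101(value: Any) -> bool:
        # Before: isinstance(value, int) or isinstance(value, float)
        return isinstance(value, (int, float))

    def sim108(flag: bool) -> str:
        # Before: if flag: mode = "allow" / else: mode = "deny"
        return "allow" if flag else "deny"

    def sim110(paths: List[str], regex_pattern: str) -> bool:
        # Before: a for-loop returning True on the first match, False after the loop
        return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)

    def sim113(values: List[str]) -> List[str]:
        # Before: a manually maintained `num` counter incremented in the loop body
        return [f"partition_{num}={v}" for num, v in enumerate(values)]

    def sim210(value: Any) -> bool:
        # Before: True if value else False
        return bool(value)

    def sim401(mapping: Dict[str, List[int]], key: str) -> List[int]:
        # Before: mapping[key] if key in mapping else []
        return mapping.get(key, [])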

metadata-ingestion/src/datahub/configuration/common.py (+1 -1)

@@ -130,7 +130,7 @@ class PermissiveConfigModel(ConfigModel):
     # It is usually used for argument bags that are passed through to third-party libraries.

     class Config:
-        if PYDANTIC_VERSION_2:
+        if PYDANTIC_VERSION_2:  # noqa: SIM108
             extra = "allow"
         else:
             extra = Extra.allow
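The trailing noqa matters here: SIM108 would otherwise suggest collapsing the branch into a ternary. A sketch of the auto-fix being suppressed (hypothetical, not part of the commit):

    # What SIM108 would propose inside the Config body:
    extra = "allow" if PYDANTIC_VERSION_2 else Extra.allow

The explicit if/else is kept, presumably because the two branches assign values of different types (a plain string under pydantic v2, an Extra enum member under v1), which reads more clearly at class-definition time.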

metadata-ingestion/src/datahub/ingestion/glossary/classification_mixin.py (+1 -5)

@@ -279,11 +279,7 @@ def get_columns_to_classify(
                     "Dataset_Name": dataset_name,
                 }
             ),
-            values=(
-                sample_data[schema_field.fieldPath]
-                if schema_field.fieldPath in sample_data
-                else []
-            ),
+            values=sample_data.get(schema_field.fieldPath, []),
         )
     )

metadata-ingestion/src/datahub/ingestion/graph/client.py (-2)

@@ -782,9 +782,7 @@ def get_container_urns_by_filter(
         results: Dict = self._post_generic(url, search_body)
         num_entities = results["value"]["numEntities"]
         logger.debug(f"Matched {num_entities} containers")
-        entities_yielded: int = 0
        for x in results["value"]["entities"]:
-            entities_yielded += 1
             logger.debug(f"yielding {x['entity']}")
             yield x["entity"]
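The deleted entities_yielded counter was incremented but never read, so it could simply be dropped. Had the running count actually been used, SIM113's enumerate() fix would be the idiomatic form; a hypothetical sketch:

    # Hypothetical: only if the running count were actually needed
    for entities_yielded, x in enumerate(results["value"]["entities"], start=1):
        logger.debug(f"yielding {x['entity']} ({entities_yielded}/{num_entities})")
        yield x["entity"]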

metadata-ingestion/src/datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py (+1 -1)

@@ -163,7 +163,7 @@ def _convert_sets_to_lists(obj: Any) -> Any:
                 key: DatahubIngestionRunSummaryProvider._convert_sets_to_lists(value)
                 for key, value in obj.items()
             }
-        elif isinstance(obj, list) or isinstance(obj, set):
+        elif isinstance(obj, (list, set)):
             return [
                 DatahubIngestionRunSummaryProvider._convert_sets_to_lists(element)
                 for element in obj

metadata-ingestion/src/datahub/ingestion/run/pipeline.py (+2 -4)

@@ -526,10 +526,8 @@ def process_commits(self) -> None:
         Evaluates the commit_policy for each committable in the context and triggers the commit operation
         on the committable if its required commit policies are satisfied.
         """
-        has_errors: bool = (
-            True
-            if self.source.get_report().failures or self.sink.get_report().failures
-            else False
+        has_errors: bool = bool(
+            self.source.get_report().failures or self.sink.get_report().failures
         )
         has_warnings: bool = bool(
             self.source.get_report().warnings or self.sink.get_report().warnings

metadata-ingestion/src/datahub/ingestion/source/data_lake_common/path_spec.py (+1 -3)

@@ -454,10 +454,8 @@ def get_partition_from_path(self, path: str) -> Optional[List[Tuple[str, str]]]:
                 return None
             partition = partition_split[0]
             # If partition is in the form of /value1/value2/value3 we infer it from the path and assign partition_0, partition_1, partition_2 etc
-            num = 0
-            for partition_value in partition.split("/"):
+            for num, partition_value in enumerate(partition.split("/")):
                 partition_keys.append((f"partition_{num}", partition_value))
-                num += 1
             return partition_keys

         return None

metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_common.py (+2 -4)

@@ -1774,10 +1774,8 @@ def _aggregate_owners(
         logger.debug(
             f"Owner after applying owner extraction pattern:'{self.config.owner_extraction_pattern}' is '{owner}'."
         )
-        if isinstance(owner, list):
-            owners = owner
-        else:
-            owners = [owner]
+        owners = owner if isinstance(owner, list) else [owner]
+
         for owner in owners:
             if self.config.strip_user_ids_from_email:
                 owner = owner.split("@")[0]

metadata-ingestion/src/datahub/ingestion/source/dbt/dbt_tests.py (+4 -8)

@@ -57,15 +57,11 @@ def _get_name_for_relationship_test(kw_args: Dict[str, str]) -> Optional[str]:
         # base assertions are violated, bail early
         return None
     m = re.match(r"^ref\(\'(.*)\'\)$", destination_ref)
-    if m:
-        destination_table = m.group(1)
-    else:
-        destination_table = destination_ref
+    destination_table = m.group(1) if m else destination_ref
+
     m = re.search(r"ref\(\'(.*)\'\)", source_ref)
-    if m:
-        source_table = m.group(1)
-    else:
-        source_table = source_ref
+    source_table = m.group(1) if m else source_ref
+
     return f"{source_table}.{column_name} referential integrity to {destination_table}.{dest_field_name}"

metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_api.py (+1 -5)

@@ -683,11 +683,7 @@ def _check_pattern_match(
         # Add end anchor for exact matching
         regex_pattern = regex_pattern + "$"

-        for path in paths:
-            if re.match(regex_pattern, path, re.IGNORECASE):
-                return True
-
-        return False
+        return any(re.match(regex_pattern, path, re.IGNORECASE) for path in paths)

     def should_include_container(self, path: List[str], name: str) -> bool:
         """

metadata-ingestion/src/datahub/ingestion/source/dremio/dremio_aspects.py (+1 -4)

@@ -116,10 +116,7 @@ def get_field_type(
         data_type = data_type.lower()
         type_class = cls.FIELD_TYPE_MAPPING.get(data_type, NullTypeClass)

-        if data_size:
-            native_data_type = f"{data_type}({data_size})"
-        else:
-            native_data_type = data_type
+        native_data_type = f"{data_type}({data_size})" if data_size else data_type

         try:
             schema_field_type = SchemaFieldDataTypeClass(type=type_class())

metadata-ingestion/src/datahub/ingestion/source/kafka_connect/common.py (+1 -6)

@@ -141,12 +141,7 @@ def get_dataset_name(
     database_name: Optional[str],
     source_table: str,
 ) -> str:
-    if database_name:
-        dataset_name = database_name + "." + source_table
-    else:
-        dataset_name = source_table
-
-    return dataset_name
+    return database_name + "." + source_table if database_name else source_table


 def get_platform_instance(

metadata-ingestion/src/datahub/ingestion/source/sql/athena.py (+6 -12)

@@ -540,19 +540,13 @@ def get_schema_fields_for_column(
             inspector=inspector,
             description=column.get("comment"),
             nullable=column.get("nullable", True),
-            is_part_of_key=(
-                True
-                if (
-                    pk_constraints is not None
-                    and isinstance(pk_constraints, dict)
-                    and column["name"] in pk_constraints.get("constrained_columns", [])
-                )
-                else False
+            is_part_of_key=bool(
+                pk_constraints is not None
+                and isinstance(pk_constraints, dict)
+                and column["name"] in pk_constraints.get("constrained_columns", [])
             ),
-            is_partitioning_key=(
-                True
-                if (partition_keys is not None and column["name"] in partition_keys)
-                else False
+            is_partitioning_key=bool(
+                partition_keys is not None and column["name"] in partition_keys
             ),
         )

metadata-ingestion/src/datahub/ingestion/source/sql/hive.py (+2 -6)

@@ -821,12 +821,8 @@ def _process_view(

         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            view_definition = str(view_definition) if view_definition else ""
         except NotImplementedError:
             view_definition = ""

metadata-ingestion/src/datahub/ingestion/source/sql/hive_metastore.py (+2 -1)

@@ -893,8 +893,9 @@ def get_schema_fields_for_column(
         return get_schema_fields_for_hive_column(
             column["col_name"],
             column["col_type"],
+            # column is actually an sqlalchemy.engine.row.LegacyRow, not a Dict and we cannot make column.get("col_description", "")
             description=(
-                column["col_description"] if "col_description" in column else ""
+                column["col_description"] if "col_description" in column else ""  # noqa: SIM401
             ),
             default_nullable=True,
         )
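This is the one place where SIM401's suggested fix would be wrong: as the new inline comment notes, column is an sqlalchemy.engine.row.LegacyRow rather than a dict, so ruff's proposed rewrite would fail at runtime. A hypothetical sketch of the failure mode:

    # Hypothetical illustration: LegacyRow supports `in` and indexing, but has
    # no dict-style .get(), so ruff's suggested
    #     column.get("col_description", "")
    # would raise AttributeError — hence the `# noqa: SIM401`.
    description = column["col_description"] if "col_description" in column else ""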

metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py (+3 -9)

@@ -1031,16 +1031,10 @@ def loop_views(
     def _get_view_definition(self, inspector: Inspector, schema: str, view: str) -> str:
         try:
             view_definition = inspector.get_view_definition(view, schema)
-            if view_definition is None:
-                view_definition = ""
-            else:
-                # Some dialects return a TextClause instead of a raw string,
-                # so we need to convert them to a string.
-                view_definition = str(view_definition)
+            # Some dialects return a TextClause instead of a raw string, so we need to convert them to a string.
+            return str(view_definition) if view_definition else ""
         except NotImplementedError:
-            view_definition = ""
-
-        return view_definition
+            return ""

     def _process_view(
         self,

metadata-ingestion/src/datahub/ingestion/source/state/stale_entity_removal_handler.py (+4 -8)

@@ -114,14 +114,10 @@ def __init__(
         self.stateful_ingestion_config: Optional[StatefulStaleMetadataRemovalConfig] = (
             config.stateful_ingestion
         )
-        self.checkpointing_enabled: bool = (
-            True
-            if (
-                self.state_provider.is_stateful_ingestion_configured()
-                and self.stateful_ingestion_config
-                and self.stateful_ingestion_config.remove_stale_metadata
-            )
-            else False
+        self.checkpointing_enabled: bool = bool(
+            self.state_provider.is_stateful_ingestion_configured()
+            and self.stateful_ingestion_config
+            and self.stateful_ingestion_config.remove_stale_metadata
         )
         self._job_id = self._init_job_id()
         self._urns_to_skip: Set[str] = set()

metadata-ingestion/src/datahub/ingestion/source/superset.py (+1 -3)

@@ -431,9 +431,7 @@ def construct_dashboard_from_api_data(
                     dashboard_data.get("owners", []),
                 )
             ),
-            "IsCertified": str(
-                True if dashboard_data.get("certified_by") else False
-            ).lower(),
+            "IsCertified": str(bool(dashboard_data.get("certified_by"))).lower(),
         }

         if dashboard_data.get("certified_by"):

metadata-ingestion/src/datahub/ingestion/source/tableau/tableau_common.py (+1 -1)

@@ -902,7 +902,7 @@ def get_unique_custom_sql(custom_sql_list: List[dict]) -> List[dict]:
             "name": custom_sql.get("name"),
             # We assume that this is unsupported custom sql if "actual tables that this query references"
             # are missing from api result.
-            "isUnsupportedCustomSql": True if not custom_sql.get("tables") else False,
+            "isUnsupportedCustomSql": not custom_sql.get("tables"),
             "query": custom_sql.get("query"),
             "connectionType": custom_sql.get("connectionType"),
             "columns": custom_sql.get("columns"),

metadata-ingestion/src/datahub/lite/duckdb_lite.py (+1 -3)

@@ -760,9 +760,7 @@ def post_update_hook(
                 entity_id=[str(data_platform_urn), data_platform_instance],
             )
             self._create_edges_from_data_platform_instance(data_platform_instance_urn)
-        elif isinstance(aspect, ChartInfoClass) or isinstance(
-            aspect, DashboardInfoClass
-        ):
+        elif isinstance(aspect, (ChartInfoClass, DashboardInfoClass)):
             urn = Urn.from_string(entity_urn)
             self.add_edge(
                 entity_urn,

metadata-ingestion/src/datahub/sdk/dataset.py (+2 -2)

@@ -74,8 +74,8 @@
 def _parse_upstream_input(
     upstream_input: UpstreamInputType,
 ) -> Union[models.UpstreamClass, models.FineGrainedLineageClass]:
-    if isinstance(upstream_input, models.UpstreamClass) or isinstance(
-        upstream_input, models.FineGrainedLineageClass
+    if isinstance(
+        upstream_input, (models.UpstreamClass, models.FineGrainedLineageClass)
     ):
         return upstream_input
     elif isinstance(upstream_input, (str, DatasetUrn)):
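A side note on the merged tuple form: on Python 3.10+, isinstance also accepts a PEP 604 union, e.g. (hypothetical alternative, not used in the commit):

    isinstance(upstream_input, models.UpstreamClass | models.FineGrainedLineageClass)

The tuple form used here has the advantage of working on every Python version the project supports.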

metadata-ingestion/src/datahub/sql_parsing/sqlglot_utils.py (+1 -4)

@@ -56,10 +56,7 @@ def get_dialect(platform: DialectOrStr) -> sqlglot.Dialect:
 def is_dialect_instance(
     dialect: sqlglot.Dialect, platforms: Union[str, Iterable[str]]
 ) -> bool:
-    if isinstance(platforms, str):
-        platforms = [platforms]
-    else:
-        platforms = list(platforms)
+    platforms = [platforms] if isinstance(platforms, str) else list(platforms)

     dialects = [get_dialect(platform) for platform in platforms]

metadata-ingestion/tests/performance/bigquery/bigquery_events.py (+1 -1)

@@ -99,7 +99,7 @@ def generate_events(
                     if config.debug_include_full_payloads
                     else None
                 ),
-                query_on_view=True if referencedViews else False,
+                query_on_view=bool(referencedViews),
             )
         )
     table_accesses: Dict[BigQueryTableRef, Set[str]] = defaultdict(set)
