Skip to content

Commit 1e0f993

Browse files
authored
fix(ingest/azure-ad): limit the size of the ingestion report (#12498)
1 parent 317b740 commit 1e0f993

File tree

1 file changed

+6
-14
lines changed
  • metadata-ingestion/src/datahub/ingestion/source/identity

1 file changed

+6
-14
lines changed

metadata-ingestion/src/datahub/ingestion/source/identity/azure_ad.py

+6 -14
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
from datahub.configuration.common import AllowDenyPattern
1515
from datahub.configuration.source_common import DatasetSourceConfigMixin
16+
from datahub.configuration.validate_field_removal import pydantic_removed_field
1617
from datahub.emitter.mce_builder import make_group_urn, make_user_urn
1718
from datahub.emitter.mcp import MetadataChangeProposalWrapper
1819
from datahub.ingestion.api.common import PipelineContext
@@ -51,6 +52,7 @@
5152
OriginTypeClass,
5253
StatusClass,
5354
)
55+
from datahub.utilities.lossy_collections import LossyList
5456

5557
logger = logging.getLogger(__name__)
5658

@@ -132,11 +134,7 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
132134
description="regex patterns for groups to include in ingestion.",
133135
)
134136

135-
# If enabled, report will contain names of filtered users and groups.
136-
filtered_tracking: bool = Field(
137-
default=True,
138-
description="If enabled, report will contain names of filtered users and groups.",
139-
)
137+
_remove_filtered_tracking = pydantic_removed_field("filtered_tracking")
140138

141139
# Optional: Whether to mask sensitive information from workunit ID's. On by default.
142140
mask_group_id: bool = Field(
@@ -156,14 +154,10 @@ class AzureADConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin):
156154

157155
@dataclass
158156
class AzureADSourceReport(StaleEntityRemovalSourceReport):
159-
filtered: List[str] = field(default_factory=list)
160-
filtered_tracking: bool = field(default=True, repr=False)
161-
filtered_count: int = field(default=0)
157+
filtered: LossyList[str] = field(default_factory=LossyList)
162158

163159
def report_filtered(self, name: str) -> None:
164-
self.filtered_count += 1
165-
if self.filtered_tracking:
166-
self.filtered.append(name)
160+
self.filtered.append(name)
167161

168162

169163
# Source that extracts Azure AD users, groups and group memberships using Microsoft Graph REST API
@@ -266,9 +260,7 @@ def create(cls, config_dict, ctx):
266260
def __init__(self, config: AzureADConfig, ctx: PipelineContext):
267261
super().__init__(config, ctx)
268262
self.config = config
269-
self.report = AzureADSourceReport(
270-
filtered_tracking=self.config.filtered_tracking
271-
)
263+
self.report = AzureADSourceReport()
272264
session = requests.Session()
273265
retries = Retry(
274266
total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504]

0 commit comments

Comments
 (0)