
Commit 476f26f

feat(issue-platform): Support passing fingerprints with multiple hashes when sending occurrences (#87311)
This introduces handling for fingerprints with multiple components to the issue platform. It works the same way grouping does for events: we try to fetch grouphashes for each component, where earlier hashes have priority. If none exist, we create a new group and associate all of the hashes with that group. If multiple groups are already associated with the hashes, we associate the occurrence with the first group found and leave the hash associations unchanged. This shouldn't happen in the issue platform in general, but it's worth being cautious. A follow-up PR will handle this for status changes as well.
1 parent 014eee9 commit 476f26f
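
For orientation, here is a minimal, self-contained sketch of the lookup order described above, using a plain dict in place of the GroupHash table. The names resolve_group, known_hashes, and table are hypothetical stand-ins for illustration only; the real implementation is in save_issue_from_occurrence in the diff below, and hash_fingerprint mirrors the helper added in src/sentry/issues/ingest.py.

from hashlib import md5


def hash_fingerprint(fingerprint: list[str]) -> list[str]:
    # Same idea as the helper added in this commit: md5 each fingerprint part
    # so the stored hashes have a consistent length.
    return [md5(part.encode("utf-8")).hexdigest() for part in fingerprint]


def resolve_group(fingerprint: list[str], known_hashes: dict[str, int]) -> int:
    # `known_hashes` maps an existing hash to its group id (a stand-in for the
    # GroupHash table). Earlier fingerprint components take priority.
    hashes = hash_fingerprint(fingerprint)
    for hash_value in hashes:
        if hash_value in known_hashes:
            group_id = known_hashes[hash_value]
            break
    else:
        # No component matched an existing hash: "create" a new group.
        group_id = max(known_hashes.values(), default=0) + 1

    # Link any hashes not yet associated with a group; hashes already owned by
    # another group are left untouched, mirroring the get_or_create guard below.
    for hash_value in hashes:
        known_hashes.setdefault(hash_value, group_id)
    return group_id


table: dict[str, int] = {}
first = resolve_group(["hi", "bye"], table)      # new group, both hashes linked
second = resolve_group(["bye", "other"], table)  # reuses the group found via "bye"
assert first == second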

File tree

4 files changed (+135, -20):
  src/sentry/event_manager.py (+2, -2)
  src/sentry/issues/ingest.py (+42, -15)
  tests/sentry/event_manager/test_event_manager.py (+3, -3)
  tests/sentry/issues/test_ingest.py (+88)

src/sentry/event_manager.py (+2, -2)

@@ -2496,7 +2496,7 @@ def save_grouphash_and_group(
     event: Event,
     new_grouphash: str,
     **group_kwargs: Any,
-) -> tuple[Group, bool]:
+) -> tuple[Group, bool, GroupHash]:
     group = None
     with transaction.atomic(router.db_for_write(GroupHash)):
         group_hash, created = GroupHash.objects.get_or_create(project=project, hash=new_grouphash)
@@ -2510,7 +2510,7 @@ def save_grouphash_and_group(
         # Group, we can guarantee that the Group will exist at this point and
         # fetch it via GroupHash
         group = Group.objects.get(grouphash__project=project, grouphash__hash=new_grouphash)
-    return group, created
+    return group, created, group_hash
 
 
 @sentry_sdk.tracing.trace

src/sentry/issues/ingest.py (+42, -15)

@@ -74,7 +74,11 @@ def process_occurrence_data(data: dict[str, Any]) -> None:
         return
 
     # Hash fingerprints to make sure they're a consistent length
-    data["fingerprint"] = [md5(part.encode("utf-8")).hexdigest() for part in data["fingerprint"]]
+    data["fingerprint"] = hash_fingerprint(data["fingerprint"])
+
+
+def hash_fingerprint(fingerprint: list[str]) -> list[str]:
+    return [md5(part.encode("utf-8")).hexdigest() for part in fingerprint]
 
 
 class IssueArgs(TypedDict):
@@ -172,20 +176,24 @@ def save_issue_from_occurrence(
     # until after we have created a `Group`.
     issue_kwargs["message"] = augment_message_with_occurrence(issue_kwargs["message"], occurrence)
 
-    # TODO: For now we will assume a single fingerprint. We can expand later if necessary.
-    # Note that additional fingerprints won't be used to generated additional issues, they'll be
-    # used to map the occurrence to a specific issue.
-    new_grouphash = occurrence.fingerprint[0]
-    existing_grouphash = (
-        GroupHash.objects.filter(project=project, hash=new_grouphash)
-        .select_related("group")
-        .first()
-    )
+    existing_grouphashes = {
+        gh.hash: gh
+        for gh in GroupHash.objects.filter(
+            project=project, hash__in=occurrence.fingerprint
+        ).select_related("group")
+    }
+    primary_grouphash = None
+    for fingerprint_hash in occurrence.fingerprint:
+        if fingerprint_hash in existing_grouphashes:
+            primary_grouphash = existing_grouphashes[fingerprint_hash]
+            break
+
+    if not primary_grouphash:
+        primary_hash = occurrence.fingerprint[0]
 
-    if not existing_grouphash:
         cluster_key = settings.SENTRY_ISSUE_PLATFORM_RATE_LIMITER_OPTIONS.get("cluster", "default")
         client = redis.redis_clusters.get(cluster_key)
-        if not should_create_group(occurrence.type, client, new_grouphash, project):
+        if not should_create_group(occurrence.type, client, primary_hash, project):
             metrics.incr("issues.issue.dropped.noise_reduction")
             return None
 
@@ -213,7 +221,9 @@ def save_issue_from_occurrence(
         ) as metric_tags,
         transaction.atomic(router.db_for_write(GroupHash)),
     ):
-        group, is_new = save_grouphash_and_group(project, event, new_grouphash, **issue_kwargs)
+        group, is_new, primary_grouphash = save_grouphash_and_group(
+            project, event, primary_hash, **issue_kwargs
+        )
         is_regression = False
         span.set_tag("save_issue_from_occurrence.outcome", "new_group")
         metric_tags["save_issue_from_occurrence.outcome"] = "new_group"
@@ -248,10 +258,10 @@ def save_issue_from_occurrence(
         except Exception:
             logger.exception("Failed process assignment for occurrence")
 
-    elif existing_grouphash.group is None:
+    elif primary_grouphash.group is None:
         return None
     else:
-        group = existing_grouphash.group
+        group = primary_grouphash.group
         if group.issue_category.value != occurrence.type.category:
             logger.error(
                 "save_issue_from_occurrence.category_mismatch",
@@ -268,6 +278,23 @@ def save_issue_from_occurrence(
         is_regression = _process_existing_aggregate(group, group_event, issue_kwargs, release)
         group_info = GroupInfo(group=group, is_new=False, is_regression=is_regression)
 
+    additional_hashes = [f for f in occurrence.fingerprint if f != primary_grouphash.hash]
+    for fingerprint_hash in additional_hashes:
+        # Attempt to create the additional grouphash links. They shouldn't be linked to other groups, but guard against
+        # that
+        group_hash, created = GroupHash.objects.get_or_create(
+            project=project, hash=fingerprint_hash, defaults={"group": group_info.group}
+        )
+        if not created:
+            logger.warning(
+                "Failed to create additional grouphash for group, grouphash associated with existing group",
+                extra={
+                    "new_group_id": group_info.group.id,
+                    "hash": fingerprint_hash,
+                    "existing_group_id": group_hash.group_id,
+                },
+            )
+
     return group_info

tests/sentry/event_manager/test_event_manager.py (+3, -3)

@@ -3423,13 +3423,13 @@ def test(self) -> None:
         perf_data = load_data("transaction-n-plus-one", timestamp=before_now(minutes=10))
         event = _get_event_instance(perf_data, project_id=self.project.id)
         group_hash = "some_group"
-        group, created = save_grouphash_and_group(self.project, event, group_hash)
+        group, created, _ = save_grouphash_and_group(self.project, event, group_hash)
         assert created
-        group_2, created = save_grouphash_and_group(self.project, event, group_hash)
+        group_2, created, _ = save_grouphash_and_group(self.project, event, group_hash)
         assert group.id == group_2.id
         assert not created
         assert Group.objects.filter(grouphash__hash=group_hash).count() == 1
-        group_3, created = save_grouphash_and_group(self.project, event, "new_hash")
+        group_3, created, _ = save_grouphash_and_group(self.project, event, "new_hash")
         assert created
         assert group_2.id != group_3.id
         assert Group.objects.filter(grouphash__hash=group_hash).count() == 1

tests/sentry/issues/test_ingest.py (+88)

@@ -19,6 +19,7 @@
 )
 from sentry.issues.ingest import (
     _create_issue_kwargs,
+    hash_fingerprint,
     materialize_metadata,
     save_issue_from_occurrence,
     save_issue_occurrence,
@@ -28,6 +29,7 @@
 from sentry.models.group import Group
 from sentry.models.groupassignee import GroupAssignee
 from sentry.models.groupenvironment import GroupEnvironment
+from sentry.models.grouphash import GroupHash
 from sentry.models.grouprelease import GroupRelease
 from sentry.models.release import Release
 from sentry.models.releaseprojectenvironment import ReleaseProjectEnvironment
@@ -213,6 +215,21 @@ def test_new_group(self) -> None:
             },
         )
 
+    def test_new_group_multiple_fingerprint(self) -> None:
+        fingerprint = ["hi", "bye"]
+        occurrence = self.build_occurrence(type=ErrorGroupType.type_id, fingerprint=fingerprint)
+        event = self.store_event(project_id=self.project.id, data={})
+
+        group_info = save_issue_from_occurrence(occurrence, event, None)
+        assert group_info is not None
+        assert group_info.is_new
+        assert not group_info.is_regression
+
+        group = group_info.group
+        assert group.title == occurrence.issue_title
+        grouphashes = set(GroupHash.objects.filter(group=group).values_list("hash", flat=True))
+        assert set(hash_fingerprint(fingerprint)) == grouphashes
+
     def test_existing_group(self) -> None:
         event = self.store_event(data={}, project_id=self.project.id)
         occurrence = self.build_occurrence(fingerprint=["some-fingerprint"])
@@ -237,6 +254,77 @@ def test_existing_group(self) -> None:
         assert updated_group.times_seen == 2
         assert updated_group.message == "<unlabeled event> new title new subtitle api/123"
 
+    def test_existing_group_multiple_fingerprints(self) -> None:
+        fingerprint = ["some-fingerprint"]
+        event = self.store_event(data={}, project_id=self.project.id)
+        occurrence = self.build_occurrence(fingerprint=fingerprint)
+        group_info = save_issue_from_occurrence(occurrence, event, None)
+        assert group_info is not None
+        assert group_info.is_new
+        grouphashes = set(
+            GroupHash.objects.filter(group=group_info.group).values_list("hash", flat=True)
+        )
+        assert set(hash_fingerprint(fingerprint)) == grouphashes
+
+        fingerprint = ["some-fingerprint", "another-fingerprint"]
+        new_event = self.store_event(data={}, project_id=self.project.id)
+        new_occurrence = self.build_occurrence(fingerprint=fingerprint)
+        with self.tasks():
+            updated_group_info = save_issue_from_occurrence(new_occurrence, new_event, None)
+        assert updated_group_info is not None
+        assert group_info.group.id == updated_group_info.group.id
+        assert not updated_group_info.is_new
+        assert not updated_group_info.is_regression
+        grouphashes = set(
+            GroupHash.objects.filter(group=group_info.group).values_list("hash", flat=True)
+        )
+        assert set(hash_fingerprint(fingerprint)) == grouphashes
+
+    def test_existing_group_multiple_fingerprints_overlap(self) -> None:
+        fingerprint = ["some-fingerprint"]
+        group_info = save_issue_from_occurrence(
+            self.build_occurrence(fingerprint=fingerprint),
+            self.store_event(data={}, project_id=self.project.id),
+            None,
+        )
+        assert group_info is not None
+        assert group_info.is_new
+        grouphashes = set(
+            GroupHash.objects.filter(group=group_info.group).values_list("hash", flat=True)
+        )
+        assert set(hash_fingerprint(fingerprint)) == grouphashes
+        other_fingerprint = ["another-fingerprint"]
+        other_group_info = save_issue_from_occurrence(
+            self.build_occurrence(fingerprint=other_fingerprint),
+            self.store_event(data={}, project_id=self.project.id),
+            None,
+        )
+        assert other_group_info is not None
+        assert other_group_info.is_new
+        grouphashes = set(
+            GroupHash.objects.filter(group=other_group_info.group).values_list("hash", flat=True)
+        )
+        assert set(hash_fingerprint(other_fingerprint)) == grouphashes
+
+        # Should process these in order, and not join an already used fingerprint
+        overlapping_fingerprint = ["another-fingerprint", "some-fingerprint"]
+        new_event = self.store_event(data={}, project_id=self.project.id)
+        new_occurrence = self.build_occurrence(fingerprint=overlapping_fingerprint)
+        with self.tasks():
+            overlapping_group_info = save_issue_from_occurrence(new_occurrence, new_event, None)
+        assert overlapping_group_info is not None
+        assert other_group_info.group.id == overlapping_group_info.group.id
+        assert not overlapping_group_info.is_new
+        assert not overlapping_group_info.is_regression
+        grouphashes = set(
+            GroupHash.objects.filter(group=group_info.group).values_list("hash", flat=True)
+        )
+        assert set(hash_fingerprint(fingerprint)) == grouphashes
+        other_grouphashes = set(
+            GroupHash.objects.filter(group=other_group_info.group).values_list("hash", flat=True)
+        )
+        assert set(hash_fingerprint(other_fingerprint)) == other_grouphashes
+
     def test_existing_group_different_category(self) -> None:
         event = self.store_event(data={}, project_id=self.project.id)
         occurrence = self.build_occurrence(fingerprint=["some-fingerprint"])
