Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(issue-platform): Support passing fingerprints with multiple hashes when sending occurrences #87311

Merged
merged 3 commits into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/sentry/event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2496,7 +2496,7 @@ def save_grouphash_and_group(
event: Event,
new_grouphash: str,
**group_kwargs: Any,
) -> tuple[Group, bool]:
) -> tuple[Group, bool, GroupHash]:
group = None
with transaction.atomic(router.db_for_write(GroupHash)):
group_hash, created = GroupHash.objects.get_or_create(project=project, hash=new_grouphash)
Expand All @@ -2510,7 +2510,7 @@ def save_grouphash_and_group(
# Group, we can guarantee that the Group will exist at this point and
# fetch it via GroupHash
group = Group.objects.get(grouphash__project=project, grouphash__hash=new_grouphash)
return group, created
return group, created, group_hash


@sentry_sdk.tracing.trace
Expand Down
57 changes: 42 additions & 15 deletions src/sentry/issues/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@ def process_occurrence_data(data: dict[str, Any]) -> None:
return

# Hash fingerprints to make sure they're a consistent length
data["fingerprint"] = [md5(part.encode("utf-8")).hexdigest() for part in data["fingerprint"]]
data["fingerprint"] = hash_fingerprint(data["fingerprint"])


def hash_fingerprint(fingerprint: list[str]) -> list[str]:
    """Return the md5 hex digest of each fingerprint part, preserving order.

    Ensures fingerprint values have a consistent length before they are
    stored as `GroupHash.hash` values.
    """
    hashed = []
    for part in fingerprint:
        digest = md5(part.encode("utf-8"))
        hashed.append(digest.hexdigest())
    return hashed


class IssueArgs(TypedDict):
Expand Down Expand Up @@ -172,20 +176,24 @@ def save_issue_from_occurrence(
# until after we have created a `Group`.
issue_kwargs["message"] = augment_message_with_occurrence(issue_kwargs["message"], occurrence)

# TODO: For now we will assume a single fingerprint. We can expand later if necessary.
# Note that additional fingerprints won't be used to generated additional issues, they'll be
# used to map the occurrence to a specific issue.
new_grouphash = occurrence.fingerprint[0]
existing_grouphash = (
GroupHash.objects.filter(project=project, hash=new_grouphash)
.select_related("group")
.first()
)
existing_grouphashes = {
gh.hash: gh
for gh in GroupHash.objects.filter(
project=project, hash__in=occurrence.fingerprint
).select_related("group")
}
primary_grouphash = None
for fingerprint_hash in occurrence.fingerprint:
if fingerprint_hash in existing_grouphashes:
primary_grouphash = existing_grouphashes[fingerprint_hash]
break

if not primary_grouphash:
primary_hash = occurrence.fingerprint[0]

if not existing_grouphash:
cluster_key = settings.SENTRY_ISSUE_PLATFORM_RATE_LIMITER_OPTIONS.get("cluster", "default")
client = redis.redis_clusters.get(cluster_key)
if not should_create_group(occurrence.type, client, new_grouphash, project):
if not should_create_group(occurrence.type, client, primary_hash, project):
metrics.incr("issues.issue.dropped.noise_reduction")
return None

Expand Down Expand Up @@ -213,7 +221,9 @@ def save_issue_from_occurrence(
) as metric_tags,
transaction.atomic(router.db_for_write(GroupHash)),
):
group, is_new = save_grouphash_and_group(project, event, new_grouphash, **issue_kwargs)
group, is_new, primary_grouphash = save_grouphash_and_group(
project, event, primary_hash, **issue_kwargs
)
is_regression = False
span.set_tag("save_issue_from_occurrence.outcome", "new_group")
metric_tags["save_issue_from_occurrence.outcome"] = "new_group"
Expand Down Expand Up @@ -248,10 +258,10 @@ def save_issue_from_occurrence(
except Exception:
logger.exception("Failed process assignment for occurrence")

elif existing_grouphash.group is None:
elif primary_grouphash.group is None:
return None
else:
group = existing_grouphash.group
group = primary_grouphash.group
if group.issue_category.value != occurrence.type.category:
logger.error(
"save_issue_from_occurrence.category_mismatch",
Expand All @@ -268,6 +278,23 @@ def save_issue_from_occurrence(
is_regression = _process_existing_aggregate(group, group_event, issue_kwargs, release)
group_info = GroupInfo(group=group, is_new=False, is_regression=is_regression)

additional_hashes = [f for f in occurrence.fingerprint if f != primary_grouphash.hash]
for fingerprint_hash in additional_hashes:
# Attempt to create the additional grouphash links. They shouldn't be linked to other groups, but guard against
# that
group_hash, created = GroupHash.objects.get_or_create(
project=project, hash=fingerprint_hash, defaults={"group": group_info.group}
)
if not created:
logger.warning(
"Failed to create additional grouphash for group, grouphash associated with existing group",
extra={
"new_group_id": group_info.group.id,
"hash": fingerprint_hash,
"existing_group_id": group_hash.group_id,
},
)

return group_info


Expand Down
6 changes: 3 additions & 3 deletions tests/sentry/event_manager/test_event_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3423,13 +3423,13 @@ def test(self) -> None:
perf_data = load_data("transaction-n-plus-one", timestamp=before_now(minutes=10))
event = _get_event_instance(perf_data, project_id=self.project.id)
group_hash = "some_group"
group, created = save_grouphash_and_group(self.project, event, group_hash)
group, created, _ = save_grouphash_and_group(self.project, event, group_hash)
assert created
group_2, created = save_grouphash_and_group(self.project, event, group_hash)
group_2, created, _ = save_grouphash_and_group(self.project, event, group_hash)
assert group.id == group_2.id
assert not created
assert Group.objects.filter(grouphash__hash=group_hash).count() == 1
group_3, created = save_grouphash_and_group(self.project, event, "new_hash")
group_3, created, _ = save_grouphash_and_group(self.project, event, "new_hash")
assert created
assert group_2.id != group_3.id
assert Group.objects.filter(grouphash__hash=group_hash).count() == 1
Expand Down
88 changes: 88 additions & 0 deletions tests/sentry/issues/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
)
from sentry.issues.ingest import (
_create_issue_kwargs,
hash_fingerprint,
materialize_metadata,
save_issue_from_occurrence,
save_issue_occurrence,
Expand All @@ -28,6 +29,7 @@
from sentry.models.group import Group
from sentry.models.groupassignee import GroupAssignee
from sentry.models.groupenvironment import GroupEnvironment
from sentry.models.grouphash import GroupHash
from sentry.models.grouprelease import GroupRelease
from sentry.models.release import Release
from sentry.models.releaseprojectenvironment import ReleaseProjectEnvironment
Expand Down Expand Up @@ -213,6 +215,21 @@ def test_new_group(self) -> None:
},
)

def test_new_group_multiple_fingerprint(self) -> None:
    """A new occurrence carrying several fingerprint values creates a single
    group that is linked to one GroupHash row per hashed fingerprint part."""
    raw_fingerprint = ["hi", "bye"]
    occurrence = self.build_occurrence(
        type=ErrorGroupType.type_id, fingerprint=raw_fingerprint
    )
    event = self.store_event(project_id=self.project.id, data={})

    group_info = save_issue_from_occurrence(occurrence, event, None)
    assert group_info is not None
    assert group_info.is_new
    assert not group_info.is_regression

    created_group = group_info.group
    assert created_group.title == occurrence.issue_title
    # Every hash of the fingerprint should now point at the new group.
    stored_hashes = GroupHash.objects.filter(group=created_group).values_list(
        "hash", flat=True
    )
    assert set(stored_hashes) == set(hash_fingerprint(raw_fingerprint))

def test_existing_group(self) -> None:
event = self.store_event(data={}, project_id=self.project.id)
occurrence = self.build_occurrence(fingerprint=["some-fingerprint"])
Expand All @@ -237,6 +254,77 @@ def test_existing_group(self) -> None:
assert updated_group.times_seen == 2
assert updated_group.message == "<unlabeled event> new title new subtitle api/123"

def test_existing_group_multiple_fingerprints(self) -> None:
    """A later occurrence whose fingerprint list shares a hash with an
    existing group attaches to that group, and its additional hashes are
    linked to the group as well."""

    def hashes_for(group):
        # All GroupHash values currently linked to the given group.
        return set(
            GroupHash.objects.filter(group=group).values_list("hash", flat=True)
        )

    initial_fingerprint = ["some-fingerprint"]
    first_event = self.store_event(data={}, project_id=self.project.id)
    first_occurrence = self.build_occurrence(fingerprint=initial_fingerprint)
    first_info = save_issue_from_occurrence(first_occurrence, first_event, None)
    assert first_info is not None
    assert first_info.is_new
    assert hashes_for(first_info.group) == set(hash_fingerprint(initial_fingerprint))

    # Re-send with an expanded fingerprint: same group, extra hash linked.
    expanded_fingerprint = ["some-fingerprint", "another-fingerprint"]
    second_event = self.store_event(data={}, project_id=self.project.id)
    second_occurrence = self.build_occurrence(fingerprint=expanded_fingerprint)
    with self.tasks():
        second_info = save_issue_from_occurrence(second_occurrence, second_event, None)
    assert second_info is not None
    assert second_info.group.id == first_info.group.id
    assert not second_info.is_new
    assert not second_info.is_regression
    assert hashes_for(first_info.group) == set(hash_fingerprint(expanded_fingerprint))

def test_existing_group_multiple_fingerprints_overlap(self) -> None:
    """When every hash in an occurrence's fingerprint already belongs to a
    different group, the occurrence joins the group of the first matching
    hash, and no GroupHash is re-assigned away from the other group."""

    def hashes_for(group):
        # All GroupHash values currently linked to the given group.
        return set(
            GroupHash.objects.filter(group=group).values_list("hash", flat=True)
        )

    first_fingerprint = ["some-fingerprint"]
    first_info = save_issue_from_occurrence(
        self.build_occurrence(fingerprint=first_fingerprint),
        self.store_event(data={}, project_id=self.project.id),
        None,
    )
    assert first_info is not None
    assert first_info.is_new
    assert hashes_for(first_info.group) == set(hash_fingerprint(first_fingerprint))

    second_fingerprint = ["another-fingerprint"]
    second_info = save_issue_from_occurrence(
        self.build_occurrence(fingerprint=second_fingerprint),
        self.store_event(data={}, project_id=self.project.id),
        None,
    )
    assert second_info is not None
    assert second_info.is_new
    assert hashes_for(second_info.group) == set(hash_fingerprint(second_fingerprint))

    # Fingerprints are processed in order, so the occurrence joins the group
    # of its first hash and must not absorb a hash already owned elsewhere.
    overlapping_fingerprint = ["another-fingerprint", "some-fingerprint"]
    overlap_event = self.store_event(data={}, project_id=self.project.id)
    overlap_occurrence = self.build_occurrence(fingerprint=overlapping_fingerprint)
    with self.tasks():
        overlap_info = save_issue_from_occurrence(overlap_occurrence, overlap_event, None)
    assert overlap_info is not None
    assert overlap_info.group.id == second_info.group.id
    assert not overlap_info.is_new
    assert not overlap_info.is_regression
    # Each group keeps exactly its original hashes.
    assert hashes_for(first_info.group) == set(hash_fingerprint(first_fingerprint))
    assert hashes_for(second_info.group) == set(hash_fingerprint(second_fingerprint))

def test_existing_group_different_category(self) -> None:
event = self.store_event(data={}, project_id=self.project.id)
occurrence = self.build_occurrence(fingerprint=["some-fingerprint"])
Expand Down
Loading