Skip to content

Commit 8812f8a

Browse files
authoredMar 17, 2025··
fix(autofix): fix parent-child relationship logic (#87195)
Noticed parent-child relationships weren't getting built correctly in prod. Also noticed missing error events. Hoping these changes fix it. Also added some more tests.
1 parent 77878cb commit 8812f8a

File tree

2 files changed

+381
-59
lines changed

2 files changed

+381
-59
lines changed
 

‎src/sentry/seer/autofix.py

+101-49
Original file line numberDiff line numberDiff line change
@@ -58,21 +58,32 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
5858
).values_list("id", "slug")
5959
).keys()
6060
)
61-
event_filter = eventstore.Filter(
61+
start = event.datetime - timedelta(days=1)
62+
end = event.datetime + timedelta(days=1)
63+
transaction_event_filter = eventstore.Filter(
6264
project_ids=project_ids,
6365
conditions=[
6466
["trace_id", "=", trace_id],
6567
],
68+
start=start,
69+
end=end,
6670
)
6771
transactions = eventstore.backend.get_events(
68-
filter=event_filter,
72+
filter=transaction_event_filter,
6973
dataset=Dataset.Transactions,
7074
referrer=Referrer.API_GROUP_AI_AUTOFIX,
7175
tenant_ids={"organization_id": project.organization_id},
7276
)
77+
error_event_filter = eventstore.Filter(
78+
project_ids=project_ids,
79+
conditions=[
80+
["trace", "=", trace_id],
81+
],
82+
start=start,
83+
end=end,
84+
)
7385
errors = eventstore.backend.get_events(
74-
filter=event_filter,
75-
dataset=Dataset.Events,
86+
filter=error_event_filter,
7687
referrer=Referrer.API_GROUP_AI_AUTOFIX,
7788
tenant_ids={"organization_id": project.organization_id},
7889
)
@@ -82,10 +93,12 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
8293
return None
8394

8495
events_by_span_id: dict[str, dict] = {}
85-
children_by_parent_span_id: dict[str, list[dict]] = {}
96+
events_by_parent_span_id: dict[str, list[dict]] = {}
97+
span_to_transaction: dict[str, dict] = {} # Maps span IDs to their parent transaction events
8698
root_events: list[dict] = []
99+
all_events: list[dict] = [] # Track all events for orphan detection
87100

88-
# First pass: collect all events and their relationships
101+
# First pass: collect all events and their metadata
89102
for event in results:
90103
event_data = event.data
91104
is_transaction = event_data.get("spans") is not None
@@ -101,6 +114,9 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
101114
"children": [],
102115
}
103116

117+
# Add to all_events for later orphan detection
118+
all_events.append(event_node)
119+
104120
if is_transaction:
105121
op = event_data.get("contexts", {}).get("trace", {}).get("op")
106122
transaction_title = event.title
@@ -111,18 +127,26 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
111127
f"{duration_obj.get('value', 0)} {duration_obj.get('unit', 'millisecond')}s"
112128
)
113129
profile_id = event_data.get("contexts", {}).get("profile", {}).get("profile_id")
130+
131+
# Store all span IDs from this transaction for later relationship building
132+
spans = event_data.get("spans", [])
133+
span_ids = [span.get("span_id") for span in spans if span.get("span_id")]
134+
114135
event_node.update(
115136
{
116137
"title": f"{op} - {transaction_title}" if op else transaction_title,
117138
"platform": event.platform,
118139
"is_current_project": event.project_id == project.id,
119140
"duration": duration_str,
120141
"profile_id": profile_id,
121-
"children_span_ids": [
122-
span.get("span_id") for span in event_data.get("spans", [])
123-
],
142+
"span_ids": span_ids, # Store for later use
124143
}
125144
)
145+
146+
# Register this transaction as the parent for all its spans
147+
for span_id in span_ids:
148+
if span_id:
149+
span_to_transaction[span_id] = event_node
126150
else:
127151
event_node.update(
128152
{
@@ -135,51 +159,79 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
135159
span_id = event_node["span_id"]
136160
parent_span_id = event_node["parent_span_id"]
137161

162+
# Index events by their span_id
138163
if span_id:
139164
events_by_span_id[span_id] = event_node
140165

141-
# Add to children list of parent
166+
# Index events by their parent_span_id for easier lookup
142167
if parent_span_id:
143-
if parent_span_id not in children_by_parent_span_id:
144-
children_by_parent_span_id[parent_span_id] = []
145-
children_by_parent_span_id[parent_span_id].append(event_node)
168+
if parent_span_id not in events_by_parent_span_id:
169+
events_by_parent_span_id[parent_span_id] = []
170+
events_by_parent_span_id[parent_span_id].append(event_node)
146171
else:
147-
# This is a root node (no parent)
172+
# This is a potential root node (no parent)
148173
root_events.append(event_node)
149174

150-
# Handle case where this event's span is a parent for events we've already seen
151-
if span_id and span_id in children_by_parent_span_id:
152-
event_node["children"].extend(children_by_parent_span_id[span_id])
153-
children_by_parent_span_id[span_id] = event_node["children"]
154-
155-
# Second pass: add children from spans to parent events
156-
for span_id, event_node in events_by_span_id.items():
157-
if event_node["is_transaction"] and "children_span_ids" in event_node:
158-
for child_span_id in event_node["children_span_ids"]:
159-
# If this span ID belongs to another event, establish parent-child relationship
160-
if (
161-
child_span_id in events_by_span_id
162-
and events_by_span_id[child_span_id] not in event_node["children"]
163-
):
164-
event_node["children"].append(events_by_span_id[child_span_id])
165-
166-
# Third pass: connect any remaining children to their parents
167-
for parent_span_id, children in children_by_parent_span_id.items():
168-
if parent_span_id in events_by_span_id:
169-
parent_node = events_by_span_id[parent_span_id]
170-
for child in children:
171-
if child not in parent_node["children"]:
172-
parent_node["children"].append(child)
173-
174-
# Fourth pass: find orphaned events (events with parent_span_id but no actual parent) and add them to root_events
175-
all_event_nodes = list(events_by_span_id.values())
176-
for event_node in all_event_nodes:
177-
parent_span_id = event_node.get("parent_span_id")
178-
if (
179-
parent_span_id
180-
and parent_span_id not in events_by_span_id
181-
and event_node not in root_events
182-
):
175+
# Second pass: establish parent-child relationships based on the three rules
176+
for event_node in list(events_by_span_id.values()):
177+
span_id = event_node["span_id"]
178+
parent_span_id = event_node["parent_span_id"]
179+
180+
# Rule 1: An event whose span_id is X is a parent of an event whose parent_span_id is X
181+
if span_id and span_id in events_by_parent_span_id:
182+
for child_event in events_by_parent_span_id[span_id]:
183+
if child_event not in event_node["children"]:
184+
event_node["children"].append(child_event)
185+
# If this child was previously considered a root, remove it
186+
if child_event in root_events:
187+
root_events.remove(child_event)
188+
189+
# Handle case where this event has a parent based on parent_span_id
190+
if parent_span_id:
191+
# Rule 1 (other direction): This event's parent_span_id matches another event's span_id
192+
if parent_span_id in events_by_span_id:
193+
parent_event = events_by_span_id[parent_span_id]
194+
if event_node not in parent_event["children"]:
195+
parent_event["children"].append(event_node)
196+
# If this event was previously considered a root, remove it
197+
if event_node in root_events:
198+
root_events.remove(event_node)
199+
200+
# Rule 2: A transaction event that contains a span with span_id X is a parent
201+
# of an event whose parent_span_id is X
202+
elif parent_span_id in span_to_transaction:
203+
parent_event = span_to_transaction[parent_span_id]
204+
if event_node not in parent_event["children"]:
205+
parent_event["children"].append(event_node)
206+
# If this event was previously considered a root, remove it
207+
if event_node in root_events:
208+
root_events.remove(event_node)
209+
210+
# Rule 3: A transaction event that contains a span with span_id X is a parent
211+
# of an event whose span_id is X
212+
if span_id and span_id in span_to_transaction:
213+
parent_event = span_to_transaction[span_id]
214+
# Only establish this relationship if there's no more direct relationship
215+
# (i.e., the event doesn't already have a parent through rules 1 or 2)
216+
if event_node in root_events:
217+
if event_node not in parent_event["children"]:
218+
parent_event["children"].append(event_node)
219+
# Remove from root events since it now has a parent
220+
root_events.remove(event_node)
221+
222+
# Third pass: find orphaned events and add them to root_events
223+
# These are events with parent_span_id that don't match any span_id
224+
# and didn't get connected through any of our relationship rules
225+
for event_node in all_events:
226+
has_parent = False
227+
# Check if this event is already a child of any other event
228+
for other_event in all_events:
229+
if event_node in other_event["children"]:
230+
has_parent = True
231+
break
232+
233+
# If not a child of any event and not already in root_events, add it
234+
if not has_parent and event_node not in root_events:
183235
root_events.append(event_node)
184236

185237
# Function to recursively sort children by datetime
@@ -199,8 +251,8 @@ def sort_tree(node):
199251

200252
# Clean up temporary fields before returning
201253
def cleanup_node(node):
202-
if "children_span_ids" in node:
203-
del node["children_span_ids"]
254+
if "span_ids" in node:
255+
del node["span_ids"]
204256
for child in node["children"]:
205257
cleanup_node(child)
206258
return node

‎tests/sentry/seer/test_autofix.py

+280-10
Original file line numberDiff line numberDiff line change
@@ -268,12 +268,11 @@ def test_get_trace_tree_for_event(self):
268268
# Update to patch both Transactions and Events dataset calls
269269
with patch("sentry.eventstore.backend.get_events") as mock_get_events:
270270

271-
def side_effect(filter, dataset, **kwargs):
271+
def side_effect(filter, dataset=None, **kwargs):
272272
if dataset == Dataset.Transactions:
273273
return tx_events
274-
elif dataset == Dataset.Events:
274+
else:
275275
return error_events
276-
return []
277276

278277
mock_get_events.side_effect = side_effect
279278

@@ -336,10 +335,6 @@ def side_effect(filter, dataset, **kwargs):
336335

337336
# Verify that get_events was called twice - once for transactions and once for errors
338337
assert mock_get_events.call_count == 2
339-
# Check that the first call used the Transactions dataset
340-
assert mock_get_events.call_args_list[0][1]["dataset"] == Dataset.Transactions
341-
# Check that the second call used the Events dataset
342-
assert mock_get_events.call_args_list[1][1]["dataset"] == Dataset.Events
343338

344339
@patch("sentry.eventstore.backend.get_events")
345340
def test_get_trace_tree_empty_results(self, mock_get_events):
@@ -431,12 +426,11 @@ def test_get_trace_tree_out_of_order_processing(self, mock_get_events):
431426
parent_event.trace_id = trace_id
432427

433428
# Set up the mock to return different results for different dataset calls
434-
def side_effect(filter, dataset, **kwargs):
429+
def side_effect(filter, dataset=None, **kwargs):
435430
if dataset == Dataset.Transactions:
436431
return [parent_event] # Parent is a transaction
437-
elif dataset == Dataset.Events:
432+
else:
438433
return [child_event] # Child is an error
439-
return []
440434

441435
mock_get_events.side_effect = side_effect
442436

@@ -460,6 +454,282 @@ def side_effect(filter, dataset, **kwargs):
460454
# Verify that get_events was called twice
461455
assert mock_get_events.call_count == 2
462456

457+
@patch("sentry.eventstore.backend.get_events")
458+
def test_get_trace_tree_with_only_errors(self, mock_get_events):
459+
"""
460+
Tests that when results contain only error events (no transactions),
461+
the function still creates a valid trace tree.
462+
463+
Expected trace structure with the corrected approach:
464+
trace (1234567890abcdef1234567890abcdef)
465+
├── error1-id (10:00:00Z) "First Error" (has non-matching parent_span_id)
466+
├── error2-id (10:00:10Z) "Second Error" (has non-matching parent_span_id)
467+
│ └── error3-id (10:00:20Z) "Child Error"
468+
└── error4-id (10:00:30Z) "Orphaned Error" (has non-matching parent_span_id)
469+
470+
Note: In real-world scenarios, error events often have parent_span_ids even
471+
when their parent events aren't captured in our trace data.
472+
"""
473+
trace_id = "1234567890abcdef1234567890abcdef"
474+
test_span_id = "abcdef0123456789"
475+
event_data = load_data("python")
476+
event_data.update({"contexts": {"trace": {"trace_id": trace_id, "span_id": test_span_id}}})
477+
event = self.store_event(data=event_data, project_id=self.project.id)
478+
479+
# Create error events with parent-child relationships
480+
error1_span_id = "error1-span-id"
481+
error1 = Mock()
482+
error1.event_id = "error1-id"
483+
error1.datetime = datetime.fromisoformat("2023-01-01T10:00:00+00:00")
484+
error1.data = {
485+
"contexts": {
486+
"trace": {
487+
"trace_id": trace_id,
488+
"span_id": error1_span_id,
489+
"parent_span_id": "non-existent-parent-1", # Parent that doesn't exist in our data
490+
}
491+
},
492+
"title": "First Error",
493+
}
494+
error1.title = "First Error"
495+
error1.platform = "python"
496+
error1.project_id = self.project.id
497+
error1.trace_id = trace_id
498+
499+
error2_span_id = "error2-span-id"
500+
error2 = Mock()
501+
error2.event_id = "error2-id"
502+
error2.datetime = datetime.fromisoformat("2023-01-01T10:00:10+00:00")
503+
error2.data = {
504+
"contexts": {
505+
"trace": {
506+
"trace_id": trace_id,
507+
"span_id": error2_span_id,
508+
"parent_span_id": "non-existent-parent-2", # Parent that doesn't exist in our data
509+
}
510+
},
511+
"title": "Second Error",
512+
}
513+
error2.title = "Second Error"
514+
error2.platform = "python"
515+
error2.project_id = self.project.id
516+
error2.trace_id = trace_id
517+
518+
# This error is a child of error2
519+
error3 = Mock()
520+
error3.event_id = "error3-id"
521+
error3.datetime = datetime.fromisoformat("2023-01-01T10:00:20+00:00")
522+
error3.data = {
523+
"contexts": {
524+
"trace": {
525+
"trace_id": trace_id,
526+
"span_id": "error3-span-id",
527+
"parent_span_id": error2_span_id, # Points to error2
528+
}
529+
},
530+
"title": "Child Error",
531+
}
532+
error3.title = "Child Error"
533+
error3.platform = "python"
534+
error3.project_id = self.project.id
535+
error3.trace_id = trace_id
536+
537+
# Another "orphaned" error with a parent_span_id that doesn't point to anything
538+
error4 = Mock()
539+
error4.event_id = "error4-id"
540+
error4.datetime = datetime.fromisoformat("2023-01-01T10:00:30+00:00")
541+
error4.data = {
542+
"contexts": {
543+
"trace": {
544+
"trace_id": trace_id,
545+
"span_id": "error4-span-id",
546+
"parent_span_id": "non-existent-parent-3", # Parent that doesn't exist in our data
547+
}
548+
},
549+
"title": "Orphaned Error",
550+
}
551+
error4.title = "Orphaned Error"
552+
error4.platform = "python"
553+
error4.project_id = self.project.id
554+
error4.trace_id = trace_id
555+
556+
# Return empty transactions list but populate errors list
557+
def side_effect(filter, dataset=None, **kwargs):
558+
if dataset == Dataset.Transactions:
559+
return []
560+
else:
561+
return [error1, error2, error3, error4]
562+
563+
mock_get_events.side_effect = side_effect
564+
565+
# Call the function directly
566+
trace_tree = _get_trace_tree_for_event(event, self.project)
567+
568+
# Verify the trace tree structure
569+
assert trace_tree is not None
570+
assert trace_tree["trace_id"] == trace_id
571+
572+
# We should have three root-level errors in the result (error1, error2, error4)
573+
# In the old logic, this would be empty because all errors have parent_span_ids
574+
assert len(trace_tree["events"]) == 3
575+
576+
# Verify all the root events are in chronological order
577+
events = trace_tree["events"]
578+
assert events[0]["event_id"] == "error1-id"
579+
assert events[1]["event_id"] == "error2-id"
580+
assert events[2]["event_id"] == "error4-id"
581+
582+
# error3 should be a child of error2
583+
assert len(events[1]["children"]) == 1
584+
child = events[1]["children"][0]
585+
assert child["event_id"] == "error3-id"
586+
assert child["title"] == "Child Error"
587+
588+
# Verify get_events was called twice - once for transactions and once for errors
589+
assert mock_get_events.call_count == 2
590+
591+
@patch("sentry.eventstore.backend.get_events")
592+
def test_get_trace_tree_all_relationship_rules(self, mock_get_events):
593+
"""
594+
Tests that all three relationship rules are correctly implemented:
595+
1. An event whose span_id is X is a parent of an event whose parent_span_id is X
596+
2. A transaction event with a span with span_id X is a parent of an event whose parent_span_id is X
597+
3. A transaction event with a span with span_id X is a parent of an event whose span_id is X
598+
599+
Expected trace structure:
600+
trace (1234567890abcdef1234567890abcdef)
601+
└── root-tx-id (10:00:00Z) "Root Transaction"
602+
├── rule1-child-id (10:00:10Z) "Rule 1 Child" (parent_span_id=root-tx-span-id)
603+
├── rule2-child-id (10:00:20Z) "Rule 2 Child" (parent_span_id=tx-span-1)
604+
└── rule3-child-id (10:00:30Z) "Rule 3 Child" (span_id=tx-span-2)
605+
"""
606+
trace_id = "1234567890abcdef1234567890abcdef"
607+
test_span_id = "abcdef0123456789"
608+
event_data = load_data("python")
609+
event_data.update({"contexts": {"trace": {"trace_id": trace_id, "span_id": test_span_id}}})
610+
event = self.store_event(data=event_data, project_id=self.project.id)
611+
612+
# Root transaction with two spans
613+
root_tx_span_id = "root-tx-span-id"
614+
tx_span_1 = "tx-span-1"
615+
tx_span_2 = "tx-span-2"
616+
617+
root_tx = Mock()
618+
root_tx.event_id = "root-tx-id"
619+
root_tx.datetime = datetime.fromisoformat("2023-01-01T10:00:00+00:00")
620+
root_tx.data = {
621+
"spans": [{"span_id": tx_span_1}, {"span_id": tx_span_2}],
622+
"contexts": {
623+
"trace": {"trace_id": trace_id, "span_id": root_tx_span_id, "op": "http.server"}
624+
},
625+
"title": "Root Transaction",
626+
}
627+
root_tx.title = "Root Transaction"
628+
root_tx.platform = "python"
629+
root_tx.project_id = self.project.id
630+
root_tx.trace_id = trace_id
631+
632+
# Rule 1: Child whose parent_span_id matches another event's span_id
633+
rule1_child = Mock()
634+
rule1_child.event_id = "rule1-child-id"
635+
rule1_child.datetime = datetime.fromisoformat("2023-01-01T10:00:10+00:00")
636+
rule1_child.data = {
637+
"contexts": {
638+
"trace": {
639+
"trace_id": trace_id,
640+
"span_id": "rule1-child-span-id",
641+
"parent_span_id": root_tx_span_id, # Points to root transaction's span_id
642+
}
643+
},
644+
"title": "Rule 1 Child",
645+
}
646+
rule1_child.title = "Rule 1 Child"
647+
rule1_child.platform = "python"
648+
rule1_child.project_id = self.project.id
649+
rule1_child.trace_id = trace_id
650+
651+
# Rule 2: Child whose parent_span_id matches a span in a transaction
652+
rule2_child = Mock()
653+
rule2_child.event_id = "rule2-child-id"
654+
rule2_child.datetime = datetime.fromisoformat("2023-01-01T10:00:20+00:00")
655+
rule2_child.data = {
656+
"contexts": {
657+
"trace": {
658+
"trace_id": trace_id,
659+
"span_id": "rule2-child-span-id",
660+
"parent_span_id": tx_span_1, # Points to a span in the root transaction
661+
}
662+
},
663+
"title": "Rule 2 Child",
664+
}
665+
rule2_child.title = "Rule 2 Child"
666+
rule2_child.platform = "python"
667+
rule2_child.project_id = self.project.id
668+
rule2_child.trace_id = trace_id
669+
670+
# Rule 3: Child whose span_id matches a span in a transaction
671+
rule3_child = Mock()
672+
rule3_child.event_id = "rule3-child-id"
673+
rule3_child.datetime = datetime.fromisoformat("2023-01-01T10:00:30+00:00")
674+
rule3_child.data = {
675+
"contexts": {
676+
"trace": {
677+
"trace_id": trace_id,
678+
"span_id": tx_span_2, # Same as one of the spans in the root transaction
679+
}
680+
},
681+
"title": "Rule 3 Child",
682+
}
683+
rule3_child.title = "Rule 3 Child"
684+
rule3_child.platform = "python"
685+
rule3_child.project_id = self.project.id
686+
rule3_child.trace_id = trace_id
687+
688+
# Set up the mock to return our test events
689+
def side_effect(filter, dataset=None, **kwargs):
690+
if dataset == Dataset.Transactions:
691+
return [root_tx]
692+
else:
693+
return [rule1_child, rule2_child, rule3_child]
694+
695+
mock_get_events.side_effect = side_effect
696+
697+
# Call the function
698+
trace_tree = _get_trace_tree_for_event(event, self.project)
699+
700+
# Verify the trace tree structure
701+
assert trace_tree is not None
702+
assert trace_tree["trace_id"] == trace_id
703+
assert len(trace_tree["events"]) == 1 # One root node (the transaction)
704+
705+
# Verify root transaction
706+
root = trace_tree["events"][0]
707+
assert root["event_id"] == "root-tx-id"
708+
assert root["title"] == "http.server - Root Transaction"
709+
assert root["is_transaction"] is True
710+
assert root["is_error"] is False
711+
712+
# Root should have all three children according to the rules
713+
assert len(root["children"]) == 3
714+
715+
# Children should be in chronological order
716+
children = root["children"]
717+
718+
# First child - Rule 1
719+
assert children[0]["event_id"] == "rule1-child-id"
720+
assert children[0]["title"] == "Rule 1 Child"
721+
722+
# Second child - Rule 2
723+
assert children[1]["event_id"] == "rule2-child-id"
724+
assert children[1]["title"] == "Rule 2 Child"
725+
726+
# Third child - Rule 3
727+
assert children[2]["event_id"] == "rule3-child-id"
728+
assert children[2]["title"] == "Rule 3 Child"
729+
730+
# Verify get_events was called twice
731+
assert mock_get_events.call_count == 2
732+
463733

464734
@requires_snuba
465735
@pytest.mark.django_db

0 commit comments

Comments
 (0)
Please sign in to comment.