Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(autofix): Add more fields in trace payload #87260

Merged
merged 5 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 94 additions & 3 deletions src/sentry/seer/autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,77 @@
TIMEOUT_SECONDS = 60 * 30 # 30 minutes


def build_spans_tree(spans_data: list[dict]) -> list[dict]:
"""
Builds a hierarchical tree structure from a flat list of spans.

Handles multiple potential roots and preserves parent-child relationships.
A span is considered a root if:
1. It has no parent_span_id, or
2. Its parent_span_id doesn't match any span_id in the provided data

Each node in the tree contains the span data and a list of children.
The tree is sorted by duration (longest spans first) at each level.
"""
# Maps for quick lookup
spans_by_id: dict[str, dict] = {}
children_by_parent_id: dict[str, list[dict]] = {}
root_spans: list[dict] = []

# First pass: organize spans by ID and parent_id
for span in spans_data:
span_id = span.get("span_id")
if not span_id:
continue

# Deep copy the span to avoid modifying the original
span_with_children = span.copy()
span_with_children["children"] = []
spans_by_id[span_id] = span_with_children

parent_id = span.get("parent_span_id")
if parent_id:
if parent_id not in children_by_parent_id:
children_by_parent_id[parent_id] = []
children_by_parent_id[parent_id].append(span_with_children)

# Second pass: identify root spans
# A root span is either:
# 1. A span without a parent_span_id
# 2. A span whose parent_span_id doesn't match any span_id in our data
for span_id, span in spans_by_id.items():
parent_id = span.get("parent_span_id")
if not parent_id or parent_id not in spans_by_id:
root_spans.append(span)

# Third pass: build the tree by connecting children to parents
for parent_id, children in children_by_parent_id.items():
if parent_id in spans_by_id:
parent = spans_by_id[parent_id]
for child in children:
# Only add if not already a child
if child not in parent["children"]:
parent["children"].append(child)

# Function to sort children in each node by duration
def sort_span_tree(node):
if node["children"]:
# Sort children by duration (in descending order to show longest spans first)
node["children"].sort(
key=lambda x: float(x.get("duration", "0").split("s")[0]), reverse=True
)
# Recursively sort each child's children
for child in node["children"]:
sort_span_tree(child)
del node["parent_span_id"]
return node

# Sort the root spans by duration
root_spans.sort(key=lambda x: float(x.get("duration", "0").split("s")[0]), reverse=True)
# Apply sorting to the whole tree
return [sort_span_tree(root) for root in root_spans]


def _get_serialized_event(
event_id: str, group: Group, user: User | RpcUser | AnonymousUser
) -> tuple[dict[str, Any] | None, Event | GroupEvent | None]:
Expand Down Expand Up @@ -65,6 +136,7 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
conditions=[
["trace_id", "=", trace_id],
],
organization_id=project.organization_id,
start=start,
end=end,
)
Expand All @@ -79,6 +151,7 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
conditions=[
["trace", "=", trace_id],
],
organization_id=project.organization_id,
start=start,
end=end,
)
Expand Down Expand Up @@ -111,6 +184,9 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
"parent_span_id": event_data.get("contexts", {}).get("trace", {}).get("parent_span_id"),
"is_transaction": is_transaction,
"is_error": is_error,
"is_current_project": event.project_id == project.id,
"project_slug": event.project.slug,
"project_id": event.project_id,
"children": [],
}

Expand All @@ -132,14 +208,26 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
spans = event_data.get("spans", [])
span_ids = [span.get("span_id") for span in spans if span.get("span_id")]

spans_selected_data = [
{
"span_id": span.get("span_id"),
"parent_span_id": span.get("parent_span_id"),
"title": f"{span.get('op', '')} - {span.get('description', '')}",
"data": span.get("data"),
"duration": f"{span.get('timestamp', 0) - span.get('start_timestamp', 0)}s",
}
for span in spans
]
selected_spans_tree = build_spans_tree(spans_selected_data)

event_node.update(
{
"title": f"{op} - {transaction_title}" if op else transaction_title,
"platform": event.platform,
"is_current_project": event.project_id == project.id,
"duration": duration_str,
"profile_id": profile_id,
"span_ids": span_ids, # Store for later use
"spans": selected_spans_tree,
}
)

Expand All @@ -162,7 +250,6 @@ def _get_trace_tree_for_event(event: Event | GroupEvent, project: Project) -> di
{
"title": error_title,
"platform": event.platform,
"is_current_project": event.project_id == project.id,
}
)

Expand Down Expand Up @@ -269,7 +356,11 @@ def cleanup_node(node):

cleaned_tree = [cleanup_node(root) for root in sorted_tree]

return {"trace_id": event.trace_id, "events": cleaned_tree}
return {
"trace_id": event.trace_id,
"org_id": project.organization_id,
"events": cleaned_tree,
}


def _get_profile_from_trace_tree(
Expand Down
196 changes: 196 additions & 0 deletions tests/sentry/seer/test_autofix.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
_get_profile_from_trace_tree,
_get_trace_tree_for_event,
_respond_with_error,
build_spans_tree,
trigger_autofix,
)
from sentry.snuba.dataset import Dataset
Expand Down Expand Up @@ -1147,3 +1148,198 @@ def test_respond_with_error(self):

assert response.status_code == 400
assert response.data["detail"] == "Test error message"


class TestBuildSpansTree(TestCase):
def test_build_spans_tree_basic(self):
"""Test that a simple list of spans is correctly converted to a tree."""
spans_data: list[dict] = [
{
"span_id": "root-span",
"parent_span_id": None,
"title": "Root Span",
"duration": "10.0s",
},
{
"span_id": "child1",
"parent_span_id": "root-span",
"title": "Child 1",
"duration": "5.0s",
},
{
"span_id": "child2",
"parent_span_id": "root-span",
"title": "Child 2",
"duration": "3.0s",
},
{
"span_id": "grandchild",
"parent_span_id": "child1",
"title": "Grandchild",
"duration": "2.0s",
},
]

tree = build_spans_tree(spans_data)

# Should have one root
assert len(tree) == 1
root = tree[0]
assert root["span_id"] == "root-span"
assert root["title"] == "Root Span"

# Root should have two children, sorted by duration (child1 first)
assert len(root["children"]) == 2
assert root["children"][0]["span_id"] == "child1"
assert root["children"][1]["span_id"] == "child2"

# Child1 should have one child
assert len(root["children"][0]["children"]) == 1
grandchild = root["children"][0]["children"][0]
assert grandchild["span_id"] == "grandchild"

def test_build_spans_tree_multiple_roots(self):
"""Test that spans with multiple roots are correctly handled."""
spans_data: list[dict] = [
{
"span_id": "root1",
"parent_span_id": None,
"title": "Root 1",
"duration": "10.0s",
},
{
"span_id": "root2",
"parent_span_id": None,
"title": "Root 2",
"duration": "15.0s",
},
{
"span_id": "child1",
"parent_span_id": "root1",
"title": "Child of Root 1",
"duration": "5.0s",
},
{
"span_id": "child2",
"parent_span_id": "root2",
"title": "Child of Root 2",
"duration": "7.0s",
},
]

tree = build_spans_tree(spans_data)

# Should have two roots, sorted by duration (root2 first)
assert len(tree) == 2
assert tree[0]["span_id"] == "root2"
assert tree[1]["span_id"] == "root1"

# Each root should have one child
assert len(tree[0]["children"]) == 1
assert tree[0]["children"][0]["span_id"] == "child2"

assert len(tree[1]["children"]) == 1
assert tree[1]["children"][0]["span_id"] == "child1"

def test_build_spans_tree_orphaned_parent(self):
"""Test that spans with parent_span_id not in the data are treated as roots."""
spans_data: list[dict] = [
{
"span_id": "span1",
"parent_span_id": "non-existent-parent",
"title": "Orphaned Span",
"duration": "10.0s",
},
{
"span_id": "span2",
"parent_span_id": "span1",
"title": "Child of Orphaned Span",
"duration": "5.0s",
},
]

tree = build_spans_tree(spans_data)

# span1 should be treated as a root even though it has a parent_span_id
assert len(tree) == 1
assert tree[0]["span_id"] == "span1"

# span2 should be a child of span1
assert len(tree[0]["children"]) == 1
assert tree[0]["children"][0]["span_id"] == "span2"

def test_build_spans_tree_empty_input(self):
"""Test handling of empty input."""
assert build_spans_tree([]) == []

def test_build_spans_tree_missing_span_ids(self):
"""Test that spans without span_ids are ignored."""
spans_data: list[dict] = [
{
"span_id": "valid-span",
"parent_span_id": None,
"title": "Valid Span",
"duration": "10.0s",
},
{
"span_id": None, # Missing span_id
"parent_span_id": "valid-span",
"title": "Invalid Span",
"duration": "5.0s",
},
{
# No span_id key
"parent_span_id": "valid-span",
"title": "Another Invalid Span",
"duration": "3.0s",
},
]

tree = build_spans_tree(spans_data)

# Only the valid span should be in the tree
assert len(tree) == 1
assert tree[0]["span_id"] == "valid-span"
# No children since the other spans had invalid/missing span_ids
assert len(tree[0]["children"]) == 0

def test_build_spans_tree_duration_sorting(self):
"""Test that spans are correctly sorted by duration."""
spans_data: list[dict] = [
{
"span_id": "root",
"parent_span_id": None,
"title": "Root Span",
"duration": "10.0s",
},
{
"span_id": "fast-child",
"parent_span_id": "root",
"title": "Fast Child",
"duration": "1.0s",
},
{
"span_id": "medium-child",
"parent_span_id": "root",
"title": "Medium Child",
"duration": "5.0s",
},
{
"span_id": "slow-child",
"parent_span_id": "root",
"title": "Slow Child",
"duration": "9.0s",
},
]

tree = build_spans_tree(spans_data)

# Should have one root
assert len(tree) == 1
root = tree[0]

# Root should have three children, sorted by duration (slow-child first)
assert len(root["children"]) == 3
assert root["children"][0]["span_id"] == "slow-child"
assert root["children"][1]["span_id"] == "medium-child"
assert root["children"][2]["span_id"] == "fast-child"
Loading