Skip to content

Commit a06172d

Browse files
authored
feat(spans): Implement segment enrichment (#87273)
Ports the relevant bits from Relay's transaction-based normalization logic into the segment consumer. Note that we intentionally keep all normalization within spans in Relay. That is, we assume that Relay will write normalized data attributes into spans and the segment consumer merely has to move them to child spans and/or infer new attributes across the span hierarchy. To keep things simple, we pull normalized attributes from `sentry_tags`. In a future refactor, this logic may be moved out from Relay, too, in which case we will have to access `span.data` directly. The final remaining function, `set_span_exclusive_time`, is going to be added in a follow-up.
1 parent a5f221d commit a06172d

File tree

2 files changed

+124
-19
lines changed

2 files changed

+124
-19
lines changed

src/sentry/spans/consumers/process_segments/message.py

+92-5
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,37 @@
3232
logger = logging.getLogger(__name__)
3333

3434

35+
# Keys in `sentry_tags` that are shared across all spans in a segment. This list
# is taken from `extract_shared_tags` in Relay.
SHARED_TAG_KEYS = (
    "release",
    "user",
    "user.id",
    "user.ip",
    "user.username",
    "user.email",
    "user.geo.country_code",
    "user.geo.subregion",
    "environment",
    "transaction",
    "transaction.method",
    "transaction.op",
    "trace.status",
    "mobile",
    "os.name",
    "device.class",
    "browser.name",
    "profiler_id",
    "sdk.name",
    "sdk.version",
    "platform",
    "thread.id",
    "thread.name",
)

# Value of the `thread.name` tag that marks a span of a mobile segment as
# running on the main thread.
MOBILE_MAIN_THREAD_NAME = "main"
64+
65+
3566
class Span(SchemaSpan, total=False):
3667
start_timestamp_precise: float # Missing in schema
3768
end_timestamp_precise: float # Missing in schema
@@ -41,15 +72,14 @@ class Span(SchemaSpan, total=False):
4172

4273
def process_segment(spans: list[Span]) -> list[Span]:
4374
segment_span = _find_segment_span(spans)
75+
_enrich_spans(segment_span, spans)
76+
4477
if segment_span is None:
45-
# TODO: Handle segments without a defined segment span once all
46-
# functions are refactored to a span interface.
4778
return spans
4879

4980
with metrics.timer("spans.consumers.process_segments.get_project"):
5081
project = Project.objects.get_from_cache(id=segment_span["project_id"])
5182

52-
_enrich_spans(segment_span, spans)
5383
_create_models(segment_span, project)
5484
_detect_performance_problems(segment_span, spans, project)
5585
_record_signals(segment_span, spans, project)
@@ -85,16 +115,73 @@ def _find_segment_span(spans: list[Span]) -> Span | None:
85115
@metrics.wraps("spans.consumers.process_segments.enrich_spans")
def _enrich_spans(segment: Span | None, spans: list[Span]) -> None:
    """
    Enrich all spans of a segment in place.

    Every span gets a `sentry_tags` dict (created if missing) and an `op`
    taken from those tags, falling back to `DEFAULT_SPAN_OP`. If a segment
    span is given, its shared tags are copied onto the spans. Finally,
    grouping hashes are computed and written to the spans.
    """
    # The per-span defaults below do not depend on the segment span, so they
    # are applied even when `segment` is None.
    for current in spans:
        tags = current.setdefault("sentry_tags", {})
        current["op"] = tags.get("op") or DEFAULT_SPAN_OP
        # TODO: port set_span_exclusive_time

    if segment:
        _set_shared_tags(segment, spans)

    # Calculate grouping hashes for performance issue detection
    grouping_results = load_span_grouping_config().execute_strategy_standalone(spans)
    grouping_results.write_to_spans(spans)
96130

97131

132+
def _set_shared_tags(segment: Span, spans: list[Span]) -> None:
    """
    Copy segment-level tags onto every span and derive cross-span tags.

    Mutates the `sentry_tags` dict of each span in `spans` in place:

    - For mobile segments, marks main-thread spans and fills in a missing
      `app_start_type`.
    - Tags spans that end no later than the initial/full display span with
      `ttid` / `ttfd`.
    - Fills in every shared tag of the segment that the span does not
      already carry.
    """
    # Assume that Relay has extracted the shared tags into `sentry_tags` on the
    # root span. Once `sentry_tags` is removed, the logic from
    # `extract_shared_tags` should be moved here.
    segment_tags = segment.get("sentry_tags", {})
    shared_tags = {
        key: segment_tags[key] for key in segment_tags if key in SHARED_TAG_KEYS
    }

    is_mobile = segment_tags.get("mobile") == "true"
    mobile_start_type = _get_mobile_start_type(segment)

    # (tag, cutoff) pairs: a span ending at or before the cutoff gets the tag,
    # with the tag name doubling as the tag value.
    display_cutoffs = (
        ("ttid", _timestamp_by_op(spans, "ui.load.initial_display")),
        ("ttfd", _timestamp_by_op(spans, "ui.load.full_display")),
    )

    for span in spans:
        tags = cast(dict[str, Any], span["sentry_tags"])

        if is_mobile:
            if tags.get("thread.name") == MOBILE_MAIN_THREAD_NAME:
                tags["main_thread"] = "true"
            if mobile_start_type and not tags.get("app_start_type"):
                tags["app_start_type"] = mobile_start_type

        for tag, cutoff in display_cutoffs:
            if cutoff is not None and span["end_timestamp_precise"] <= cutoff:
                tags[tag] = tag

        # Values already present on the span win over the segment's values.
        for key, value in shared_tags.items():
            if tags.get(key) is None:
                tags[key] = value
161+
162+
163+
def _get_mobile_start_type(segment: Span) -> str | None:
    """
    Determine the app start type from the measurements on the segment span.

    Returns "cold" if an `app_start_cold` measurement is present, "warm" if an
    `app_start_warm` measurement is present, and `None` otherwise. A cold
    start takes precedence when both are present.
    """
    measurements = segment.get("measurements") or {}

    # Ordered by precedence: the first measurement key found decides.
    start_types = (
        ("app_start_cold", "cold"),
        ("app_start_warm", "warm"),
    )
    for measurement_key, start_type in start_types:
        if measurement_key in measurements:
            return start_type

    return None
176+
177+
178+
def _timestamp_by_op(spans: list[Span], op: str) -> float | None:
    """
    Return the precise end timestamp of the first span whose `op` equals `op`.

    Returns `None` if no span in `spans` has that op.
    """
    matching_end_timestamps = (
        candidate["end_timestamp_precise"]
        for candidate in spans
        if candidate["op"] == op
    )
    return next(matching_end_timestamps, None)
183+
184+
98185
@metrics.wraps("spans.consumers.process_segments.create_models")
99186
def _create_models(segment: Span, project: Project) -> None:
100187
"""

tests/sentry/spans/consumers/process_segments/test_message.py

+32-14
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ def generate_basic_spans(self):
2929
start_timestamp_precise=1707953018.867,
3030
)
3131

32+
del child_span["sentry_tags"]["transaction"]
33+
del child_span["sentry_tags"]["transaction.method"]
34+
del child_span["sentry_tags"]["transaction.op"]
35+
del child_span["sentry_tags"]["user"]
36+
3237
return [child_span, segment_span]
3338

3439
def generate_n_plus_one_spans(self):
@@ -71,6 +76,33 @@ def repeating_span():
7176

7277
return spans
7378

79+
def test_enrich_spans(self):
    """Tags missing on the child span are filled in from the segment span."""
    original_spans = self.generate_basic_spans()
    enriched_spans = process_segment(original_spans)
    assert len(enriched_spans) == len(original_spans)

    child, segment = enriched_spans
    actual = child["sentry_tags"]
    expected = segment["sentry_tags"]

    # The fixture deletes these keys from the child span, so they can only
    # come from the segment span.
    assert actual["transaction"] == expected["transaction"]
    assert actual["transaction.method"] == expected["transaction.method"]
    assert actual["transaction.op"] == expected["transaction.op"]
    assert actual["user"] == expected["user"]  # type: ignore[typeddict-item]
92+
93+
def test_enrich_spans_no_segment(self):
    """Spans still get an op and a hash when none of them is a segment span."""
    spans = self.generate_basic_spans()
    for span in spans:
        span["is_segment"] = False
        del span["sentry_tags"]

    processed_spans = process_segment(spans)
    assert len(processed_spans) == len(spans)

    for original, processed in zip(spans, processed_spans):
        assert processed["span_id"] == original["span_id"]
        # Both attributes are only present if enrichment actually ran.
        assert processed["op"]
        assert processed["hash"]
105+
74106
def test_create_models(self):
75107
spans = self.generate_basic_spans()
76108
assert process_segment(spans)
@@ -86,20 +118,6 @@ def test_create_models(self):
86118
)
87119
assert release.date_added.timestamp() == spans[0]["end_timestamp_precise"]
88120

89-
def test_empty_defaults(self):
90-
spans = self.generate_basic_spans()
91-
for span in spans:
92-
del span["sentry_tags"]
93-
94-
processed_spans = process_segment(spans)
95-
assert len(processed_spans) == len(spans)
96-
assert processed_spans[0]["span_id"] == spans[0]["span_id"]
97-
assert processed_spans[1]["span_id"] == spans[1]["span_id"]
98-
99-
# double-check that we actually ran through processing. The "op"
100-
# attribute does not exist in the original spans.
101-
assert processed_spans[0]["op"]
102-
103121
@override_options(
104122
{
105123
"standalone-spans.detect-performance-problems.enable": True,

0 commit comments

Comments
 (0)