Skip to content

Commit d457ede

Browse files
feat(eap-spans): Support formulas with time series endpoint (#87031)
Closes getsentry/team-visibility#36 1. Adds support for formulas and conditional_aggregations in time series requests 2. Converts `Column.BinaryFormula` -> `Expression.BinaryFormula` (until getsentry/eap-planning#206) so that custom functions support timeseries. 3. I'm also now passing all aggregations via `expressions` instead of `aggregations`. This better matches what we already do in the table query `rpc_dataset_common.py` --------- Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent ba7b3ab commit d457ede

File tree

5 files changed

+163
-15
lines changed

5 files changed

+163
-15
lines changed

src/sentry/search/eap/utils.py

+37-1
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,20 @@
33
from typing import Any
44

55
from google.protobuf.timestamp_pb2 import Timestamp
6-
from sentry_protos.snuba.v1.endpoint_time_series_pb2 import TimeSeriesRequest
6+
from sentry_protos.snuba.v1.endpoint_time_series_pb2 import Expression, TimeSeriesRequest
7+
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import Column
78

89
from sentry.exceptions import InvalidSearchQuery
910

11+
# TODO: Remove when https://github.com/getsentry/eap-planning/issues/206 is merged, since we can use formulas in both APIs at that point
12+
BINARY_FORMULA_OPERATOR_MAP = {
13+
Column.BinaryFormula.OP_ADD: Expression.BinaryFormula.OP_ADD,
14+
Column.BinaryFormula.OP_SUBTRACT: Expression.BinaryFormula.OP_SUBTRACT,
15+
Column.BinaryFormula.OP_MULTIPLY: Expression.BinaryFormula.OP_MULTIPLY,
16+
Column.BinaryFormula.OP_DIVIDE: Expression.BinaryFormula.OP_DIVIDE,
17+
Column.BinaryFormula.OP_UNSPECIFIED: Expression.BinaryFormula.OP_UNSPECIFIED,
18+
}
19+
1020

1121
def literal_validator(values: list[Any]) -> Callable[[str], bool]:
1222
def _validator(input: str) -> bool:
@@ -28,3 +38,29 @@ def add_start_end_conditions(
2838
in_msg.meta.end_timestamp.CopyFrom(end_time_proto)
2939

3040
return in_msg
41+
42+
43+
def transform_binary_formula_to_expression(
44+
column: Column.BinaryFormula,
45+
) -> Expression.BinaryFormula:
46+
"""TODO: Remove when https://github.com/getsentry/eap-planning/issues/206 is merged, since we can use formulas in both APIs at that point"""
47+
return Expression.BinaryFormula(
48+
left=transform_column_to_expression(column.left),
49+
right=transform_column_to_expression(column.right),
50+
op=BINARY_FORMULA_OPERATOR_MAP[column.op],
51+
)
52+
53+
54+
def transform_column_to_expression(column: Column) -> Expression:
55+
"""TODO: Remove when https://github.com/getsentry/eap-planning/issues/206 is merged, since we can use formulas in both APIs at that point"""
56+
if column.formula.op != Column.BinaryFormula.OP_UNSPECIFIED:
57+
return Expression(
58+
formula=transform_binary_formula_to_expression(column.formula),
59+
label=column.label,
60+
)
61+
62+
return Expression(
63+
aggregation=column.aggregation,
64+
conditional_aggregation=column.conditional_aggregation,
65+
label=column.label,
66+
)

src/sentry/snuba/spans_rpc.py

+29-11
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66

77
import sentry_sdk
88
from sentry_protos.snuba.v1.endpoint_get_trace_pb2 import GetTraceRequest
9-
from sentry_protos.snuba.v1.endpoint_time_series_pb2 import TimeSeries, TimeSeriesRequest
9+
from sentry_protos.snuba.v1.endpoint_time_series_pb2 import (
10+
Expression,
11+
TimeSeries,
12+
TimeSeriesRequest,
13+
)
1014
from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
11-
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeAggregation, AttributeKey
15+
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey
1216
from sentry_protos.snuba.v1.trace_item_filter_pb2 import AndFilter, OrFilter, TraceItemFilter
1317

1418
from sentry.api.event_search import SearchFilter, SearchKey, SearchValue
@@ -23,6 +27,7 @@
2327
from sentry.search.eap.resolver import SearchResolver
2428
from sentry.search.eap.spans.definitions import SPAN_DEFINITIONS
2529
from sentry.search.eap.types import CONFIDENCES, EAPResponse, SearchResolverConfig
30+
from sentry.search.eap.utils import transform_binary_formula_to_expression
2631
from sentry.search.events.fields import is_function
2732
from sentry.search.events.types import EventsMeta, SnubaData, SnubaParams
2833
from sentry.snuba import rpc_dataset_common
@@ -33,6 +38,23 @@
3338
logger = logging.getLogger("sentry.snuba.spans_rpc")
3439

3540

41+
def categorize_aggregate(
42+
column: ResolvedAggregate | ResolvedConditionalAggregate | ResolvedFormula,
43+
) -> Expression:
44+
if isinstance(column, ResolvedFormula):
45+
# TODO: Remove when https://github.com/getsentry/eap-planning/issues/206 is merged, since we can use formulas in both APIs at that point
46+
return Expression(
47+
formula=transform_binary_formula_to_expression(column.proto_definition),
48+
label=column.public_alias,
49+
)
50+
if isinstance(column, ResolvedAggregate):
51+
return Expression(aggregation=column.proto_definition, label=column.public_alias)
52+
if isinstance(column, ResolvedConditionalAggregate):
53+
return Expression(
54+
conditional_aggregation=column.proto_definition, label=column.public_alias
55+
)
56+
57+
3658
@dataclass
3759
class ProcessedTimeseries:
3860
timeseries: SnubaData = field(default_factory=list)
@@ -89,7 +111,7 @@ def get_timeseries_query(
89111
resolver = get_resolver(params=params, config=config)
90112
meta = resolver.resolve_meta(referrer=referrer)
91113
query, _, query_contexts = resolver.resolve_query(query_string)
92-
(aggregations, _) = resolver.resolve_functions(y_axes)
114+
(functions, _) = resolver.resolve_functions(y_axes)
93115
(groupbys, _) = resolver.resolve_attributes(groupby)
94116
if extra_conditions is not None:
95117
if query is not None:
@@ -101,19 +123,15 @@ def get_timeseries_query(
101123
TimeSeriesRequest(
102124
meta=meta,
103125
filter=query,
104-
aggregations=[
105-
agg.proto_definition
106-
for agg in aggregations
107-
if isinstance(agg.proto_definition, AttributeAggregation)
108-
],
126+
expressions=[categorize_aggregate(fn) for fn in functions if fn.is_aggregate],
109127
group_by=[
110128
groupby.proto_definition
111129
for groupby in groupbys
112130
if isinstance(groupby.proto_definition, AttributeKey)
113131
],
114132
granularity_secs=granularity_secs,
115133
),
116-
aggregations,
134+
functions,
117135
groupbys,
118136
)
119137

@@ -178,7 +196,7 @@ def run_timeseries_query(
178196
)
179197

180198
if comparison_delta is not None:
181-
if len(rpc_request.aggregations) != 1:
199+
if len(rpc_request.expressions) != 1:
182200
raise InvalidSearchQuery("Only one column can be selected for comparison queries")
183201

184202
comp_query_params = params.copy()
@@ -305,7 +323,7 @@ def run_top_events_timeseries_query(
305323
params,
306324
query_string,
307325
y_axes,
308-
[], # in the other series, we want eveything in a single group, so remove the group by
326+
[], # in the other series, we want eveything in a single group, so the group by
309327
referrer,
310328
config,
311329
granularity_secs,

tests/sentry/snuba/test_entity_subscriptions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ def test_get_entity_subscription_for_eap_rpc_query(self) -> None:
412412

413413
assert rpc_timeseries_request.granularity_secs == 3600
414414
assert rpc_timeseries_request.filter.comparison_filter.value.val_str == "http.client"
415-
assert rpc_timeseries_request.aggregations[0].label == "count(span.duration)"
415+
assert rpc_timeseries_request.expressions[0].aggregation.label == "count(span.duration)"
416416

417417

418418
class GetEntitySubscriptionFromSnubaQueryTest(TestCase):

tests/sentry/snuba/test_tasks.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -306,11 +306,11 @@ def test_eap_rpc_query_count(self):
306306
== "http.client"
307307
)
308308
assert (
309-
createSubscriptionRequest.time_series_request.aggregations[0].aggregate
309+
createSubscriptionRequest.time_series_request.expressions[0].aggregation.aggregate
310310
== FUNCTION_COUNT
311311
)
312312
assert (
313-
createSubscriptionRequest.time_series_request.aggregations[0].key.name
313+
createSubscriptionRequest.time_series_request.expressions[0].aggregation.key.name
314314
== "sentry.duration_ms"
315315
)
316316
# Validate that the spm function uses the correct time window

tests/snuba/api/endpoints/test_organization_events_stats_span_indexed.py

+94
Original file line numberDiff line numberDiff line change
@@ -1287,3 +1287,97 @@ def test_interval_larger_than_period_uses_default_period(self):
12871287
data = response.data["data"]
12881288
assert len(data) == 73
12891289
assert response.data["meta"]["dataset"] == self.dataset
1290+
1291+
def test_cache_miss_rate(self):
1292+
self.store_spans(
1293+
[
1294+
self.create_span(
1295+
{
1296+
"data": {"cache.hit": False},
1297+
},
1298+
start_ts=self.day_ago + timedelta(minutes=1),
1299+
),
1300+
self.create_span(
1301+
{
1302+
"data": {"cache.hit": True},
1303+
},
1304+
start_ts=self.day_ago + timedelta(minutes=2),
1305+
),
1306+
self.create_span(
1307+
{
1308+
"data": {"cache.hit": False},
1309+
},
1310+
start_ts=self.day_ago + timedelta(minutes=2),
1311+
),
1312+
self.create_span(
1313+
{
1314+
"data": {"cache.hit": True},
1315+
},
1316+
start_ts=self.day_ago + timedelta(minutes=2),
1317+
),
1318+
self.create_span(
1319+
{
1320+
"data": {"cache.hit": True},
1321+
},
1322+
start_ts=self.day_ago + timedelta(minutes=2),
1323+
),
1324+
],
1325+
is_eap=self.is_eap,
1326+
)
1327+
1328+
response = self._do_request(
1329+
data={
1330+
"start": self.day_ago,
1331+
"end": self.day_ago + timedelta(minutes=3),
1332+
"interval": "1m",
1333+
"yAxis": "cache_miss_rate()",
1334+
"project": self.project.id,
1335+
"dataset": self.dataset,
1336+
},
1337+
)
1338+
assert response.status_code == 200, response.content
1339+
data = response.data["data"]
1340+
assert len(data) == 3
1341+
1342+
assert data[0][1][0]["count"] == 0.0
1343+
assert data[1][1][0]["count"] == 1.0
1344+
assert data[2][1][0]["count"] == 0.25
1345+
assert response.data["meta"]["dataset"] == self.dataset
1346+
1347+
def test_count_op(self):
1348+
self.store_spans(
1349+
[
1350+
self.create_span(
1351+
{"op": "queue.process", "sentry_tags": {"op": "queue.publish"}},
1352+
start_ts=self.day_ago + timedelta(minutes=1),
1353+
),
1354+
self.create_span(
1355+
{"op": "queue.process", "sentry_tags": {"op": "queue.publish"}},
1356+
start_ts=self.day_ago + timedelta(minutes=1),
1357+
),
1358+
self.create_span(
1359+
{"op": "queue.publish", "sentry_tags": {"op": "queue.publish"}},
1360+
start_ts=self.day_ago + timedelta(minutes=2),
1361+
),
1362+
],
1363+
is_eap=self.is_eap,
1364+
)
1365+
1366+
response = self._do_request(
1367+
data={
1368+
"start": self.day_ago,
1369+
"end": self.day_ago + timedelta(minutes=3),
1370+
"interval": "1m",
1371+
"yAxis": "count_op(queue.publish)",
1372+
"project": self.project.id,
1373+
"dataset": self.dataset,
1374+
},
1375+
)
1376+
assert response.status_code == 200, response.content
1377+
data = response.data["data"]
1378+
assert len(data) == 3
1379+
1380+
assert data[0][1][0]["count"] == 0.0
1381+
assert data[1][1][0]["count"] == 2.0
1382+
assert data[2][1][0]["count"] == 1.0
1383+
assert response.data["meta"]["dataset"] == self.dataset

0 commit comments

Comments
 (0)