diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 8a70044d221005..0119d9478fced6 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -336,11 +336,13 @@ tests/sentry/api/endpoints/test_organization_dashboard_widget_details.py @ge
 ## Replays
-/static/app/components/replays/ @getsentry/replay-frontend
-/static/app/utils/replays/ @getsentry/replay-frontend
-/static/app/views/replays/ @getsentry/replay-frontend
-/src/sentry/replays/ @getsentry/replay-backend
-/tests/sentry/replays/ @getsentry/replay-backend
+/static/app/components/replays/ @getsentry/replay-frontend
+/static/app/utils/replays/ @getsentry/replay-frontend
+/static/app/views/replays/ @getsentry/replay-frontend
+/src/sentry/replays/ @getsentry/replay-backend
+/tests/sentry/replays/ @getsentry/replay-backend
+/src/sentry/issues/endpoints/organization_issue_metrics.py @getsentry/replay-backend
+/tests/sentry/issues/endpoints/test_organization_issue_metrics.py @getsentry/replay-backend
 ## End of Replays
diff --git a/src/sentry/api/urls.py b/src/sentry/api/urls.py
index be2761f846d466..3b180448a89e79 100644
--- a/src/sentry/api/urls.py
+++ b/src/sentry/api/urls.py
@@ -200,6 +200,7 @@
     SourceMapDebugEndpoint,
     TeamGroupsOldEndpoint,
 )
+from sentry.issues.endpoints.organization_issue_metrics import OrganizationIssueMetricsEndpoint
 from sentry.monitors.endpoints.organization_monitor_checkin_index import (
     OrganizationMonitorCheckInIndexEndpoint,
 )
@@ -1643,6 +1644,11 @@ def create_group_urls(name_prefix: str) -> list[URLPattern | URLResolver]:
         OrganizationGroupIndexStatsEndpoint.as_view(),
         name="sentry-api-0-organization-group-index-stats",
     ),
+    re_path(
+        r"^(?P<organization_id_or_slug>[^\/]+)/issues-metrics/$",
+        OrganizationIssueMetricsEndpoint.as_view(),
+        name="sentry-api-0-organization-issue-metrics",
+    ),
     re_path(
         r"^(?P<organization_id_or_slug>[^\/]+)/integrations/$",
         OrganizationIntegrationsEndpoint.as_view(),
diff --git a/src/sentry/issues/endpoints/organization_issue_metrics.py b/src/sentry/issues/endpoints/organization_issue_metrics.py
new file mode 100644
index 00000000000000..7851d72432ff82
--- /dev/null
+++ b/src/sentry/issues/endpoints/organization_issue_metrics.py
@@ -0,0 +1,288 @@
+import collections
+from collections.abc import Iterator
+from datetime import datetime, timedelta
+from heapq import nlargest
+from itertools import chain
+from typing import TypedDict
+
+from django.db.models import Count, DateTimeField, F, Func, Q
+from django.db.models.functions import Extract
+from rest_framework.exceptions import ParseError
+from rest_framework.request import Request
+from rest_framework.response import Response
+
+from sentry.api.api_owners import ApiOwner
+from sentry.api.api_publish_status import ApiPublishStatus
+from sentry.api.base import EnvironmentMixin, region_silo_endpoint
+from sentry.api.bases.organization import OrganizationEndpoint
+from sentry.api.helpers.environments import get_environments
+from sentry.api.utils import get_date_range_from_params
+from sentry.issues.grouptype import GroupCategory
+from sentry.models.group import Group, GroupStatus
+from sentry.models.organization import Organization
+from sentry.models.project import Project
+
+
+@region_silo_endpoint
+class OrganizationIssueMetricsEndpoint(OrganizationEndpoint, EnvironmentMixin):
+    owner = ApiOwner.REPLAY
+    publish_status = {"GET": ApiPublishStatus.PRIVATE}
+
+    def get(self, request: Request, organization: Organization) -> Response:
+        """Stats bucketed by time."""
+        environments = [e.id for e in get_environments(request, organization)]
+        projects = self.get_projects(request, organization)
+        start, end = get_date_range_from_params(request.GET)
+        issue_category = request.GET.get("category", "error")
+        type_filter = (
+            ~Q(type=GroupCategory.FEEDBACK)
+            if issue_category == "error"
+            else Q(type=GroupCategory.FEEDBACK)
+        )
+
+        try:
+            interval_s = int(request.GET["interval"]) // 1000
+            if interval_s == 0:
+                raise ParseError("Interval must be greater than 1000 milliseconds.")
+            interval = timedelta(seconds=interval_s)
+        except KeyError:
+            # Defaulting for now. Probably better to compute some known interval. I.e. if we're
+            # close to an hour round up to an hour to ensure the best visual experience.
+            #
+            # Or maybe we require this field and ignore all these problems.
+            interval_s = 3600
+            interval = timedelta(seconds=interval_s)
+        except ValueError:
+            raise ParseError("Could not parse interval value.")
+
+        # This step validates our maximum granularity. Without this we could see unbounded
+        # cardinality in our queries. Our maximum granularity is 200 which is more than enough to
+        # accommodate common aggregation intervals.
+        #
+        # Max granularity estimates for a given range (rounded to understandable intervals):
+        # - One week range -> one hour interval.
+        # - One day range -> ten minute interval.
+        # - One hour range -> twenty second interval.
+        number_of_buckets = (end - start).total_seconds() // interval.total_seconds()
+        if number_of_buckets > 200:
+            raise ParseError("The specified granularity is too precise. Increase your interval.")
+
+        def gen_ts(
+            qs,
+            group_by: list[str],
+            date_column_name: str,
+            axis: str,
+        ):
+            qs = make_timeseries_query(
+                qs,
+                projects,
+                environments,
+                type_filter,
+                group_by,
+                interval,
+                date_column_name,
+                start,
+                end,
+            )
+
+            grouped_counter: collections.defaultdict[str, int] = collections.defaultdict(int)
+            grouped_series: dict[str, list[TimeSeries]] = collections.defaultdict(list)
+            for row in qs:
+                grouping = [row[g] for g in group_by]
+                key = "||||".join(grouping)
+                grouped_counter[key] += row["value"]
+                grouped_series[key].append({"timestamp": row["timestamp"], "value": row["value"]})
+
+            # Group the smallest series into the "other" bucket.
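+            # At most five named series are kept; the remaining series are merged into a
+            # single "other" series whose values are summed per timestamp bucket.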
+            if len(grouped_series) > 4:
+                keys = [v[0] for v in nlargest(5, grouped_counter.items(), key=lambda i: i[0])]
+
+                new_grouped_series: dict[str, list[TimeSeries]] = {}
+                other_series: collections.defaultdict[float, float] = collections.defaultdict(float)
+                for key, series in grouped_series.items():
+                    if key in keys:
+                        new_grouped_series[key] = series
+                    else:
+                        for s in series:
+                            other_series[s["timestamp"]] += s["value"]
+
+                if other_series:
+                    new_grouped_series["other"] = list(
+                        map(
+                            lambda i: {"timestamp": i[0], "value": i[1]},
+                            sorted(list(other_series.items()), key=lambda i: i[0]),
+                        )
+                    )
+            else:
+                new_grouped_series = grouped_series
+
+            return [
+                make_timeseries_result(
+                    axis=axis,
+                    group=key.split("||||") if key else [],
+                    start=start,
+                    end=end,
+                    interval=interval,
+                    is_other=key == "other",
+                    order=i,
+                    values=series,
+                )
+                for i, (key, series) in enumerate(new_grouped_series.items())
+            ]
+
+        return Response(
+            {
+                "timeseries": chain(
+                    gen_ts(
+                        Group.objects,
+                        axis="new_issues_count",
+                        date_column_name="first_seen",
+                        group_by=[],
+                    ),
+                    gen_ts(
+                        Group.objects.filter(status=GroupStatus.RESOLVED),
+                        axis="resolved_issues_count",
+                        date_column_name="resolved_at",
+                        group_by=[],
+                    ),
+                    gen_ts(
+                        Group.objects.filter(first_release__isnull=False),
+                        axis="new_issues_count_by_release",
+                        date_column_name="first_seen",
+                        group_by=["first_release__version"],
+                    ),
+                ),
+                "meta": {
+                    "dataset": "issues",
+                    "end": end.timestamp(),
+                    "start": start.timestamp(),
+                },
+            },
+            status=200,
+        )
+
+
+class TimeSeries(TypedDict):
+    timestamp: float
+    value: float
+
+
+class TimeSeriesResultMeta(TypedDict):
+    interval: float
+    isOther: bool
+    order: int
+    valueType: str
+    valueUnit: str | None
+
+
+class TimeSeriesResult(TypedDict):
+    axis: str
+    groupBy: list[str]
+    meta: TimeSeriesResultMeta
+    values: list[TimeSeries]
+
+
+def make_timeseries_query(
+    qs,
+    projects: list[Project],
+    environments: list[int],
+    type_filter: Q,
+    group_by: list[str],
+    stride: timedelta,
+    source: str,
+    start: datetime,
+    end: datetime,
+):
+    environment_filter = (
+        Q(groupenvironment__environment_id=environments[0]) if environments else Q()
+    )
+    range_filters = {f"{source}__gte": start, f"{source}__lte": end}
+
+    annotations: dict[str, F | Extract] = {}
+    order_by = []
+    values = []
+    for group in group_by:
+        annotations[group] = F(group)
+        order_by.append(group)
+        values.append(group)
+
+    annotations["timestamp"] = Extract(
+        Func(
+            stride,
+            source,
+            start,
+            function="date_bin",
+            output_field=DateTimeField(),
+        ),
+        "epoch",
+    )
+    order_by.append("timestamp")
+    values.append("timestamp")
+
+    qs = (
+        qs.filter(
+            environment_filter,
+            type_filter,
+            project_id__in=[p.id for p in projects],
+            **range_filters,
+        )
+        .annotate(**annotations)
+        .order_by(*order_by)
+        .values(*values)
+        .annotate(value=Count("id"))
+    )
+    return qs
+
+
+def make_timeseries_result(
+    axis: str,
+    group: list[str],
+    start: datetime,
+    end: datetime,
+    interval: timedelta,
+    is_other: bool,
+    order: int,
+    values: list[TimeSeries],
+) -> TimeSeriesResult:
+    return {
+        "axis": axis,
+        "groupBy": group,
+        "meta": {
+            # total_seconds() is used so intervals of a day or longer report correctly.
+            "interval": int(interval.total_seconds()) * 1000,
+            "isOther": is_other,
+            "order": order,
+            "valueType": "integer",
+            "valueUnit": None,
+        },
+        "values": fill_timeseries(start, end, interval, values),
+    }
+
+
+class UnconsumedBuckets(LookupError):
+    pass
+
+
+def fill_timeseries(
+    start: datetime,
+    end: datetime,
+    interval: timedelta,
+    values: list[TimeSeries],
+) -> list[TimeSeries]:
+    def iter_interval(start: datetime, end: datetime, interval: timedelta) -> Iterator[int]:
+        while start <= end:
+            yield int(start.timestamp())
+            start = start + interval
+
+    filled_values: list[TimeSeries] = []
+    idx = 0
+    for ts in iter_interval(start, end, interval):
+        if idx < len(values) and ts == values[idx]["timestamp"]:
+            filled_values.append(values[idx])
+            idx += 1
+        else:
+            filled_values.append({"timestamp": ts, "value": 0})
+
+    if idx != len(values):
+        raise UnconsumedBuckets("Could not fill every bucket.")
+
+    return filled_values
diff --git a/tests/sentry/issues/endpoints/test_organization_issue_metrics.py b/tests/sentry/issues/endpoints/test_organization_issue_metrics.py
new file mode 100644
index 00000000000000..7ea5e633294e03
--- /dev/null
+++ b/tests/sentry/issues/endpoints/test_organization_issue_metrics.py
@@ -0,0 +1,344 @@
+from datetime import datetime, timedelta, timezone
+
+from django.urls import reverse
+
+from sentry.testutils.cases import APITestCase
+
+
+class OrganizationIssueMetricsTestCase(APITestCase):
+    endpoint = "sentry-api-0-organization-issue-metrics"
+
+    def setUp(self):
+        super().setUp()
+        self.login_as(user=self.user)
+        self.url = reverse(self.endpoint, args=(self.organization.slug,))
+
+    def test_get_errors(self):
+        project1 = self.create_project(teams=[self.team], slug="foo")
+        project2 = self.create_project(teams=[self.team], slug="bar")
+        one = self.create_release(project1, version="1.0.0")
+        two = self.create_release(project2, version="1.2.0")
+
+        curr = datetime.now(tz=timezone.utc)
+        prev = curr - timedelta(hours=1)
+
+        # Release issues.
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=one, type=1)
+        self.create_group(project=project1, status=1, first_seen=prev, first_release=one, type=2)
+        self.create_group(project=project2, status=1, first_seen=curr, first_release=two, type=3)
+        self.create_group(project=project2, status=2, first_seen=curr, first_release=two, type=4)
+        self.create_group(project=project2, status=2, first_seen=curr, first_release=two, type=6)
+
+        # Time based issues.
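+        # These groups carry no release, so they only contribute to the overall new and
+        # resolved counts, never to the per-release series.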
+        self.create_group(project=project1, status=0, first_seen=curr, type=1)
+        self.create_group(project=project1, status=1, first_seen=curr, resolved_at=curr, type=2)
+        self.create_group(project=project2, status=1, first_seen=prev, resolved_at=prev, type=3)
+        self.create_group(project=project2, status=2, first_seen=prev, type=4)
+        self.create_group(project=project2, status=2, first_seen=prev, type=6)
+
+        response = self.client.get(
+            self.url + f"?start={prev.isoformat()[:-6]}&end={curr.isoformat()[:-6]}&category=error"
+        )
+        response_json = response.json()
+        assert response_json["timeseries"] == [
+            {
+                "axis": "new_issues_count",
+                "groupBy": [],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 3},
+                    {"timestamp": int(curr.timestamp()), "value": 5},
+                ],
+            },
+            {
+                "axis": "resolved_issues_count",
+                "groupBy": [],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 1},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.0.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 1},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.2.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 1,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 2},
+                ],
+            },
+        ]
+
+    def test_get_errors_by_project(self):
+        """Assert the project filter works."""
+        project1 = self.create_project(teams=[self.team], slug="foo")
+        project2 = self.create_project(teams=[self.team], slug="bar")
+
+        curr = datetime.now(tz=timezone.utc)
+        prev = curr - timedelta(hours=1)
+        self.create_group(project=project1, status=0, first_seen=curr, type=1)
+        self.create_group(project=project2, status=0, first_seen=curr, type=1)
+
+        response = self.client.get(
+            self.url
+            + f"?start={prev.isoformat()[:-6]}&end={curr.isoformat()[:-6]}&category=error&project={project1.id}"
+        )
+        response_json = response.json()
+        assert response_json["timeseries"] == [
+            {
+                "axis": "new_issues_count",
+                "groupBy": [],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            }
+        ]
+
+    def test_get_feedback(self):
+        project1 = self.create_project(teams=[self.team], slug="foo")
+        project2 = self.create_project(teams=[self.team], slug="bar")
+
+        curr = datetime.now(tz=timezone.utc)
+        prev = curr - timedelta(hours=1)
+        # New cohort
+        self.create_group(project=project1, status=0, first_seen=curr, type=1)
+        self.create_group(project=project1, status=1, first_seen=curr, type=2)
+        self.create_group(project=project2, status=1, first_seen=curr, type=3)
+        self.create_group(project=project2, status=2, first_seen=prev, type=6)
+        self.create_group(project=project2, status=2, first_seen=curr, type=6)
+        # Resolved cohort
+        self.create_group(project=project1, status=0, resolved_at=curr, type=2)
+        self.create_group(project=project1, status=1, resolved_at=curr, type=3)
+        self.create_group(project=project2, status=1, resolved_at=prev, type=6)
+        self.create_group(project=project2, status=1, resolved_at=curr, type=6)
+        self.create_group(project=project2, status=2, resolved_at=curr, type=5)
+
+        response = self.client.get(
+            self.url
+            + f"?start={prev.isoformat()[:-6]}&end={curr.isoformat()[:-6]}&category=feedback"
+        )
+        response_json = response.json()
+        assert response_json["timeseries"] == [
+            {
+                "axis": "new_issues_count",
+                "groupBy": [],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 1},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "resolved_issues_count",
+                "groupBy": [],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 1},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+        ]
+
+    def test_get_too_much_granularity(self):
+        response = self.client.get(self.url + "?statsPeriod=14d&interval=1001")
+        assert response.status_code == 400
+        assert response.json() == {
+            "detail": "The specified granularity is too precise. Increase your interval."
+        }
+
+    def test_get_invalid_interval(self):
+        response = self.client.get(self.url + "?interval=foo")
+        assert response.status_code == 400
+        assert response.json() == {"detail": "Could not parse interval value."}
+
+    def test_get_zero_interval(self):
+        response = self.client.get(self.url + "?interval=0")
+        assert response.status_code == 400
+        assert response.json() == {"detail": "Interval must be greater than 1000 milliseconds."}
+
+    def test_other_grouping(self):
+        project1 = self.create_project(teams=[self.team], slug="foo")
+        project2 = self.create_project(teams=[self.team], slug="bar")
+        one = self.create_release(project1, version="1.0.0")
+        two = self.create_release(project2, version="1.1.0")
+        three = self.create_release(project2, version="1.2.0")
+        four = self.create_release(project2, version="1.3.0")
+        fifth = self.create_release(project2, version="1.4.0")
+        sixth = self.create_release(project2, version="1.5.0")
+
+        curr = datetime.now(tz=timezone.utc)
+        prev = curr - timedelta(hours=1)
+
+        # Release issues.
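+        # Six releases are used so that only five named series are kept and one release
+        # spills into the "other" bucket.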
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=one, type=1)
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=two, type=1)
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=three, type=1)
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=four, type=1)
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=fifth, type=1)
+        self.create_group(project=project1, status=0, first_seen=curr, first_release=sixth, type=1)
+
+        response = self.client.get(
+            self.url + f"?start={prev.isoformat()[:-6]}&end={curr.isoformat()[:-6]}&category=error"
+        )
+        response_json = response.json()
+        assert response_json["timeseries"] == [
+            {
+                "axis": "new_issues_count",
+                "groupBy": [],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 6},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.1.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 0,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.2.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 1,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.3.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 2,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.4.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 3,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["1.5.0"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": False,
+                    "order": 4,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+            {
+                "axis": "new_issues_count_by_release",
+                "groupBy": ["other"],
+                "meta": {
+                    "interval": 3600000,
+                    "isOther": True,
+                    "order": 5,
+                    "valueType": "integer",
+                    "valueUnit": None,
+                },
+                "values": [
+                    {"timestamp": int(prev.timestamp()), "value": 0},
+                    {"timestamp": int(curr.timestamp()), "value": 1},
+                ],
+            },
+        ]