Skip to content

Commit 0ff1032

Browse files
Authored Mar 5, 2025
botocore: add genai metrics for bedrock extensions (#3326)
This adds GenAI metrics regarding the operation duration and the tokens used to the botocore bedrock extension.
1 parent 81eaea5 commit 0ff1032

File tree

8 files changed

+534
-18
lines changed

8 files changed

+534
-18
lines changed
 

‎CHANGELOG.md

+2
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2525
([#3275](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3275))
2626
- `opentelemetry-instrumentation-botocore` Add support for GenAI tool events
2727
([#3302](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3302))
28+
- `opentelemetry-instrumentation-botocore` Add support for GenAI metrics
29+
([#3326](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3326))
2830
- `opentelemetry-instrumentation` make it simpler to initialize auto-instrumentation programmatically
2931
([#3273](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3273))
3032
- Add `opentelemetry-instrumentation-vertexai>=2.0b0` to `opentelemetry-bootstrap`

‎instrumentation/opentelemetry-instrumentation-botocore/pyproject.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ classifiers = [
2626
"Programming Language :: Python :: 3.13",
2727
]
2828
dependencies = [
29-
"opentelemetry-api ~= 1.12",
29+
"opentelemetry-api ~= 1.30",
3030
"opentelemetry-instrumentation == 0.52b0.dev",
3131
"opentelemetry-semantic-conventions == 0.52b0.dev",
3232
"opentelemetry-propagator-aws-xray ~= 1.0",

‎instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/__init__.py

+74-1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,38 @@ def response_hook(span, service_name, operation_name, result):
7676
)
7777
ec2 = self.session.create_client("ec2", region_name="us-west-2")
7878
ec2.describe_instances()
79+
80+
Extensions
81+
----------
82+
83+
The instrumentation supports creating extensions for AWS services for enriching what is collected. We have extensions
84+
for the following AWS services:
85+
86+
- Bedrock Runtime
87+
- DynamoDB
88+
- Lambda
89+
- SNS
90+
- SQS
91+
92+
Bedrock Runtime
93+
***************
94+
95+
This extension implements the GenAI semantic conventions for the following API calls:
96+
97+
- Converse
98+
- ConverseStream
99+
- InvokeModel
100+
- InvokeModelWithResponseStream
101+
102+
For the Converse and ConverseStream APIs tracing, events and metrics are implemented.
103+
104+
For the InvokeModel and InvokeModelWithResponseStream APIs tracing, events and metrics implemented only for a subset of
105+
the available models, namely:
106+
- Amazon Titan models
107+
- Amazon Nova models
108+
- Anthropic Claude
109+
110+
There is no support for tool calls with Amazon Models for the InvokeModel and InvokeModelWithResponseStream APIs.
79111
"""
80112

81113
import logging
@@ -104,6 +136,7 @@ def response_hook(span, service_name, operation_name, result):
104136
suppress_http_instrumentation,
105137
unwrap,
106138
)
139+
from opentelemetry.metrics import Instrument, Meter, get_meter
107140
from opentelemetry.propagators.aws.aws_xray_propagator import AwsXRayPropagator
108141
from opentelemetry.semconv.trace import SpanAttributes
109142
from opentelemetry.trace import get_tracer
@@ -134,6 +167,10 @@ def _instrument(self, **kwargs):
134167
self._tracers = {}
135168
# event_loggers are lazy initialized per-extension in _get_event_logger
136169
self._event_loggers = {}
170+
# meters are lazy initialized per-extension in _get_meter
171+
self._meters = {}
172+
# metrics are lazy initialized per-extension in _get_metrics
173+
self._metrics: Dict[str, Dict[str, Instrument]] = {}
137174

138175
self.request_hook = kwargs.get("request_hook")
139176
self.response_hook = kwargs.get("response_hook")
@@ -144,6 +181,7 @@ def _instrument(self, **kwargs):
144181

145182
self.tracer_provider = kwargs.get("tracer_provider")
146183
self.event_logger_provider = kwargs.get("event_logger_provider")
184+
self.meter_provider = kwargs.get("meter_provider")
147185

148186
wrap_function_wrapper(
149187
"botocore.client",
@@ -201,6 +239,38 @@ def _get_event_logger(self, extension: _AwsSdkExtension):
201239

202240
return self._event_loggers[instrumentation_name]
203241

242+
def _get_meter(self, extension: _AwsSdkExtension):
243+
"""This is a multiplexer in order to have a meter per extension"""
244+
245+
instrumentation_name = self._get_instrumentation_name(extension)
246+
meter = self._meters.get(instrumentation_name)
247+
if meter:
248+
return meter
249+
250+
schema_version = extension.meter_schema_version()
251+
self._meters[instrumentation_name] = get_meter(
252+
instrumentation_name,
253+
"",
254+
schema_url=f"https://opentelemetry.io/schemas/{schema_version}",
255+
meter_provider=self.meter_provider,
256+
)
257+
258+
return self._meters[instrumentation_name]
259+
260+
def _get_metrics(
261+
self, extension: _AwsSdkExtension, meter: Meter
262+
) -> Dict[str, Instrument]:
263+
"""This is a multiplexer for lazy initialization of metrics required by extensions"""
264+
instrumentation_name = self._get_instrumentation_name(extension)
265+
metrics = self._metrics.get(instrumentation_name)
266+
if metrics is not None:
267+
return metrics
268+
269+
self._metrics.setdefault(instrumentation_name, {})
270+
metrics = self._metrics[instrumentation_name]
271+
_safe_invoke(extension.setup_metrics, meter, metrics)
272+
return metrics
273+
204274
def _uninstrument(self, **kwargs):
205275
unwrap(BaseClient, "_make_api_call")
206276
unwrap(Endpoint, "prepare_request")
@@ -244,8 +314,11 @@ def _patched_api_call(self, original_func, instance, args, kwargs):
244314

245315
tracer = self._get_tracer(extension)
246316
event_logger = self._get_event_logger(extension)
317+
meter = self._get_meter(extension)
318+
metrics = self._get_metrics(extension, meter)
247319
instrumentor_ctx = _BotocoreInstrumentorContext(
248-
event_logger=event_logger
320+
event_logger=event_logger,
321+
metrics=metrics,
249322
)
250323
with tracer.start_as_current_span(
251324
call_context.span_name,

‎instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py

+252-13
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import io
2222
import json
2323
import logging
24+
from timeit import default_timer
2425
from typing import Any
2526

2627
from botocore.eventstream import EventStream
@@ -39,6 +40,7 @@
3940
_BotoClientErrorT,
4041
_BotocoreInstrumentorContext,
4142
)
43+
from opentelemetry.metrics import Instrument, Meter
4244
from opentelemetry.semconv._incubating.attributes.error_attributes import (
4345
ERROR_TYPE,
4446
)
@@ -51,16 +53,56 @@
5153
GEN_AI_REQUEST_TOP_P,
5254
GEN_AI_RESPONSE_FINISH_REASONS,
5355
GEN_AI_SYSTEM,
56+
GEN_AI_TOKEN_TYPE,
5457
GEN_AI_USAGE_INPUT_TOKENS,
5558
GEN_AI_USAGE_OUTPUT_TOKENS,
5659
GenAiOperationNameValues,
5760
GenAiSystemValues,
61+
GenAiTokenTypeValues,
62+
)
63+
from opentelemetry.semconv._incubating.metrics.gen_ai_metrics import (
64+
GEN_AI_CLIENT_OPERATION_DURATION,
65+
GEN_AI_CLIENT_TOKEN_USAGE,
5866
)
5967
from opentelemetry.trace.span import Span
6068
from opentelemetry.trace.status import Status, StatusCode
6169

6270
_logger = logging.getLogger(__name__)
6371

72+
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
73+
0.01,
74+
0.02,
75+
0.04,
76+
0.08,
77+
0.16,
78+
0.32,
79+
0.64,
80+
1.28,
81+
2.56,
82+
5.12,
83+
10.24,
84+
20.48,
85+
40.96,
86+
81.92,
87+
]
88+
89+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
90+
1,
91+
4,
92+
16,
93+
64,
94+
256,
95+
1024,
96+
4096,
97+
16384,
98+
65536,
99+
262144,
100+
1048576,
101+
4194304,
102+
16777216,
103+
67108864,
104+
]
105+
64106
_MODEL_ID_KEY: str = "modelId"
65107

66108

@@ -88,6 +130,40 @@ def should_end_span_on_exit(self):
88130
not in self._DONT_CLOSE_SPAN_ON_END_OPERATIONS
89131
)
90132

133+
def setup_metrics(self, meter: Meter, metrics: dict[str, Instrument]):
134+
metrics[GEN_AI_CLIENT_OPERATION_DURATION] = meter.create_histogram(
135+
name=GEN_AI_CLIENT_OPERATION_DURATION,
136+
description="GenAI operation duration",
137+
unit="s",
138+
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
139+
)
140+
metrics[GEN_AI_CLIENT_TOKEN_USAGE] = meter.create_histogram(
141+
name=GEN_AI_CLIENT_TOKEN_USAGE,
142+
description="Measures number of input and output tokens used",
143+
unit="{token}",
144+
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
145+
)
146+
147+
def _extract_metrics_attributes(self) -> _AttributeMapT:
148+
attributes = {GEN_AI_SYSTEM: GenAiSystemValues.AWS_BEDROCK.value}
149+
150+
model_id = self._call_context.params.get(_MODEL_ID_KEY)
151+
if not model_id:
152+
return attributes
153+
154+
attributes[GEN_AI_REQUEST_MODEL] = model_id
155+
156+
# titan in invoke model is a text completion one
157+
if "body" in self._call_context.params and "amazon.titan" in model_id:
158+
attributes[GEN_AI_OPERATION_NAME] = (
159+
GenAiOperationNameValues.TEXT_COMPLETION.value
160+
)
161+
else:
162+
attributes[GEN_AI_OPERATION_NAME] = (
163+
GenAiOperationNameValues.CHAT.value
164+
)
165+
return attributes
166+
91167
def extract_attributes(self, attributes: _AttributeMapT):
92168
if self._call_context.operation not in self._HANDLED_OPERATIONS:
93169
return
@@ -251,16 +327,18 @@ def before_service_call(
251327
for event in message_to_event(message, capture_content):
252328
event_logger.emit(event)
253329

254-
if not span.is_recording():
255-
return
330+
if span.is_recording():
331+
operation_name = span.attributes.get(GEN_AI_OPERATION_NAME, "")
332+
request_model = span.attributes.get(GEN_AI_REQUEST_MODEL, "")
333+
# avoid setting to an empty string if are not available
334+
if operation_name and request_model:
335+
span.update_name(f"{operation_name} {request_model}")
256336

257-
operation_name = span.attributes.get(GEN_AI_OPERATION_NAME, "")
258-
request_model = span.attributes.get(GEN_AI_REQUEST_MODEL, "")
259-
# avoid setting to an empty string if are not available
260-
if operation_name and request_model:
261-
span.update_name(f"{operation_name} {request_model}")
337+
# this is used to calculate the operation duration metric, duration may be skewed by request_hook
338+
# pylint: disable=attribute-defined-outside-init
339+
self._operation_start = default_timer()
262340

263-
# pylint: disable=no-self-use
341+
# pylint: disable=no-self-use,too-many-locals
264342
def _converse_on_success(
265343
self,
266344
span: Span,
@@ -300,6 +378,37 @@ def _converse_on_success(
300378
)
301379
)
302380

381+
metrics = instrumentor_context.metrics
382+
metrics_attributes = self._extract_metrics_attributes()
383+
if operation_duration_histogram := metrics.get(
384+
GEN_AI_CLIENT_OPERATION_DURATION
385+
):
386+
duration = max((default_timer() - self._operation_start), 0)
387+
operation_duration_histogram.record(
388+
duration,
389+
attributes=metrics_attributes,
390+
)
391+
392+
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
393+
if usage := result.get("usage"):
394+
if input_tokens := usage.get("inputTokens"):
395+
input_attributes = {
396+
**metrics_attributes,
397+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
398+
}
399+
token_usage_histogram.record(
400+
input_tokens, input_attributes
401+
)
402+
403+
if output_tokens := usage.get("outputTokens"):
404+
output_attributes = {
405+
**metrics_attributes,
406+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
407+
}
408+
token_usage_histogram.record(
409+
output_tokens, output_attributes
410+
)
411+
303412
def _invoke_model_on_success(
304413
self,
305414
span: Span,
@@ -338,12 +447,31 @@ def _invoke_model_on_success(
338447
if original_body is not None:
339448
original_body.close()
340449

341-
def _on_stream_error_callback(self, span: Span, exception):
450+
def _on_stream_error_callback(
451+
self,
452+
span: Span,
453+
exception,
454+
instrumentor_context: _BotocoreInstrumentorContext,
455+
):
342456
span.set_status(Status(StatusCode.ERROR, str(exception)))
343457
if span.is_recording():
344458
span.set_attribute(ERROR_TYPE, type(exception).__qualname__)
345459
span.end()
346460

461+
metrics = instrumentor_context.metrics
462+
metrics_attributes = {
463+
**self._extract_metrics_attributes(),
464+
ERROR_TYPE: type(exception).__qualname__,
465+
}
466+
if operation_duration_histogram := metrics.get(
467+
GEN_AI_CLIENT_OPERATION_DURATION
468+
):
469+
duration = max((default_timer() - self._operation_start), 0)
470+
operation_duration_histogram.record(
471+
duration,
472+
attributes=metrics_attributes,
473+
)
474+
347475
def on_success(
348476
self,
349477
span: Span,
@@ -367,7 +495,9 @@ def stream_done_callback(response):
367495
span.end()
368496

369497
def stream_error_callback(exception):
370-
self._on_stream_error_callback(span, exception)
498+
self._on_stream_error_callback(
499+
span, exception, instrumentor_context
500+
)
371501

372502
result["stream"] = ConverseStreamWrapper(
373503
result["stream"],
@@ -405,7 +535,9 @@ def invoke_model_stream_done_callback(response):
405535
span.end()
406536

407537
def invoke_model_stream_error_callback(exception):
408-
self._on_stream_error_callback(span, exception)
538+
self._on_stream_error_callback(
539+
span, exception, instrumentor_context
540+
)
409541

410542
result["body"] = InvokeModelWithResponseStreamWrapper(
411543
result["body"],
@@ -415,7 +547,7 @@ def invoke_model_stream_error_callback(exception):
415547
)
416548
return
417549

418-
# pylint: disable=no-self-use
550+
# pylint: disable=no-self-use,too-many-locals
419551
def _handle_amazon_titan_response(
420552
self,
421553
span: Span,
@@ -445,7 +577,38 @@ def _handle_amazon_titan_response(
445577
)
446578
event_logger.emit(choice.to_choice_event())
447579

448-
# pylint: disable=no-self-use
580+
metrics = instrumentor_context.metrics
581+
metrics_attributes = self._extract_metrics_attributes()
582+
if operation_duration_histogram := metrics.get(
583+
GEN_AI_CLIENT_OPERATION_DURATION
584+
):
585+
duration = max((default_timer() - self._operation_start), 0)
586+
operation_duration_histogram.record(
587+
duration,
588+
attributes=metrics_attributes,
589+
)
590+
591+
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
592+
if input_tokens := response_body.get("inputTextTokenCount"):
593+
input_attributes = {
594+
**metrics_attributes,
595+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
596+
}
597+
token_usage_histogram.record(
598+
input_tokens, input_attributes
599+
)
600+
601+
if results := response_body.get("results"):
602+
if output_tokens := results[0].get("tokenCount"):
603+
output_attributes = {
604+
**metrics_attributes,
605+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
606+
}
607+
token_usage_histogram.record(
608+
output_tokens, output_attributes
609+
)
610+
611+
# pylint: disable=no-self-use,too-many-locals
449612
def _handle_amazon_nova_response(
450613
self,
451614
span: Span,
@@ -472,6 +635,37 @@ def _handle_amazon_nova_response(
472635
choice = _Choice.from_converse(response_body, capture_content)
473636
event_logger.emit(choice.to_choice_event())
474637

638+
metrics = instrumentor_context.metrics
639+
metrics_attributes = self._extract_metrics_attributes()
640+
if operation_duration_histogram := metrics.get(
641+
GEN_AI_CLIENT_OPERATION_DURATION
642+
):
643+
duration = max((default_timer() - self._operation_start), 0)
644+
operation_duration_histogram.record(
645+
duration,
646+
attributes=metrics_attributes,
647+
)
648+
649+
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
650+
if usage := response_body.get("usage"):
651+
if input_tokens := usage.get("inputTokens"):
652+
input_attributes = {
653+
**metrics_attributes,
654+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
655+
}
656+
token_usage_histogram.record(
657+
input_tokens, input_attributes
658+
)
659+
660+
if output_tokens := usage.get("outputTokens"):
661+
output_attributes = {
662+
**metrics_attributes,
663+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
664+
}
665+
token_usage_histogram.record(
666+
output_tokens, output_attributes
667+
)
668+
475669
# pylint: disable=no-self-use
476670
def _handle_anthropic_claude_response(
477671
self,
@@ -500,6 +694,37 @@ def _handle_anthropic_claude_response(
500694
)
501695
event_logger.emit(choice.to_choice_event())
502696

697+
metrics = instrumentor_context.metrics
698+
metrics_attributes = self._extract_metrics_attributes()
699+
if operation_duration_histogram := metrics.get(
700+
GEN_AI_CLIENT_OPERATION_DURATION
701+
):
702+
duration = max((default_timer() - self._operation_start), 0)
703+
operation_duration_histogram.record(
704+
duration,
705+
attributes=metrics_attributes,
706+
)
707+
708+
if token_usage_histogram := metrics.get(GEN_AI_CLIENT_TOKEN_USAGE):
709+
if usage := response_body.get("usage"):
710+
if input_tokens := usage.get("input_tokens"):
711+
input_attributes = {
712+
**metrics_attributes,
713+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.INPUT.value,
714+
}
715+
token_usage_histogram.record(
716+
input_tokens, input_attributes
717+
)
718+
719+
if output_tokens := usage.get("output_tokens"):
720+
output_attributes = {
721+
**metrics_attributes,
722+
GEN_AI_TOKEN_TYPE: GenAiTokenTypeValues.COMPLETION.value,
723+
}
724+
token_usage_histogram.record(
725+
output_tokens, output_attributes
726+
)
727+
503728
def on_error(
504729
self,
505730
span: Span,
@@ -515,3 +740,17 @@ def on_error(
515740

516741
if not self.should_end_span_on_exit():
517742
span.end()
743+
744+
metrics = instrumentor_context.metrics
745+
metrics_attributes = {
746+
**self._extract_metrics_attributes(),
747+
ERROR_TYPE: type(exception).__qualname__,
748+
}
749+
if operation_duration_histogram := metrics.get(
750+
GEN_AI_CLIENT_OPERATION_DURATION
751+
):
752+
duration = max((default_timer() - self._operation_start), 0)
753+
operation_duration_histogram.record(
754+
duration,
755+
attributes=metrics_attributes,
756+
)

‎instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/types.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from __future__ import annotations
16+
1517
import logging
1618
from typing import Any, Dict, Optional, Tuple
1719

1820
from opentelemetry._events import EventLogger
21+
from opentelemetry.metrics import Instrument, Meter
1922
from opentelemetry.trace import SpanKind
2023
from opentelemetry.trace.span import Span
2124
from opentelemetry.util.types import AttributeValue
@@ -91,8 +94,13 @@ def _get_attr(obj, name: str, default=None):
9194

9295

9396
class _BotocoreInstrumentorContext:
94-
def __init__(self, event_logger: EventLogger):
97+
def __init__(
98+
self,
99+
event_logger: EventLogger,
100+
metrics: Dict[str, Instrument] | None = None,
101+
):
95102
self.event_logger = event_logger
103+
self.metrics = metrics or {}
96104

97105

98106
class _AwsSdkExtension:
@@ -109,6 +117,11 @@ def event_logger_schema_version() -> str:
109117
"""Returns the event logger OTel schema version the extension is following"""
110118
return "1.30.0"
111119

120+
@staticmethod
121+
def meter_schema_version() -> str:
122+
"""Returns the meter OTel schema version the extension is following"""
123+
return "1.30.0"
124+
112125
def should_trace_service_call(self) -> bool: # pylint:disable=no-self-use
113126
"""Returns if the AWS SDK service call should be traced or not
114127
@@ -125,6 +138,12 @@ def should_end_span_on_exit(self) -> bool: # pylint:disable=no-self-use
125138
"""
126139
return True
127140

141+
def setup_metrics(self, meter: Meter, metrics: Dict[str, Instrument]):
142+
"""Callback which gets invoked to setup metrics.
143+
144+
Extensions might override this function to add to the metrics dictionary all the metrics
145+
they want to receive later in _BotocoreInstrumentorContext."""
146+
128147
def extract_attributes(self, attributes: _AttributeMapT):
129148
"""Callback which gets invoked before the span is created.
130149

‎instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py

+116
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,25 @@
1919

2020
from botocore.response import StreamingBody
2121

22+
from opentelemetry.instrumentation.botocore.extensions.bedrock import (
23+
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
24+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
25+
)
26+
from opentelemetry.sdk.metrics._internal.point import ResourceMetrics
2227
from opentelemetry.sdk.trace import ReadableSpan
2328
from opentelemetry.semconv._incubating.attributes import (
2429
event_attributes as EventAttributes,
2530
)
2631
from opentelemetry.semconv._incubating.attributes import (
2732
gen_ai_attributes as GenAIAttributes,
2833
)
34+
from opentelemetry.semconv._incubating.attributes.error_attributes import (
35+
ERROR_TYPE,
36+
)
37+
from opentelemetry.semconv._incubating.metrics.gen_ai_metrics import (
38+
GEN_AI_CLIENT_OPERATION_DURATION,
39+
GEN_AI_CLIENT_TOKEN_USAGE,
40+
)
2941

3042

3143
# pylint: disable=too-many-branches, too-many-locals
@@ -259,3 +271,107 @@ def assert_message_in_logs(log, event_name, expected_content, parent_span):
259271
expected_content
260272
), dict(log.log_record.body)
261273
assert_log_parent(log, parent_span)
274+
275+
276+
def assert_all_metric_attributes(
277+
data_point, operation_name: str, model: str, error_type: str | None = None
278+
):
279+
assert GenAIAttributes.GEN_AI_OPERATION_NAME in data_point.attributes
280+
assert (
281+
data_point.attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
282+
== operation_name
283+
)
284+
assert GenAIAttributes.GEN_AI_SYSTEM in data_point.attributes
285+
assert (
286+
data_point.attributes[GenAIAttributes.GEN_AI_SYSTEM]
287+
== GenAIAttributes.GenAiSystemValues.AWS_BEDROCK.value
288+
)
289+
assert GenAIAttributes.GEN_AI_REQUEST_MODEL in data_point.attributes
290+
assert data_point.attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] == model
291+
292+
if error_type is not None:
293+
assert ERROR_TYPE in data_point.attributes
294+
assert data_point.attributes[ERROR_TYPE] == error_type
295+
else:
296+
assert ERROR_TYPE not in data_point.attributes
297+
298+
299+
def assert_metrics(
300+
resource_metrics: ResourceMetrics,
301+
operation_name: str,
302+
model: str,
303+
input_tokens: float | None = None,
304+
output_tokens: float | None = None,
305+
error_type: str | None = None,
306+
):
307+
assert len(resource_metrics) == 1
308+
309+
metric_data = resource_metrics[0].scope_metrics[0].metrics
310+
if input_tokens is not None or output_tokens is not None:
311+
expected_metrics_data_len = 2
312+
else:
313+
expected_metrics_data_len = 1
314+
assert len(metric_data) == expected_metrics_data_len
315+
316+
duration_metric = next(
317+
(m for m in metric_data if m.name == GEN_AI_CLIENT_OPERATION_DURATION),
318+
None,
319+
)
320+
assert duration_metric is not None
321+
322+
duration_point = duration_metric.data.data_points[0]
323+
assert duration_point.sum > 0
324+
assert_all_metric_attributes(
325+
duration_point, operation_name, model, error_type
326+
)
327+
assert duration_point.explicit_bounds == tuple(
328+
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
329+
)
330+
331+
if input_tokens is not None:
332+
token_usage_metric = next(
333+
(m for m in metric_data if m.name == GEN_AI_CLIENT_TOKEN_USAGE),
334+
None,
335+
)
336+
assert token_usage_metric is not None
337+
338+
input_token_usage = next(
339+
(
340+
d
341+
for d in token_usage_metric.data.data_points
342+
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
343+
== GenAIAttributes.GenAiTokenTypeValues.INPUT.value
344+
),
345+
None,
346+
)
347+
assert input_token_usage is not None
348+
assert input_token_usage.sum == input_tokens
349+
350+
assert input_token_usage.explicit_bounds == tuple(
351+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
352+
)
353+
assert_all_metric_attributes(input_token_usage, operation_name, model)
354+
355+
if output_tokens is not None:
356+
token_usage_metric = next(
357+
(m for m in metric_data if m.name == GEN_AI_CLIENT_TOKEN_USAGE),
358+
None,
359+
)
360+
assert token_usage_metric is not None
361+
362+
output_token_usage = next(
363+
(
364+
d
365+
for d in token_usage_metric.data.data_points
366+
if d.attributes[GenAIAttributes.GEN_AI_TOKEN_TYPE]
367+
== GenAIAttributes.GenAiTokenTypeValues.COMPLETION.value
368+
),
369+
None,
370+
)
371+
assert output_token_usage is not None
372+
assert output_token_usage.sum == output_tokens
373+
374+
assert output_token_usage.explicit_bounds == tuple(
375+
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS
376+
)
377+
assert_all_metric_attributes(output_token_usage, operation_name, model)

‎instrumentation/opentelemetry-instrumentation-botocore/tests/conftest.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
InMemoryLogExporter,
1818
SimpleLogRecordProcessor,
1919
)
20+
from opentelemetry.sdk.metrics import (
21+
MeterProvider,
22+
)
23+
from opentelemetry.sdk.metrics.export import (
24+
InMemoryMetricReader,
25+
)
2026
from opentelemetry.sdk.trace import TracerProvider
2127
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
2228
from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
@@ -36,6 +42,12 @@ def fixture_log_exporter():
3642
yield exporter
3743

3844

45+
@pytest.fixture(scope="function", name="metric_reader")
46+
def fixture_metric_reader():
47+
reader = InMemoryMetricReader()
48+
yield reader
49+
50+
3951
@pytest.fixture(scope="function", name="tracer_provider")
4052
def fixture_tracer_provider(span_exporter):
4153
provider = TracerProvider()
@@ -52,6 +64,15 @@ def fixture_event_logger_provider(log_exporter):
5264
return event_logger_provider
5365

5466

67+
@pytest.fixture(scope="function", name="meter_provider")
68+
def fixture_meter_provider(metric_reader):
69+
meter_provider = MeterProvider(
70+
metric_readers=[metric_reader],
71+
)
72+
73+
return meter_provider
74+
75+
5576
@pytest.fixture
5677
def bedrock_runtime_client():
5778
return boto3.client("bedrock-runtime")
@@ -81,7 +102,9 @@ def vcr_config():
81102

82103

83104
@pytest.fixture(scope="function")
84-
def instrument_no_content(tracer_provider, event_logger_provider):
105+
def instrument_no_content(
106+
tracer_provider, event_logger_provider, meter_provider
107+
):
85108
os.environ.update(
86109
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "False"}
87110
)
@@ -90,6 +113,7 @@ def instrument_no_content(tracer_provider, event_logger_provider):
90113
instrumentor.instrument(
91114
tracer_provider=tracer_provider,
92115
event_logger_provider=event_logger_provider,
116+
meter_provider=meter_provider,
93117
)
94118

95119
yield instrumentor
@@ -98,14 +122,17 @@ def instrument_no_content(tracer_provider, event_logger_provider):
98122

99123

100124
@pytest.fixture(scope="function")
101-
def instrument_with_content(tracer_provider, event_logger_provider):
125+
def instrument_with_content(
126+
tracer_provider, event_logger_provider, meter_provider
127+
):
102128
os.environ.update(
103129
{OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "True"}
104130
)
105131
instrumentor = BotocoreInstrumentor()
106132
instrumentor.instrument(
107133
tracer_provider=tracer_provider,
108134
event_logger_provider=event_logger_provider,
135+
meter_provider=meter_provider,
109136
)
110137

111138
yield instrumentor

‎instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py

+40
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
assert_completion_attributes_from_streaming_body,
3535
assert_converse_completion_attributes,
3636
assert_message_in_logs,
37+
assert_metrics,
3738
assert_stream_completion_attributes,
3839
)
3940

@@ -51,9 +52,11 @@ def filter_message_keys(message, keys):
5152
def test_converse_with_content(
5253
span_exporter,
5354
log_exporter,
55+
metric_reader,
5456
bedrock_runtime_client,
5557
instrument_with_content,
5658
):
59+
# pylint:disable=too-many-locals
5760
messages = [{"role": "user", "content": [{"text": "Say this is a test"}]}]
5861

5962
llm_model_value = "amazon.titan-text-lite-v1"
@@ -95,6 +98,13 @@ def test_converse_with_content(
9598
}
9699
assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
97100

101+
input_tokens = response["usage"]["inputTokens"]
102+
output_tokens = response["usage"]["outputTokens"]
103+
metrics = metric_reader.get_metrics_data().resource_metrics
104+
assert_metrics(
105+
metrics, "chat", llm_model_value, input_tokens, output_tokens
106+
)
107+
98108

99109
@pytest.mark.skipif(
100110
BOTO3_VERSION < (1, 35, 56), reason="Converse API not available"
@@ -103,6 +113,7 @@ def test_converse_with_content(
103113
def test_converse_with_content_different_events(
104114
span_exporter,
105115
log_exporter,
116+
metric_reader,
106117
bedrock_runtime_client,
107118
instrument_with_content,
108119
):
@@ -150,6 +161,13 @@ def test_converse_with_content_different_events(
150161
}
151162
assert_message_in_logs(logs[4], "gen_ai.choice", choice_body, span)
152163

164+
input_tokens = response["usage"]["inputTokens"]
165+
output_tokens = response["usage"]["outputTokens"]
166+
metrics = metric_reader.get_metrics_data().resource_metrics
167+
assert_metrics(
168+
metrics, "chat", llm_model_value, input_tokens, output_tokens
169+
)
170+
153171

154172
def converse_tool_call(
155173
span_exporter, log_exporter, bedrock_runtime_client, expect_content
@@ -452,6 +470,7 @@ def test_converse_tool_call_no_content(
452470
def test_converse_with_invalid_model(
453471
span_exporter,
454472
log_exporter,
473+
metric_reader,
455474
bedrock_runtime_client,
456475
instrument_with_content,
457476
):
@@ -479,6 +498,11 @@ def test_converse_with_invalid_model(
479498
user_content = filter_message_keys(messages[0], ["content"])
480499
assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
481500

501+
metrics = metric_reader.get_metrics_data().resource_metrics
502+
assert_metrics(
503+
metrics, "chat", llm_model_value, error_type="ValidationException"
504+
)
505+
482506

483507
@pytest.mark.skipif(
484508
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"
@@ -487,6 +511,7 @@ def test_converse_with_invalid_model(
487511
def test_converse_stream_with_content(
488512
span_exporter,
489513
log_exporter,
514+
metric_reader,
490515
bedrock_runtime_client,
491516
instrument_with_content,
492517
):
@@ -553,6 +578,11 @@ def test_converse_stream_with_content(
553578
}
554579
assert_message_in_logs(logs[1], "gen_ai.choice", choice_body, span)
555580

581+
metrics = metric_reader.get_metrics_data().resource_metrics
582+
assert_metrics(
583+
metrics, "chat", llm_model_value, input_tokens, output_tokens
584+
)
585+
556586

557587
@pytest.mark.skipif(
558588
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"
@@ -561,6 +591,7 @@ def test_converse_stream_with_content(
561591
def test_converse_stream_with_content_different_events(
562592
span_exporter,
563593
log_exporter,
594+
metric_reader,
564595
bedrock_runtime_client,
565596
instrument_with_content,
566597
):
@@ -614,6 +645,9 @@ def test_converse_stream_with_content_different_events(
614645
}
615646
assert_message_in_logs(logs[4], "gen_ai.choice", choice_body, span)
616647

648+
metrics = metric_reader.get_metrics_data().resource_metrics
649+
assert_metrics(metrics, "chat", llm_model_value, mock.ANY, mock.ANY)
650+
617651

618652
def _rebuild_stream_message(response):
619653
message = {"content": []}
@@ -986,6 +1020,7 @@ def test_converse_stream_no_content_tool_call(
9861020
def test_converse_stream_handles_event_stream_error(
9871021
span_exporter,
9881022
log_exporter,
1023+
metric_reader,
9891024
bedrock_runtime_client,
9901025
instrument_with_content,
9911026
):
@@ -1039,6 +1074,11 @@ def test_converse_stream_handles_event_stream_error(
10391074
user_content = filter_message_keys(messages[0], ["content"])
10401075
assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span)
10411076

1077+
metrics = metric_reader.get_metrics_data().resource_metrics
1078+
assert_metrics(
1079+
metrics, "chat", llm_model_value, error_type="EventStreamError"
1080+
)
1081+
10421082

10431083
@pytest.mark.skipif(
10441084
BOTO3_VERSION < (1, 35, 56), reason="ConverseStream API not available"

0 commit comments

Comments
 (0)
Please sign in to comment.