Skip to content

Commit 15a04fd

Browse files
committed
Feat: Added Operation and GFE Metrics
1 parent 87a697f commit 15a04fd

13 files changed

+291
-58
lines changed

google/cloud/spanner_v1/_opentelemetry_tracing.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
except ImportError:
3434
HAS_OPENTELEMETRY_INSTALLED = False
3535

36+
from google.cloud.spanner_v1.metrics.metrics_capture import MetricsCapture
37+
3638
TRACER_NAME = "cloud.google.com/python/spanner"
3739
TRACER_VERSION = gapic_version.__version__
3840
extended_tracing_globally_disabled = (
@@ -107,26 +109,27 @@ def trace_call(name, session=None, extra_attributes=None, observability_options=
107109
with tracer.start_as_current_span(
108110
name, kind=trace.SpanKind.CLIENT, attributes=attributes
109111
) as span:
110-
try:
111-
yield span
112-
except Exception as error:
113-
span.set_status(Status(StatusCode.ERROR, str(error)))
114-
# OpenTelemetry-Python imposes invoking span.record_exception on __exit__
115-
# on any exception. We should file a bug later on with them to only
116-
# invoke .record_exception if not already invoked, hence we should not
117-
# invoke .record_exception on our own else we shall have 2 exceptions.
118-
raise
119-
else:
120-
# All spans still have set_status available even if for example
121-
# NonRecordingSpan doesn't have "_status".
122-
absent_span_status = getattr(span, "_status", None) is None
123-
if absent_span_status or span._status.status_code == StatusCode.UNSET:
124-
# OpenTelemetry-Python only allows a status change
125-
# if the current code is UNSET or ERROR. At the end
126-
# of the generator's consumption, only set it to OK
127-
# it wasn't previously set otherwise.
128-
# https://github.com/googleapis/python-spanner/issues/1246
129-
span.set_status(Status(StatusCode.OK))
112+
with MetricsCapture():
113+
try:
114+
yield span
115+
except Exception as error:
116+
span.set_status(Status(StatusCode.ERROR, str(error)))
117+
# OpenTelemetry-Python imposes invoking span.record_exception on __exit__
118+
# on any exception. We should file a bug later on with them to only
119+
# invoke .record_exception if not already invoked, hence we should not
120+
# invoke .record_exception on our own else we shall have 2 exceptions.
121+
raise
122+
else:
123+
# All spans still have set_status available even if for example
124+
# NonRecordingSpan doesn't have "_status".
125+
absent_span_status = getattr(span, "_status", None) is None
126+
if absent_span_status or span._status.status_code == StatusCode.UNSET:
127+
# OpenTelemetry-Python only allows a status change
128+
# if the current code is UNSET or ERROR. At the end
129+
# of the generator's consumption, only set it to OK
130+
# if it wasn't previously set otherwise.
131+
# https://github.com/googleapis/python-spanner/issues/1246
132+
span.set_status(Status(StatusCode.OK))
130133

131134

132135
def get_current_span():

google/cloud/spanner_v1/metrics/constants.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
# Copyright 2025 Google LLC
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2025 Google LLC
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
45
# you may not use this file except in compliance with the License.
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# -*- coding: utf-8 -*-
2+
# Copyright 2025 Google LLC All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""
16+
This module provides functionality for capturing metrics in Cloud Spanner operations.
17+
18+
It includes a context manager class, MetricsCapture, which automatically handles the
19+
start and completion of metrics tracing for a given operation. This ensures that metrics
20+
are consistently recorded for Cloud Spanner operations, facilitating observability and
21+
performance monitoring.
22+
"""
23+
24+
from .spanner_metrics_tracer_factory import SpannerMetricsTracerFactory
25+
26+
27+
class MetricsCapture:
    """Context manager that brackets a Cloud Spanner operation with metrics tracing.

    On entry a new metrics tracer is created through
    ``SpannerMetricsTracerFactory`` and published as the class attribute
    ``SpannerMetricsTracerFactory.current_metrics_tracer``. On exit the
    completion of the operation is recorded on whichever tracer is published
    there at that time.
    """

    def __enter__(self):
        """Create the tracer for the new operation and record its start.

        Returns:
            MetricsCapture: This context manager instance.
        """
        factory = SpannerMetricsTracerFactory()

        # Each operation gets its own tracer, shared via the factory class.
        tracer = factory.create_metrics_tracer()
        SpannerMetricsTracerFactory.current_metrics_tracer = tracer

        # NOTE(review): guard against the factory not producing a tracer so
        # that metrics bookkeeping can never break the wrapped operation;
        # confirm whether create_metrics_tracer() can actually return None.
        if tracer is not None:
            tracer.record_operation_start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Record the completion of the operation.

        Args:
            exc_type (Type[BaseException]): The exception type, if any.
            exc_value (BaseException): The exception value, if any.
            traceback (TracebackType): The traceback object, if any.

        Returns:
            bool: Always False, so an exception raised inside the ``with``
            body propagates after the metrics are recorded.
        """
        tracer = SpannerMetricsTracerFactory.current_metrics_tracer
        # Same guard as in __enter__: a missing tracer must not raise here,
        # which would mask the exception (if any) from the wrapped operation.
        if tracer is not None:
            tracer.record_operation_completion()
        return False  # Propagate the exception if any

google/cloud/spanner_v1/metrics/metrics_exporter.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# Copyright 2025 Google LLC
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");

google/cloud/spanner_v1/metrics/metrics_interceptor.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
GOOGLE_CLOUD_RESOURCE_KEY,
2121
SPANNER_METHOD_PREFIX,
2222
)
23+
2324
from typing import Dict
2425
from .spanner_metrics_tracer_factory import SpannerMetricsTracerFactory
2526
import re
@@ -143,4 +144,11 @@ def intercept(self, invoked_method, request_or_iterator, call_details):
143144
response = invoked_method(request_or_iterator, call_details)
144145
SpannerMetricsTracerFactory.current_metrics_tracer.record_attempt_completion()
145146

147+
# Process and send GFE metrics if enabled
148+
# Fixed attribute name: the shared tracer is published as
# ``current_metrics_tracer``; the previous ``current_metrics_trace`` spelling
# raised AttributeError whenever GFE metrics were enabled.
if SpannerMetricsTracerFactory.current_metrics_tracer.gfe_enabled:
    metadata = response.initial_metadata()
    SpannerMetricsTracerFactory.current_metrics_tracer.record_gfe_metrics(
        metadata
    )
153+
146154
return response

google/cloud/spanner_v1/metrics/metrics_tracer.py

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ class MetricAttemptTracer:
5656
direct_path_used: bool
5757
status: str
5858

59-
def __init__(self):
59+
def __init__(self) -> None:
6060
"""
6161
Initialize a MetricAttemptTracer instance with default values.
6262
@@ -181,8 +181,11 @@ class should not have any knowledge about the observability framework used for m
181181
_instrument_attempt_latency: "Histogram"
182182
_instrument_operation_counter: "Counter"
183183
_instrument_operation_latency: "Histogram"
184+
_instrument_gfe_latency: "Histogram"
185+
_instrument_gfe_missing_header_count: "Counter"
184186
current_op: MetricOpTracer
185187
enabled: bool
188+
gfe_enabled: bool
186189
method: str
187190

188191
def __init__(
@@ -193,21 +196,23 @@ def __init__(
193196
instrument_operation_latency: "Histogram",
194197
instrument_operation_counter: "Counter",
195198
client_attributes: Dict[str, str],
199+
gfe_enabled: bool = False,
196200
):
197201
"""
198202
Initialize a MetricsTracer instance with the given parameters.
199203
200-
This constructor initializes a MetricsTracer instance with the provided method name, enabled status, direct path enabled status,
201-
instrumented metrics for attempt latency, attempt counter, operation latency, operation counter, and client attributes.
202-
It sets up the necessary metrics tracing infrastructure for recording metrics related to RPC operations.
204+
This constructor sets up a MetricsTracer instance with the specified parameters, including the enabled status,
205+
instruments for measuring and counting attempt and operation metrics, and client attributes. It prepares the
206+
infrastructure needed for recording metrics related to RPC operations.
203207
204208
Args:
205-
enabled (bool): A flag indicating if metrics tracing is enabled.
206-
instrument_attempt_latency (Histogram): The instrument for measuring attempt latency.
207-
instrument_attempt_counter (Counter): The instrument for counting attempts.
208-
instrument_operation_latency (Histogram): The instrument for measuring operation latency.
209-
instrument_operation_counter (Counter): The instrument for counting operations.
210-
client_attributes (dict[str, str]): A dictionary of client attributes used for metrics tracing.
209+
enabled (bool): Indicates if metrics tracing is enabled.
210+
instrument_attempt_latency (Histogram): Instrument for measuring attempt latency.
211+
instrument_attempt_counter (Counter): Instrument for counting attempts.
212+
instrument_operation_latency (Histogram): Instrument for measuring operation latency.
213+
instrument_operation_counter (Counter): Instrument for counting operations.
214+
client_attributes (Dict[str, str]): Dictionary of client attributes used for metrics tracing.
215+
gfe_enabled (bool, optional): Indicates if GFE metrics are enabled. Defaults to False.
211216
"""
212217
self.current_op = MetricOpTracer()
213218
self._client_attributes = client_attributes
@@ -216,6 +221,7 @@ def __init__(
216221
self._instrument_operation_latency = instrument_operation_latency
217222
self._instrument_operation_counter = instrument_operation_counter
218223
self.enabled = enabled
224+
self.gfe_enabled = gfe_enabled
219225

220226
@staticmethod
221227
def _get_ms_time_diff(start: datetime, end: datetime) -> float:
@@ -322,7 +328,7 @@ def record_attempt_completion(self, status: str = StatusCode.OK.name) -> None:
322328
323329
If metrics tracing is not enabled, this method does not perform any operations.
324330
"""
325-
if not self.enabled:
331+
if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED:
326332
return
327333
self.current_op.current_attempt.status = status
328334

@@ -347,7 +353,7 @@ def record_operation_start(self) -> None:
347353
It is used to track the start time of an operation, which is essential for calculating operation latency and other metrics.
348354
If metrics tracing is not enabled, this method does not perform any operations.
349355
"""
350-
if not self.enabled:
356+
if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED:
351357
return
352358
self.current_op.start()
353359

@@ -360,7 +366,7 @@ def record_operation_completion(self) -> None:
360366
Additionally, it increments the operation count and records the attempt count for the operation.
361367
If metrics tracing is not enabled, this method does not perform any operations.
362368
"""
363-
if not self.enabled:
369+
if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED:
364370
return
365371
end_time = datetime.now()
366372
# Build Attributes
@@ -385,14 +391,37 @@ def record_operation_completion(self) -> None:
385391
self.current_op.attempt_count, attributes=attempt_attributes
386392
)
387393

394+
def record_gfe_latency(self, latency: int) -> None:
    """
    Record a GFE latency value on the GFE latency Histogram instrument.

    This is a no-op unless metrics tracing is enabled, OpenTelemetry is
    installed, and GFE metrics are enabled.

    Args:
        latency (int): The latency duration to be recorded.
    """
    if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED or not self.gfe_enabled:
        return
    # Use the attribute assigned in __init__ (``_client_attributes``), in line
    # with the other attribute-building helpers of this class.
    self._instrument_gfe_latency.record(
        amount=latency, attributes=self._client_attributes
    )
406+
407+
def record_gfe_missing_header_count(self) -> None:
    """
    Increment the counter for missing GFE headers by one.

    This is a no-op unless metrics tracing is enabled, OpenTelemetry is
    installed, and GFE metrics are enabled.
    """
    if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED or not self.gfe_enabled:
        return
    # Use the attribute assigned in __init__ (``_client_attributes``), in line
    # with the other attribute-building helpers of this class.
    self._instrument_gfe_missing_header_count.add(
        amount=1, attributes=self._client_attributes
    )
416+
388417
def _create_operation_otel_attributes(self) -> dict:
389418
"""
390419
Create additional attributes for operation metrics tracing.
391420
392421
This method populates the client attributes dictionary with the operation status if metrics tracing is enabled.
393422
It returns the updated client attributes dictionary.
394423
"""
395-
if not self.enabled:
424+
if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED:
396425
return {}
397426
attributes = self._client_attributes.copy()
398427
attributes[METRIC_LABEL_KEY_STATUS] = self.current_op.status
@@ -405,7 +434,7 @@ def _create_attempt_otel_attributes(self) -> dict:
405434
This method populates the attributes dictionary with the attempt status if metrics tracing is enabled and an attempt exists.
406435
It returns the updated attributes dictionary.
407436
"""
408-
if not self.enabled:
437+
if not self.enabled or not HAS_OPENTELEMETRY_INSTALLED:
409438
return {}
410439

411440
attributes = self._client_attributes.copy()

google/cloud/spanner_v1/metrics/metrics_tracer_factory.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
METRIC_LABEL_KEY_DATABASE,
3333
METRIC_LABEL_KEY_DIRECT_PATH_ENABLED,
3434
BUILT_IN_METRICS_METER_NAME,
35+
METRIC_NAME_GFE_LATENCY,
36+
METRIC_NAME_GFE_MISSING_HEADER_COUNT,
3537
)
3638

3739
from typing import Dict
@@ -50,10 +52,13 @@ class MetricsTracerFactory:
5052
"""Factory class for creating MetricTracer instances. This class facilitates the creation of MetricTracer objects, which are responsible for collecting and tracing metrics."""
5153

5254
enabled: bool
55+
gfe_enabled: bool
5356
_instrument_attempt_latency: "Histogram"
5457
_instrument_attempt_counter: "Counter"
5558
_instrument_operation_latency: "Histogram"
5659
_instrument_operation_counter: "Counter"
60+
_instrument_gfe_latency: "Histogram"
61+
_instrument_gfe_missing_header_count: "Counter"
5762
_client_attributes: Dict[str, str]
5863

5964
@property
@@ -307,3 +312,15 @@ def _create_metric_instruments(self, service_name: str) -> None:
307312
unit="1",
308313
description="Number of operations.",
309314
)
315+
316+
self._instrument_gfe_latency = meter.create_histogram(
317+
name=METRIC_NAME_GFE_LATENCY,
318+
unit="ms",
319+
description="GFE Latency.",
320+
)
321+
322+
self._instrument_gfe_missing_header_count = meter.create_counter(
323+
name=METRIC_NAME_GFE_MISSING_HEADER_COUNT,
324+
unit="1",
325+
description="GFE missing header count.",
326+
)

0 commit comments

Comments
 (0)