diff --git a/CHANGELOG.md b/CHANGELOG.md
index 93f0cab4fc..6177babe1a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   ([#4360](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4360))
 - `opentelemetry-instrumentation-aiohttp-server`: Use `canonical` attribute of the `Resource` as a span name
   ([#3896](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3896))
+- `opentelemetry-instrumentation-flask`: Wrap the `wsgi_app` call in `try`/`finally` so the `http.server.active_requests` gauge is decremented when a request raises, preventing long-term gauge drift
+  ([#4433](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4433))
 
 ### Breaking changes
 
diff --git a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
index 02c6ea693f..269df59f94 100644
--- a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
+++ b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py
@@ -416,48 +416,50 @@ def _start_response(status, response_headers, *args, **kwargs):
                     response_hook(span, status, response_headers)
             return start_response(status, response_headers, *args, **kwargs)
 
-        result = wsgi_app(wrapped_app_environ, _start_response)
-
-        # Note: Streaming response context cleanup is now handled in the Flask teardown function
-        # (_wrapped_teardown_request) to ensure proper cleanup following Logfire's recommendations
-        # for OpenTelemetry generator context management
-
-        if should_trace:
-            duration_s = default_timer() - start
-            # Get the span from wrapped_app_environ and re-create context manually
-            # to pass to histogram for exemplars generation
-            span = wrapped_app_environ.get(_ENVIRON_SPAN_KEY)
-            metrics_context = trace.set_span_in_context(span)
-
-            if duration_histogram_old:
-                duration_attrs_old = otel_wsgi._parse_duration_attrs(
-                    attributes, _StabilityMode.DEFAULT
-                )
+        try:
+            result = wsgi_app(wrapped_app_environ, _start_response)
 
-                if request_route:
-                    # http.target to be included in old semantic conventions
-                    duration_attrs_old[HTTP_TARGET] = str(request_route)
-                duration_histogram_old.record(
-                    max(round(duration_s * 1000), 0),
-                    duration_attrs_old,
-                    context=metrics_context,
-                )
-            if duration_histogram_new:
-                duration_attrs_new = otel_wsgi._parse_duration_attrs(
-                    attributes, _StabilityMode.HTTP
-                )
+            # Note: Streaming response context cleanup is now handled in the Flask teardown function
+            # (_wrapped_teardown_request) to ensure proper cleanup following Logfire's recommendations
+            # for OpenTelemetry generator context management
+
+            if should_trace:
+                duration_s = default_timer() - start
+                # Get the span from wrapped_app_environ and re-create context manually
+                # to pass to histogram for exemplars generation
+                span = wrapped_app_environ.get(_ENVIRON_SPAN_KEY)
+                metrics_context = trace.set_span_in_context(span)
+
+                if duration_histogram_old:
+                    duration_attrs_old = otel_wsgi._parse_duration_attrs(
+                        attributes, _StabilityMode.DEFAULT
+                    )
+
+                    if request_route:
+                        # http.target to be included in old semantic conventions
+                        duration_attrs_old[HTTP_TARGET] = str(request_route)
+                    duration_histogram_old.record(
+                        max(round(duration_s * 1000), 0),
+                        duration_attrs_old,
+                        context=metrics_context,
+                    )
+                if duration_histogram_new:
+                    duration_attrs_new = otel_wsgi._parse_duration_attrs(
+                        attributes, _StabilityMode.HTTP
+                    )
 
-                if request_route:
-                    duration_attrs_new[HTTP_ROUTE] = str(request_route)
+                    if request_route:
+                        duration_attrs_new[HTTP_ROUTE] = str(request_route)
 
-                duration_histogram_new.record(
-                    max(duration_s, 0),
-                    duration_attrs_new,
-                    context=metrics_context,
-                )
+                    duration_histogram_new.record(
+                        max(duration_s, 0),
+                        duration_attrs_new,
+                        context=metrics_context,
+                    )
 
-        active_requests_counter.add(-1, active_requests_count_attrs)
-        return result
+            return result
+        finally:
+            active_requests_counter.add(-1, active_requests_count_attrs)
 
 
 def _should_trace(excluded_urls) -> bool:
     return bool(
diff --git a/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py b/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py
index 039ae9db83..eb3d4f6e1f 100644
--- a/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py
+++ b/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py
@@ -836,6 +836,41 @@ def test_duration_histogram_new_record_with_context_new_semconv(self):
                 span_arg.context.span_id, finished_span.context.span_id
             )
 
+    def test_active_requests_counter_decremented_on_error(self):
+        """Regression test for https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4431.
+
+        http.server.active_requests must be decremented even when the view
+        raises an exception, so it does not permanently leak.
+        """
+        self.client.get("/hello/123")
+
+        resp = self.client.get("/hello/500")
+        self.assertEqual(500, resp.status_code)
+
+        metrics_list = self.memory_metrics_reader.get_metrics_data()
+        active_requests_value = None
+        for resource_metric in metrics_list.resource_metrics:
+            for scope_metric in resource_metric.scope_metrics:
+                for metric in scope_metric.metrics:
+                    if metric.name != "http.server.active_requests":
+                        continue
+                    points = [
+                        p.value
+                        for p in metric.data.data_points
+                        if isinstance(p, NumberDataPoint)
+                    ]
+                    if points:
+                        active_requests_value = points[-1]
+
+        self.assertIsNotNone(
+            active_requests_value,
+            "http.server.active_requests metric not found",
+        )
+        self.assertEqual(
+            0,
+            active_requests_value,
+            f"active_requests counter leaked: expected 0 but got {active_requests_value}",
+        )
+
 
 class TestProgrammaticHooks(InstrumentationTest, WsgiTestBase):
     def setUp(self):
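
For context when reviewing: the sketch below (not part of the patch) distills the failure mode the `try`/`finally` fixes. `ActiveRequestsMiddleware`, `_raising_app`, and the plain integer counter are hypothetical stand-ins for the instrumentation's wrapped `wsgi_app` and its `UpDownCounter`. Before this change the decrement ran after the WSGI call on the happy path only, so every request that raised left the counter incremented forever.

```python
# Minimal sketch, standard library only: the decrement must sit in a
# ``finally`` block so it runs on both return and raise. All names here
# are illustrative, not the real instrumentation API.


class ActiveRequestsMiddleware:
    def __init__(self, wsgi_app):
        self.wsgi_app = wsgi_app
        self.active_requests = 0  # stand-in for the UpDownCounter

    def __call__(self, environ, start_response):
        self.active_requests += 1  # +1 on request entry
        try:
            return self.wsgi_app(environ, start_response)
        finally:
            # Runs whether the wrapped app returns or raises, so the
            # counter cannot drift upward on failing requests.
            self.active_requests -= 1


def _raising_app(environ, start_response):
    raise ValueError("boom")  # simulates a view raising mid-request


app = ActiveRequestsMiddleware(_raising_app)
try:
    app({}, lambda status, headers: None)
except ValueError:
    pass
assert app.active_requests == 0  # decremented despite the exception
```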