diff --git a/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml b/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml index b668fe308d32..9fe755988489 100644 --- a/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml +++ b/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml @@ -428,7 +428,6 @@ - diff --git a/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs b/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs index 115286d0b39b..ba85f1362c97 100644 --- a/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs +++ b/tracer/src/Datadog.Trace/Configuration/TracerSettings.cs @@ -250,10 +250,17 @@ not null when string.Equals(x, "http/protobuf", StringComparison.OrdinalIgnoreCa }, validator: null); + var otlpGeneralProtocolName = OtlpGeneralProtocol switch + { + OtlpProtocol.Grpc => "grpc", + OtlpProtocol.HttpProtobuf => "http/protobuf", + _ => "grpc", + }; + OtlpMetricsProtocol = config .WithKeys(ConfigurationKeys.OpenTelemetry.ExporterOtlpMetricsProtocol) .GetAs( - defaultValue: new(OtlpProtocol.Grpc, "grpc"), + defaultValue: new(OtlpGeneralProtocol, otlpGeneralProtocolName), converter: x => x switch { not null when string.Equals(x, "grpc", StringComparison.OrdinalIgnoreCase) => OtlpProtocol.Grpc, @@ -312,7 +319,7 @@ not null when string.Equals(x, "lowmemory", StringComparison.OrdinalIgnoreCase) OtlpLogsProtocol = config .WithKeys(ConfigurationKeys.OpenTelemetry.ExporterOtlpLogsProtocol) .GetAs( - defaultValue: new(OtlpProtocol.Grpc, "grpc"), + defaultValue: new(OtlpGeneralProtocol, otlpGeneralProtocolName), converter: x => x switch { not null when string.Equals(x, "grpc", StringComparison.OrdinalIgnoreCase) => OtlpProtocol.Grpc, diff --git a/tracer/src/Datadog.Trace/Logging/DirectSubmission/Sink/OtlpSubmissionLogSink.cs b/tracer/src/Datadog.Trace/Logging/DirectSubmission/Sink/OtlpSubmissionLogSink.cs index f5c29b88fe18..b9a40f17852c 100644 --- a/tracer/src/Datadog.Trace/Logging/DirectSubmission/Sink/OtlpSubmissionLogSink.cs +++ b/tracer/src/Datadog.Trace/Logging/DirectSubmission/Sink/OtlpSubmissionLogSink.cs @@ -76,7 +76,13 @@ protected override void DelayEvents(TimeSpan delayUntilNextFlush) public override async Task DisposeAsync() { - await DisposeAsync(true).ConfigureAwait(false); + // Final flush is awaited by base.DisposeAsync() and bounded by the HTTP + // client timeout; Shutdown() just releases the HTTP client resources. + await base.DisposeAsync().ConfigureAwait(false); + if (!_otlpExporter.Shutdown()) + { + _logger.Warning("OTLP exporter shutdown did not complete successfully."); + } } } #endif diff --git a/tracer/src/Datadog.Trace/OpenTelemetry/Logs/IOtlpExporter.cs b/tracer/src/Datadog.Trace/OpenTelemetry/Logs/IOtlpExporter.cs index 23a064c360a3..92d50f3591f1 100644 --- a/tracer/src/Datadog.Trace/OpenTelemetry/Logs/IOtlpExporter.cs +++ b/tracer/src/Datadog.Trace/OpenTelemetry/Logs/IOtlpExporter.cs @@ -26,10 +26,11 @@ internal interface IOtlpExporter Task ExportAsync(IReadOnlyList logs); /// - /// Shuts down the exporter and ensures all pending exports complete. + /// Releases the exporter's HTTP resources. Does not wait on pending exports: + /// the final flush runs synchronously in the sink's DisposeAsync before this is + /// called, bounded by the HTTP client timeout (OTEL_EXPORTER_OTLP_TIMEOUT). /// - /// Maximum time to wait for shutdown /// True if shutdown completed successfully - bool Shutdown(int timeoutMilliseconds); + bool Shutdown(); } #endif diff --git a/tracer/src/Datadog.Trace/OpenTelemetry/Logs/OtlpExporter.cs b/tracer/src/Datadog.Trace/OpenTelemetry/Logs/OtlpExporter.cs index f09877ff0552..e66eb06d0c07 100644 --- a/tracer/src/Datadog.Trace/OpenTelemetry/Logs/OtlpExporter.cs +++ b/tracer/src/Datadog.Trace/OpenTelemetry/Logs/OtlpExporter.cs @@ -129,11 +129,12 @@ public async Task ExportAsync(IReadOnlyList logs) } /// - /// Shuts down the exporter and ensures all pending exports complete. + /// Releases the exporter's HTTP resources. The final flush already ran + /// synchronously in the sink's DisposeAsync (bounded by the HTTP client + /// timeout, OTEL_EXPORTER_OTLP_TIMEOUT), so there is nothing further to wait on. /// - /// Maximum time to wait for shutdown /// True if shutdown completed successfully - public bool Shutdown(int timeoutMilliseconds) + public bool Shutdown() { try { @@ -158,14 +159,11 @@ private HttpClient CreateHttpClient() var httpClient = new HttpClient(tcpHandler) { Timeout = TimeSpan.FromMilliseconds(_timeoutMs), - DefaultRequestVersion = HttpVersion.Version20, - DefaultVersionPolicy = HttpVersionPolicy.RequestVersionOrHigher }; #else var httpClient = new HttpClient { Timeout = TimeSpan.FromMilliseconds(_timeoutMs), - DefaultRequestVersion = HttpVersion.Version20, }; #endif diff --git a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/InMemoryExporter.cs b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/InMemoryExporter.cs index 1667103742ef..fddf226cdf98 100644 --- a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/InMemoryExporter.cs +++ b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/InMemoryExporter.cs @@ -46,7 +46,7 @@ public override Task ExportAsync(IReadOnlyList metric return Task.FromResult(result); } - public override bool Shutdown(int timeoutMilliseconds) + public override bool Shutdown() { return true; } diff --git a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricExporter.cs b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricExporter.cs index 99de5d17d9f4..89ecbd0fe763 100644 --- a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricExporter.cs +++ b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricExporter.cs @@ -23,9 +23,11 @@ internal abstract class MetricExporter public abstract Task ExportAsync(IReadOnlyList metrics); /// - /// Shuts down the exporter. + /// Releases exporter resources. The final flush is driven by the caller + /// (MetricReader.StopAsync) and bounded by the export path's own timeout; + /// this method is not expected to block. /// - public abstract bool Shutdown(int timeoutMilliseconds); + public abstract bool Shutdown(); } } #endif diff --git a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricReader.cs b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricReader.cs index ff487799da3b..e1961fa76a64 100644 --- a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricReader.cs +++ b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/MetricReader.cs @@ -24,7 +24,6 @@ internal sealed class MetricReader { private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(typeof(MetricReader)); private readonly int _exportIntervalMs; - private readonly int _timeoutMs; private readonly MetricReaderHandler _handler; private readonly MetricExporter _exporter; private MeterListener? _listener; @@ -33,7 +32,6 @@ internal sealed class MetricReader public MetricReader(TracerSettings settings, MetricReaderHandler handler, MetricExporter exporter) { _exportIntervalMs = settings.OtelMetricExportIntervalMs; - _timeoutMs = settings.OtlpMetricsTimeoutMs; _handler = handler; _exporter = exporter; } @@ -95,7 +93,7 @@ public async Task StopAsync() } finally { - _exporter.Shutdown(_timeoutMs); + _exporter.Shutdown(); _listener?.Dispose(); _listener = null; Log.Debug("MetricReader stopped"); diff --git a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/OtlpExporter.cs b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/OtlpExporter.cs index f195450de373..7163f0065489 100644 --- a/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/OtlpExporter.cs +++ b/tracer/src/Datadog.Trace/OpenTelemetry/Metrics/OtlpExporter.cs @@ -126,11 +126,13 @@ public override async Task ExportAsync(IReadOnlyList } /// - /// Shuts down the exporter and ensures all pending exports complete. + /// Releases the exporter's HTTP resources. The final export already ran + /// synchronously in MetricReader.StopAsync and is bounded by the HTTP + /// request timeout (OTEL_EXPORTER_OTLP_METRICS_TIMEOUT), so there is + /// nothing further to wait on here. /// - /// Maximum time to wait for shutdown /// True if shutdown completed successfully, false otherwise - public override bool Shutdown(int timeoutMilliseconds) + public override bool Shutdown() { try { @@ -146,7 +148,7 @@ public override bool Shutdown(int timeoutMilliseconds) /// /// Creates an HttpClient with Unix Domain Socket support if the endpoint uses unix:// scheme. - /// For TCP/IP endpoints (http:// or https://), creates a standard HttpClient with HTTP/2. + /// For TCP/IP endpoints (http:// or https://), creates a standard HttpClient. /// private static HttpClient CreateHttpClient(Uri endpoint) { @@ -168,11 +170,7 @@ private static HttpClient CreateHttpClient(Uri endpoint) } }; - return new HttpClient(handler) - { - DefaultRequestVersion = HttpVersion.Version20, - DefaultVersionPolicy = HttpVersionPolicy.RequestVersionOrHigher - }; + return new HttpClient(handler); } // Standard TCP/IP endpoint @@ -181,11 +179,7 @@ private static HttpClient CreateHttpClient(Uri endpoint) AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate }; - return new HttpClient(tcpHandler) - { - DefaultRequestVersion = HttpVersion.Version20, - DefaultVersionPolicy = HttpVersionPolicy.RequestVersionOrHigher - }; + return new HttpClient(tcpHandler); } private async Task SendOtlpRequest(IReadOnlyList metrics) diff --git a/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/OpenTelemetrySdkTests.cs b/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/OpenTelemetrySdkTests.cs index c43f64f4cb3f..9c7cf8b1fc1b 100644 --- a/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/OpenTelemetrySdkTests.cs +++ b/tracer/test/Datadog.Trace.ClrProfiler.IntegrationTests/OpenTelemetrySdkTests.cs @@ -6,6 +6,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Net.Http; using System.Text.RegularExpressions; using System.Threading.Tasks; using Datadog.Trace.Configuration; @@ -254,10 +255,7 @@ public async Task SubmitsOtlpTraces(string packageVersion, string datadogTracesE var testAgentHost = Environment.GetEnvironmentVariable("TEST_AGENT_HOST") ?? "localhost"; var otlpPort = protocol == "grpc" ? 4317 : 4318; - using (var httpClient = new System.Net.Http.HttpClient()) - { - await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/clear"); - } + await ClearTestAgentSession(testAgentHost); // This is the key configuration that is set differently from previous test cases: // OTEL_TRACES_EXPORTER=otlp enables the DD SDK to emit traces (and trace stats) via OTLP @@ -281,6 +279,16 @@ public async Task SubmitsOtlpTraces(string packageVersion, string datadogTracesE var applicationStartTimeUnixNano = DateTimeOffset.UtcNow.ToUnixTimeNanoseconds(); using var agent = EnvironmentHelper.GetMockAgent(); + // When DD_AGENT_HOST=test-agent is set above, it also redirects the APM trace agent + // URL via the DD_TRACE_AGENT_HOSTNAME alias (the primary key wins). That points APM + // traces at test-agent:, which does not exist, so AgentWriter + // retries fill the tracer's shutdown window and can starve the DirectLogSubmission + // final flush. Pin the APM URL back to the in-process MockAgent. + if (useAgentHostBackup && agent is MockTracerAgent.TcpUdpAgent tcpAgent) + { + SetEnvironmentVariable("DD_TRACE_AGENT_URL", $"http://127.0.0.1:{tcpAgent.Port}"); + } + using (await RunSampleAndWaitForExit(agent, packageVersion: packageVersion ?? "1.13.1")) { using var httpClient = new System.Net.Http.HttpClient(); @@ -497,8 +505,6 @@ await Verifier.Verify(finalJson, settings) #if NET6_0_OR_GREATER [SkippableTheory] - [Trait("SkipInCI", "True")] - [Flaky("New test agent seems to not always be ready", maxRetries: 3)] [Trait("Category", "EndToEnd")] [MemberData(nameof(GetOtlpTestData))] public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetricsEnabled, string otelMetricsEnabled, string protocol, bool useAgentHostBackup) @@ -519,10 +525,7 @@ public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetric var testAgentHost = Environment.GetEnvironmentVariable("TEST_AGENT_HOST") ?? "localhost"; var otlpPort = protocol == "grpc" ? 4317 : 4318; - using (var httpClient = new System.Net.Http.HttpClient()) - { - await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/clear"); - } + await ClearTestAgentSession(testAgentHost); SetEnvironmentVariable("DD_ENV", string.Empty); SetEnvironmentVariable("DD_SERVICE", string.Empty); @@ -530,7 +533,8 @@ public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetric SetEnvironmentVariable("DD_METRICS_OTEL_ENABLED", datadogMetricsEnabled); SetEnvironmentVariable("OTEL_METRICS_EXPORTER_ENABLED", otelMetricsEnabled); SetEnvironmentVariable("OTEL_EXPORTER_OTLP_PROTOCOL", protocol); - SetEnvironmentVariable("OTEL_METRIC_EXPORT_INTERVAL", "1000"); + // 60s so only the shutdown flush fires; periodic exports of observable instruments produce duplicate batches that break snapshot comparison + SetEnvironmentVariable("OTEL_METRIC_EXPORT_INTERVAL", "60000"); if (useAgentHostBackup) { @@ -545,15 +549,16 @@ public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetric SetEnvironmentVariable("OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE", runtimeMajor >= 9 ? "delta" : "cumulative"); using var agent = EnvironmentHelper.GetMockAgent(); - using (await RunSampleAndWaitForExit(agent, packageVersion: packageVersion ?? "1.13.1")) + // See comment in SubmitsOtlpTraces. DD_AGENT_HOST=test-agent also redirects the APM + // trace agent URL; pin it back to the in-process MockAgent. + if (useAgentHostBackup && agent is MockTracerAgent.TcpUdpAgent tcpAgent) { - using var httpClient = new System.Net.Http.HttpClient(); - var metricsResponse = await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/metrics"); - metricsResponse.EnsureSuccessStatusCode(); - - var metricsJson = await metricsResponse.Content.ReadAsStringAsync(); - var metricsData = JToken.Parse(metricsJson); + SetEnvironmentVariable("DD_TRACE_AGENT_URL", $"http://127.0.0.1:{tcpAgent.Port}"); + } + using (await RunSampleAndWaitForExit(agent, packageVersion: packageVersion ?? "1.13.1")) + { + var metricsData = await WaitForTestAgentData($"http://{testAgentHost}:4318/test/session/metrics"); metricsData.Should().NotBeNullOrEmpty(); foreach (var attribute in metricsData.SelectTokens("$..resource.attributes[?(@.key == 'telemetry.sdk.version')]")) @@ -595,8 +600,6 @@ await Verifier.Verify(formattedJson, settings) #if NETCOREAPP3_1_OR_GREATER [SkippableTheory] - [Trait("SkipInCI", "True")] - [Flaky("New test agent seems to not always be ready", maxRetries: 3)] [Trait("Category", "EndToEnd")] [MemberData(nameof(GetOtlpTestData))] public async Task SubmitsOtlpLogs(string packageVersion, string datadogLogsEnabled, string otelLogsEnabled, string protocol, bool useAgentHostBackup) @@ -615,10 +618,7 @@ public async Task SubmitsOtlpLogs(string packageVersion, string datadogLogsEnabl var testAgentHost = Environment.GetEnvironmentVariable("TEST_AGENT_HOST") ?? "localhost"; var otlpPort = protocol == "grpc" ? 4317 : 4318; - using (var httpClient = new System.Net.Http.HttpClient()) - { - await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/clear"); - } + await ClearTestAgentSession(testAgentHost); SetEnvironmentVariable("DD_ENV", "testing"); SetEnvironmentVariable("DD_SERVICE", "OtlpLogsService"); @@ -626,7 +626,9 @@ public async Task SubmitsOtlpLogs(string packageVersion, string datadogLogsEnabl SetEnvironmentVariable("DD_LOGS_OTEL_ENABLED", datadogLogsEnabled); SetEnvironmentVariable("OTEL_LOGS_EXPORTER_ENABLED", otelLogsEnabled); SetEnvironmentVariable("OTEL_EXPORTER_OTLP_PROTOCOL", protocol); - SetEnvironmentVariable("OTEL_LOG_EXPORT_INTERVAL", "1000"); + // Short delay gives the OTel SDK multiple periodic exports before LoggerProviderSdk.Dispose() hits its 5s shutdown timeout. + // This is especially important for gRPC, where the first export warms the HTTP/2 connection. + SetEnvironmentVariable("OTEL_BLRP_SCHEDULE_DELAY", "500"); SetEnvironmentVariable("DD_LOGS_DIRECT_SUBMISSION_MINIMUM_LEVEL", "Verbose"); if (useAgentHostBackup) @@ -641,17 +643,19 @@ public async Task SubmitsOtlpLogs(string packageVersion, string datadogLogsEnabl var startTimeNanoseconds = DateTimeOffset.UtcNow.ToUnixTimeNanoseconds(); using var agent = EnvironmentHelper.GetMockAgent(); + // See comment in SubmitsOtlpTraces. DD_AGENT_HOST=test-agent also redirects the APM + // trace agent URL; pin it back to the in-process MockAgent so AgentWriter retries + // don't starve the DirectLogSubmission final flush during shutdown. + if (useAgentHostBackup && agent is MockTracerAgent.TcpUdpAgent tcpAgent) + { + SetEnvironmentVariable("DD_TRACE_AGENT_URL", $"http://127.0.0.1:{tcpAgent.Port}"); + } + using (await RunSampleAndWaitForExit(agent, packageVersion: packageVersion ?? "1.13.1")) { var endTimeNanoseconds = DateTimeOffset.UtcNow.ToUnixTimeNanoseconds(); - using var httpClient = new System.Net.Http.HttpClient(); - var logsResponse = await httpClient.GetAsync($"http://{testAgentHost}:4318/test/session/logs"); - logsResponse.EnsureSuccessStatusCode(); - - var logsJson = await logsResponse.Content.ReadAsStringAsync(); - var logsData = JToken.Parse(logsJson); - + var logsData = await WaitForTestAgentData($"http://{testAgentHost}:4318/test/session/logs"); logsData.Should().NotBeNullOrEmpty(); logsData.SelectTokens("$..log_records[*]").Should().AllSatisfy(logRecord => { @@ -706,6 +710,69 @@ await Verifier.Verify(formattedJson, settings) } #endif + /// + /// Clears the test-agent session, retrying if the agent is not yet ready. + /// Ensures the OTLP HTTP endpoint is accepting connections before tests proceed. + /// + private static async Task ClearTestAgentSession(string testAgentHost, int maxRetries = 5, int delayMs = 1000) + { + using var httpClient = new HttpClient { Timeout = TimeSpan.FromSeconds(5) }; + var url = $"http://{testAgentHost}:4318/test/session/clear"; + + for (var attempt = 1; attempt <= maxRetries; attempt++) + { + try + { + var response = await httpClient.GetAsync(url); + response.EnsureSuccessStatusCode(); + return; + } + catch (Exception) when (attempt < maxRetries) + { + await Task.Delay(delayMs); + } + } + + // Final attempt -- let it throw if it fails + var finalResponse = await httpClient.GetAsync(url); + finalResponse.EnsureSuccessStatusCode(); + } + + /// + /// Polls the test-agent for data until non-empty results are returned or timeout is reached. + /// The sample app exports data during shutdown, so there can be a brief delay + /// between process exit and data appearing in the test-agent. The timeout is generous + /// because first-time gRPC connections (TCP+HTTP/2+TLS handshake) plus tracer shutdown + /// flushing can stack up on slower CI runners. + /// + private static async Task WaitForTestAgentData(string url, int timeoutSeconds = 60, int pollIntervalMs = 500) + { + using var httpClient = new HttpClient { Timeout = TimeSpan.FromSeconds(10) }; + var deadline = DateTime.UtcNow.AddSeconds(timeoutSeconds); + + while (DateTime.UtcNow < deadline) + { + var response = await httpClient.GetAsync(url); + response.EnsureSuccessStatusCode(); + + var json = await response.Content.ReadAsStringAsync(); + var data = JToken.Parse(json); + + if (data.HasValues) + { + return data; + } + + await Task.Delay(pollIntervalMs); + } + + // Final attempt -- return whatever we get so the caller's assertion shows the actual value + var finalResponse = await httpClient.GetAsync(url); + finalResponse.EnsureSuccessStatusCode(); + var finalJson = await finalResponse.Content.ReadAsStringAsync(); + return JToken.Parse(finalJson); + } + private static string GetSuffix(string packageVersion) { // The snapshots are only different in .NET Core 2.1 - .NET 5 with package version 1.0.1 diff --git a/tracer/test/Datadog.Trace.Tests/Configuration/TracerSettingsTests.cs b/tracer/test/Datadog.Trace.Tests/Configuration/TracerSettingsTests.cs index e6f998c6f77b..377a5ee44a39 100644 --- a/tracer/test/Datadog.Trace.Tests/Configuration/TracerSettingsTests.cs +++ b/tracer/test/Datadog.Trace.Tests/Configuration/TracerSettingsTests.cs @@ -978,6 +978,8 @@ public void OtelMetricExportTimeoutCustomValues(string value, int expected) [InlineData("invalid", null, OtlpProtocol.Grpc)] [InlineData("http/protobuf", null, OtlpProtocol.HttpProtobuf)] [InlineData("grpc", "http/protobuf", OtlpProtocol.Grpc)] + [InlineData(null, "http/protobuf", OtlpProtocol.HttpProtobuf)] + [InlineData(null, "grpc", OtlpProtocol.Grpc)] public void OtlpProtocolFallbacks(string metricsProtocol, string generalProtocol, object expected) { var source = CreateConfigurationSource( @@ -1073,6 +1075,8 @@ public void PartialFlushMinSpans(string value, int expected) [InlineData("invalid", null, OtlpProtocol.Grpc)] [InlineData("http/protobuf", null, OtlpProtocol.HttpProtobuf)] [InlineData("grpc", "http/protobuf", OtlpProtocol.Grpc)] + [InlineData(null, "http/protobuf", OtlpProtocol.HttpProtobuf)] + [InlineData(null, "grpc", OtlpProtocol.Grpc)] public void OtlpLogsProtocolFallbacks(string logsProtocol, string generalProtocol, object expected) { var source = CreateConfigurationSource( diff --git a/tracer/test/Datadog.Trace.Tests/Logging/DirectSubmission/Sink/OtlpSinkTests.cs b/tracer/test/Datadog.Trace.Tests/Logging/DirectSubmission/Sink/OtlpSinkTests.cs index 3f86c07abb3d..8753d678ac53 100644 --- a/tracer/test/Datadog.Trace.Tests/Logging/DirectSubmission/Sink/OtlpSinkTests.cs +++ b/tracer/test/Datadog.Trace.Tests/Logging/DirectSubmission/Sink/OtlpSinkTests.cs @@ -263,7 +263,7 @@ public async Task ExportAsync(IReadOnlyList logs) return await _exportFunc(logs).ConfigureAwait(false); } - public bool Shutdown(int timeoutMilliseconds) + public bool Shutdown() { return true; } diff --git a/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/CustomTracerProviderBuilderExtensions.cs b/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/CustomTracerProviderBuilderExtensions.cs index 295061700a40..1adcb6b1fe18 100644 --- a/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/CustomTracerProviderBuilderExtensions.cs +++ b/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/CustomTracerProviderBuilderExtensions.cs @@ -5,6 +5,7 @@ using OpenTelemetry.Metrics; #endif #if OTEL_1_9 +using Microsoft.Extensions.DependencyInjection; using OpenTelemetry.Logs; #endif using Microsoft.Extensions.Logging; @@ -61,33 +62,30 @@ public static MeterProviderBuilder AddOtlpExporterIfEnvironmentVariablePresent(t #if OTEL_1_9 public static class CustomLoggerFactoryBuilderExtensions { - public static ILoggerFactory AddOtlpExporterIfEnvironmentVariablePresent() + // Returns an IServiceProvider rather than an ILoggerFactory so callers can resolve + // the underlying OpenTelemetry.Logs.LoggerProvider and call Shutdown before process exit. + // LoggerProviderSdk.Dispose() caps its shutdown flush at 5s; with gRPC, the first export + // can exceed that due to TCP/HTTP/2/TLS handshake, causing batched logs to be dropped. + public static ServiceProvider CreateLoggerServices() { - // Check if OpenTelemetry Logs Exporter is enabled (similar to metrics) - if (Environment.GetEnvironmentVariable("OTEL_LOGS_EXPORTER_ENABLED") is string value - && value == "true") + var services = new ServiceCollection(); + services.AddLogging(builder => { - return LoggerFactory.Create(builder => + builder.SetMinimumLevel(LogLevel.Trace); + + if (Environment.GetEnvironmentVariable("OTEL_LOGS_EXPORTER_ENABLED") is string value + && value == "true") { - builder.SetMinimumLevel(LogLevel.Trace); #if NET6_0_OR_GREATER - builder.AddOpenTelemetry( - options => - { - options.AddOtlpExporter(); - } - ); + builder.AddOpenTelemetry(options => + { + options.AddOtlpExporter(); + }); #endif - }); - } - else - { - // Create logger factory without OTel - Datadog instrumentation will hook this - return LoggerFactory.Create(builder => - { - builder.SetMinimumLevel(LogLevel.Trace); - }); - } + } + }); + + return services.BuildServiceProvider(); } } #endif diff --git a/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/Program.cs b/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/Program.cs index 6cb75524308f..a0e2e2893347 100644 --- a/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/Program.cs +++ b/tracer/test/test-applications/integrations/Samples.OpenTelemetrySdk/Program.cs @@ -7,6 +7,7 @@ using OpenTelemetry.Metrics; #endif #if OTEL_1_9 +using Microsoft.Extensions.DependencyInjection; using OpenTelemetry.Logs; #endif using System.Collections.Generic; @@ -44,14 +45,21 @@ public static async Task Main(string[] args) .Build(); #if OTEL_1_2 - using var meterProvider = Sdk.CreateMeterProviderBuilder() + // Not `using var`: we call Shutdown explicitly below, and OTel SDK <= 1.3.2 re-collects + // on a second Shutdown/Dispose which produces a duplicate cumulative batch the test + // snapshot doesn't expect. Process exit handles cleanup. + var meterProvider = Sdk.CreateMeterProviderBuilder() .AddOtlpExporterIfEnvironmentVariablePresent() .Build(); #endif #if OTEL_1_9 - using var loggerFactory = CustomLoggerFactoryBuilderExtensions - .AddOtlpExporterIfEnvironmentVariablePresent(); + // Not `using var`: we call Shutdown on the LoggerProvider below with a generous + // timeout. Letting the ServiceProvider dispose here would trigger a second shutdown + // via LoggerProviderSdk.Dispose, which caps at 5s and can guillotine in-flight OTLP + // exports. Process exit handles cleanup, matching the meterProvider pattern above. + var loggerServices = CustomLoggerFactoryBuilderExtensions.CreateLoggerServices(); + var loggerFactory = loggerServices.GetRequiredService(); #endif _tracer = tracerProvider.GetTracer(serviceName); // The version is omitted so the ActivitySource.Version / otel.library.version is not set @@ -144,11 +152,22 @@ public static async Task Main(string[] args) } #if OTEL_1_2 - meterProvider?.Dispose(); + // Shutdown with a generous timeout so the single final Collect+Export can complete even + // when the first gRPC export has to negotiate TCP+HTTP/2+TLS. We avoid ForceFlush-then- + // Dispose because two Collects on cumulative-temporality observable instruments re-emit + // the same cumulative values and produce duplicate resource_metrics batches. Shutdown + // performs exactly one Collect; `meterProvider` is deliberately not `using var` so the + // runtime doesn't re-invoke Dispose->Shutdown after this (older OTel SDKs like 1.3.2 + // re-collect in that second call). + meterProvider?.Shutdown(timeoutMilliseconds: 30_000); #endif #if OTEL_1_9 - - loggerFactory?.Dispose(); + // Shutdown rather than ForceFlush: ForceFlush drains the processor queue but does not + // tear down the exporter's HttpClient, so a subsequent ServiceProvider dispose would + // still run LoggerProviderSdk.Dispose with its hard 5s shutdown cap and could guillotine + // in-flight exports. Shutdown drains and stops the processor with our timeout; we then + // let process exit clean up the ServiceProvider rather than risking the 5s Dispose race. + loggerServices.GetService()?.Shutdown(timeoutMilliseconds: 30_000); #endif }