66using System ;
77using System . Collections . Generic ;
88using System . Linq ;
9+ using System . Net . Http ;
910using System . Text . RegularExpressions ;
1011using System . Threading . Tasks ;
1112using Datadog . Trace . Configuration ;
@@ -254,10 +255,7 @@ public async Task SubmitsOtlpTraces(string packageVersion, string datadogTracesE
254255 var testAgentHost = Environment . GetEnvironmentVariable ( "TEST_AGENT_HOST" ) ?? "localhost" ;
255256 var otlpPort = protocol == "grpc" ? 4317 : 4318 ;
256257
257- using ( var httpClient = new System . Net . Http . HttpClient ( ) )
258- {
259- await httpClient . GetAsync ( $ "http://{ testAgentHost } :4318/test/session/clear") ;
260- }
258+ await ClearTestAgentSession ( testAgentHost ) ;
261259
262260 // This is the key configuration that is set differently from previous test cases:
263261 // OTEL_TRACES_EXPORTER=otlp enables the DD SDK to emit traces (and trace stats) via OTLP
@@ -497,7 +495,6 @@ await Verifier.Verify(finalJson, settings)
497495
498496#if NET6_0_OR_GREATER
499497 [ SkippableTheory ]
500- [ Flaky ( "New test agent seems to not always be ready" , maxRetries : 3 ) ]
501498 [ Trait ( "Category" , "EndToEnd" ) ]
502499 [ MemberData ( nameof ( GetOtlpTestData ) ) ]
503500 public async Task SubmitsOtlpMetrics ( string packageVersion , string datadogMetricsEnabled , string otelMetricsEnabled , string protocol , bool useAgentHostBackup )
@@ -518,18 +515,16 @@ public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetric
518515 var testAgentHost = Environment . GetEnvironmentVariable ( "TEST_AGENT_HOST" ) ?? "localhost" ;
519516 var otlpPort = protocol == "grpc" ? 4317 : 4318 ;
520517
521- using ( var httpClient = new System . Net . Http . HttpClient ( ) )
522- {
523- await httpClient . GetAsync ( $ "http://{ testAgentHost } :4318/test/session/clear") ;
524- }
518+ await ClearTestAgentSession ( testAgentHost ) ;
525519
526520 SetEnvironmentVariable ( "DD_ENV" , string . Empty ) ;
527521 SetEnvironmentVariable ( "DD_SERVICE" , string . Empty ) ;
528522 SetEnvironmentVariable ( "DD_METRICS_OTEL_METER_NAMES" , "OpenTelemetryMetricsMeter" ) ;
529523 SetEnvironmentVariable ( "DD_METRICS_OTEL_ENABLED" , datadogMetricsEnabled ) ;
530524 SetEnvironmentVariable ( "OTEL_METRICS_EXPORTER_ENABLED" , otelMetricsEnabled ) ;
531525 SetEnvironmentVariable ( "OTEL_EXPORTER_OTLP_PROTOCOL" , protocol ) ;
532- SetEnvironmentVariable ( "OTEL_METRIC_EXPORT_INTERVAL" , "1000" ) ;
526+ // 60s so only the shutdown flush fires; periodic exports of observable instruments produce duplicate batches that break snapshot comparison
527+ SetEnvironmentVariable ( "OTEL_METRIC_EXPORT_INTERVAL" , "60000" ) ;
533528
534529 if ( useAgentHostBackup )
535530 {
@@ -546,13 +541,7 @@ public async Task SubmitsOtlpMetrics(string packageVersion, string datadogMetric
546541 using var agent = EnvironmentHelper . GetMockAgent ( ) ;
547542 using ( await RunSampleAndWaitForExit ( agent , packageVersion : packageVersion ?? "1.13.1" ) )
548543 {
549- using var httpClient = new System . Net . Http . HttpClient ( ) ;
550- var metricsResponse = await httpClient . GetAsync ( $ "http://{ testAgentHost } :4318/test/session/metrics") ;
551- metricsResponse . EnsureSuccessStatusCode ( ) ;
552-
553- var metricsJson = await metricsResponse . Content . ReadAsStringAsync ( ) ;
554- var metricsData = JToken . Parse ( metricsJson ) ;
555-
544+ var metricsData = await WaitForTestAgentData ( $ "http://{ testAgentHost } :4318/test/session/metrics") ;
556545 metricsData . Should ( ) . NotBeNullOrEmpty ( ) ;
557546
558547 foreach ( var attribute in metricsData . SelectTokens ( "$..resource.attributes[?(@.key == 'telemetry.sdk.version')]" ) )
@@ -594,7 +583,6 @@ await Verifier.Verify(formattedJson, settings)
594583
595584#if NETCOREAPP3_1_OR_GREATER
596585 [ SkippableTheory ]
597- [ Flaky ( "New test agent seems to not always be ready" , maxRetries : 3 ) ]
598586 [ Trait ( "Category" , "EndToEnd" ) ]
599587 [ MemberData ( nameof ( GetOtlpTestData ) ) ]
600588 public async Task SubmitsOtlpLogs ( string packageVersion , string datadogLogsEnabled , string otelLogsEnabled , string protocol , bool useAgentHostBackup )
@@ -613,18 +601,17 @@ public async Task SubmitsOtlpLogs(string packageVersion, string datadogLogsEnabl
613601 var testAgentHost = Environment . GetEnvironmentVariable ( "TEST_AGENT_HOST" ) ?? "localhost" ;
614602 var otlpPort = protocol == "grpc" ? 4317 : 4318 ;
615603
616- using ( var httpClient = new System . Net . Http . HttpClient ( ) )
617- {
618- await httpClient . GetAsync ( $ "http://{ testAgentHost } :4318/test/session/clear") ;
619- }
604+ await ClearTestAgentSession ( testAgentHost ) ;
620605
621606 SetEnvironmentVariable ( "DD_ENV" , "testing" ) ;
622607 SetEnvironmentVariable ( "DD_SERVICE" , "OtlpLogsService" ) ;
623608 SetEnvironmentVariable ( "OTEL_RESOURCE_ATTRIBUTES" , "service.name=OtlpLogsService,deployment.environment=testing" ) ;
624609 SetEnvironmentVariable ( "DD_LOGS_OTEL_ENABLED" , datadogLogsEnabled ) ;
625610 SetEnvironmentVariable ( "OTEL_LOGS_EXPORTER_ENABLED" , otelLogsEnabled ) ;
626611 SetEnvironmentVariable ( "OTEL_EXPORTER_OTLP_PROTOCOL" , protocol ) ;
627- SetEnvironmentVariable ( "OTEL_LOG_EXPORT_INTERVAL" , "1000" ) ;
612+ // Short delay gives the OTel SDK multiple periodic exports before LoggerProviderSdk.Dispose() hits its 5s shutdown timeout.
613+ // This is especially important for gRPC, where the first export warms the HTTP/2 connection.
614+ SetEnvironmentVariable ( "OTEL_BLRP_SCHEDULE_DELAY" , "500" ) ;
628615 SetEnvironmentVariable ( "DD_LOGS_DIRECT_SUBMISSION_MINIMUM_LEVEL" , "Verbose" ) ;
629616
630617 if ( useAgentHostBackup )
@@ -643,13 +630,7 @@ public async Task SubmitsOtlpLogs(string packageVersion, string datadogLogsEnabl
643630 {
644631 var endTimeNanoseconds = DateTimeOffset . UtcNow . ToUnixTimeNanoseconds ( ) ;
645632
646- using var httpClient = new System . Net . Http . HttpClient ( ) ;
647- var logsResponse = await httpClient . GetAsync ( $ "http://{ testAgentHost } :4318/test/session/logs") ;
648- logsResponse . EnsureSuccessStatusCode ( ) ;
649-
650- var logsJson = await logsResponse . Content . ReadAsStringAsync ( ) ;
651- var logsData = JToken . Parse ( logsJson ) ;
652-
633+ var logsData = await WaitForTestAgentData ( $ "http://{ testAgentHost } :4318/test/session/logs") ;
653634 logsData . Should ( ) . NotBeNullOrEmpty ( ) ;
654635 logsData . SelectTokens ( "$..log_records[*]" ) . Should ( ) . AllSatisfy ( logRecord =>
655636 {
@@ -704,6 +685,67 @@ await Verifier.Verify(formattedJson, settings)
704685 }
705686#endif
706687
/// <summary>
/// Clears the test-agent session, retrying while the agent is not yet ready.
/// A successful response also shows that the agent's HTTP endpoint on port 4318 is accepting connections.
/// </summary>
/// <param name="testAgentHost">Host name or address of the test agent.</param>
/// <param name="maxRetries">Total number of attempts to make; values below 1 are treated as a single attempt.</param>
/// <param name="delayMs">Delay, in milliseconds, between a failed attempt and the next one.</param>
/// <returns>A task that completes once the agent has answered with a success status code.</returns>
private static async Task ClearTestAgentSession(string testAgentHost, int maxRetries = 5, int delayMs = 1000)
{
    using var httpClient = new HttpClient { Timeout = TimeSpan.FromSeconds(5) };
    var url = $"http://{testAgentHost}:4318/test/session/clear";

    // Always make at least one request, even if the caller passes maxRetries <= 0.
    var totalAttempts = Math.Max(maxRetries, 1);

    for (var attempt = 1; attempt <= totalAttempts; attempt++)
    {
        try
        {
            // Dispose the response so the underlying connection is released promptly.
            using var response = await httpClient.GetAsync(url);
            response.EnsureSuccessStatusCode();
            return;
        }
        catch (Exception) when (attempt < totalAttempts)
        {
            // Deliberately broad: connection failures, client timeouts and non-success status codes
            // are all treated as "agent not ready yet". The filter lets the failure of the last
            // attempt propagate unchanged, so the test reports the real error.
            await Task.Delay(delayMs);
        }
    }
}
715+
/// <summary>
/// Polls the test-agent for data until non-empty results are returned or the timeout is reached.
/// The sample app exports data during shutdown, so there can be a brief delay
/// between process exit and data appearing in the test-agent.
/// </summary>
/// <param name="url">Test-agent endpoint to query.</param>
/// <param name="timeoutSeconds">How long to keep polling, in seconds, before making one last request.</param>
/// <param name="pollIntervalMs">Delay, in milliseconds, after a poll that returned no values.</param>
/// <returns>
/// The first parsed payload that has values, or, once the timeout has elapsed, whatever a final
/// request returns (possibly empty) so the caller's assertion shows the actual value.
/// </returns>
private static async Task<JToken> WaitForTestAgentData(string url, int timeoutSeconds = 30, int pollIntervalMs = 500)
{
    using var httpClient = new HttpClient { Timeout = TimeSpan.FromSeconds(10) };
    var timeout = TimeSpan.FromSeconds(timeoutSeconds);

    // Stopwatch is monotonic, so the polling window is unaffected by wall-clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    while (stopwatch.Elapsed < timeout)
    {
        var data = await FetchAsync();
        if (data.HasValues)
        {
            return data;
        }

        await Task.Delay(pollIntervalMs);
    }

    // Final attempt -- return whatever we get so the caller's assertion shows the actual value
    return await FetchAsync();

    // Issues one GET against the endpoint and parses the body; a non-success status code throws.
    async Task<JToken> FetchAsync()
    {
        // Dispose each response so repeated polling does not hold on to connections and buffers.
        using var response = await httpClient.GetAsync(url);
        response.EnsureSuccessStatusCode();

        var json = await response.Content.ReadAsStringAsync();
        return JToken.Parse(json);
    }
}
748+
707749 private static string GetSuffix ( string packageVersion )
708750 {
709751 // The snapshots are only different in .NET Core 2.1 - .NET 5 with package version 1.0.1
0 commit comments