Skip to content

Commit 0a7a562

Browse files
committed
chore: Add diagnostics for intermittent CI image pull failures
1 parent 3f97885 commit 0a7a562

6 files changed

Lines changed: 87 additions & 2 deletions

File tree

.github/workflows/cicd.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,9 @@ jobs:
6666
env:
6767
# Lowest API version that GitHub runners support.
6868
DOCKER_API_VERSION: 1.44
69+
# Enable debug-level logging to capture image pull diagnostics while we
70+
# investigate the intermittent registry pull failures in CI.
71+
TESTCONTAINERS_LOGGING_ENABLED: true
6972

7073
steps:
7174
- name: Checkout Repository

src/Testcontainers/Clients/DockerImageOperations.cs

Lines changed: 50 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ namespace DotNet.Testcontainers.Clients
22
{
33
using System;
44
using System.Collections.Generic;
5+
using System.Diagnostics;
6+
using System.Globalization;
57
using System.IO;
68
using System.Linq;
79
using System.Threading;
@@ -14,6 +16,12 @@ namespace DotNet.Testcontainers.Clients
1416

1517
internal sealed class DockerImageOperations : DockerApiClient, IDockerImageOperations
1618
{
19+
// Diagnostic counter shared across all instances to surface how many image pulls
20+
// are running concurrently in the same process. This helps determine whether the
21+
// intermittent CI pull failures correlate with bursts of parallel pulls (a race),
22+
// as opposed to isolated registry timeouts.
23+
private static int _pullsInFlight;
24+
1725
public DockerImageOperations(Guid sessionId, IDockerEndpointAuthenticationConfiguration dockerEndpointAuthConfig, ILogger logger)
1826
: base(sessionId, dockerEndpointAuthConfig, logger)
1927
{
@@ -70,8 +78,48 @@ public async Task CreateAsync(IImage image, IDockerRegistryAuthenticationConfigu
7078
IdentityToken = dockerRegistryAuthConfig.IdentityToken,
7179
};
7280

73-
await DockerClient.Images.CreateImageAsync(createParameters, authConfig, traceProgress, ct)
74-
.ConfigureAwait(false);
81+
var inFlightAtStart = Interlocked.Increment(ref _pullsInFlight);
82+
var stopwatch = Stopwatch.StartNew();
83+
84+
Logger.PullingDockerImage(image, dockerRegistryAuthConfig.RegistryEndpoint, inFlightAtStart);
85+
86+
try
87+
{
88+
await DockerClient.Images.CreateImageAsync(createParameters, authConfig, traceProgress, ct)
89+
.ConfigureAwait(false);
90+
}
91+
catch (Exception e)
92+
{
93+
stopwatch.Stop();
94+
var inFlightAtFailure = Volatile.Read(ref _pullsInFlight);
95+
var statusCode = e is DockerApiException dockerApiException ? dockerApiException.StatusCode.ToString() : e.GetType().Name;
96+
var responseBody = e is DockerApiException apiException ? apiException.ResponseBody : e.Message;
97+
var traceErrors = string.Join(" | ", traceProgress.Errors.Select(error => error.Message));
98+
Logger.FailedToPullDockerImage(image, dockerRegistryAuthConfig.RegistryEndpoint, stopwatch.ElapsedMilliseconds, inFlightAtStart, inFlightAtFailure, statusCode, responseBody, traceErrors, e);
99+
100+
// The diagnostics above are logged through the resource logger. In CI the tests run
101+
// under VSTest, which discards xUnit diagnostic messages, so the only output that
102+
// reliably reaches the CI logs is a failed test's exception. Re-throw with the
103+
// diagnostic context embedded in the message (keeping the original as the inner
104+
// exception) so the intermittent image pull failures can be investigated from CI.
105+
var diagnostics = string.Format(
106+
CultureInfo.InvariantCulture,
107+
"Failed to pull Docker image {0} from registry {1} after {2} ms (concurrent pulls at start: {3}, at failure: {4}); Docker API status: {5}; response: {6}; pull progress errors: {7}",
108+
image.FullName,
109+
dockerRegistryAuthConfig.RegistryEndpoint ?? "(default)",
110+
stopwatch.ElapsedMilliseconds,
111+
inFlightAtStart,
112+
inFlightAtFailure,
113+
statusCode,
114+
responseBody,
115+
string.IsNullOrEmpty(traceErrors) ? "(none)" : traceErrors);
116+
117+
throw new InvalidOperationException(diagnostics, e);
118+
}
119+
finally
120+
{
121+
Interlocked.Decrement(ref _pullsInFlight);
122+
}
75123

76124
Logger.DockerImageCreated(image);
77125
}

src/Testcontainers/Logger.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ static ConsoleLogger()
6666

6767
private ConsoleLogger()
6868
{
69+
// Allow enabling debug-level logging through an environment variable. This is
70+
// primarily useful in CI, where the default console logger is the only sink and
71+
// debug messages (image pull progress, diagnostics) would otherwise be hidden.
72+
var loggingEnabled = Environment.GetEnvironmentVariable("TESTCONTAINERS_LOGGING_ENABLED");
73+
DebugLogLevelEnabled = "1".Equals(loggingEnabled, StringComparison.OrdinalIgnoreCase)
74+
|| "true".Equals(loggingEnabled, StringComparison.OrdinalIgnoreCase);
6975
}
7076

7177
/// <summary>

src/Testcontainers/Logging.cs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@ internal static partial class Logging
6666
[LoggerMessage(Level = LogLevel.Information, Message = "Execute \"{Command}\" at Docker container {Id}")]
6767
private static partial void ExecuteCommandInDockerContainerCore(ILogger logger, string command, TruncatedId id);
6868

69+
[LoggerMessage(Level = LogLevel.Debug, Message = "Pulling Docker image {FullName} from registry {Registry} (concurrent pulls in this process: {InFlight})")]
70+
private static partial void PullingDockerImageCore(ILogger logger, string fullName, string registry, int inFlight);
71+
72+
[LoggerMessage(Level = LogLevel.Error, Message = "Failed to pull Docker image {FullName} from registry {Registry} after {ElapsedMilliseconds} ms (concurrent pulls at start: {InFlightAtStart}, at failure: {InFlightAtFailure}); Docker API status: {StatusCode}; response: {ResponseBody}; pull progress errors: {TraceErrors}")]
73+
private static partial void FailedToPullDockerImageCore(ILogger logger, string fullName, string registry, long elapsedMilliseconds, int inFlightAtStart, int inFlightAtFailure, string statusCode, string responseBody, string traceErrors, Exception exception);
74+
6975
[LoggerMessage(Level = LogLevel.Information, Message = "Docker image {FullName} created")]
7076
private static partial void DockerImageCreatedCore(ILogger logger, string fullName);
7177

@@ -209,6 +215,16 @@ public static void ExecuteCommandInDockerContainer(this ILogger logger, string i
209215
ExecuteCommandInDockerContainerCore(logger, commandLine, new TruncatedId(id));
210216
}
211217

218+
public static void PullingDockerImage(this ILogger logger, IImage image, string registry, int inFlight)
219+
{
220+
PullingDockerImageCore(logger, image.FullName, registry ?? "(default)", inFlight);
221+
}
222+
223+
public static void FailedToPullDockerImage(this ILogger logger, IImage image, string registry, long elapsedMilliseconds, int inFlightAtStart, int inFlightAtFailure, string statusCode, string responseBody, string traceErrors, Exception exception)
224+
{
225+
FailedToPullDockerImageCore(logger, image.FullName, registry ?? "(default)", elapsedMilliseconds, inFlightAtStart, inFlightAtFailure, statusCode, responseBody, string.IsNullOrEmpty(traceErrors) ? "(none)" : traceErrors, exception);
226+
}
227+
212228
public static void DockerImageCreated(this ILogger logger, IImage image)
213229
{
214230
DockerImageCreatedCore(logger, image.FullName);

tests/Directory.Build.props

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,11 @@
99
<PropertyGroup>
1010
<VSTestLogger>trx%3BLogFileName=$(MSBuildProjectName).trx</VSTestLogger>
1111
</PropertyGroup>
12+
<ItemGroup>
13+
<!-- The Testcontainers xUnit fixtures route their diagnostics (including the image
14+
pull diagnostics) to xUnit's IMessageSink as diagnostic messages, which xUnit
15+
suppresses by default. Enable diagnostic and live output so these messages show
16+
up in the CI logs while we investigate the intermittent image pull failures. -->
17+
<Content Include="$(MSBuildThisFileDirectory)xunit.runner.json" Link="xunit.runner.json" CopyToOutputDirectory="PreserveNewest"/>
18+
</ItemGroup>
1219
</Project>

tests/xunit.runner.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"$schema": "https://xunit.net/schema/current/xunit.runner.schema.json",
3+
"diagnosticMessages": true,
4+
"showLiveOutput": true
5+
}

0 commit comments

Comments
 (0)