Skip to content

Commit a856d9d

Browse files
authored
ci: pre-pull docker images with retry to fight registry flakes (#122)
1 parent 50f2f8c commit a856d9d

4 files changed

Lines changed: 162 additions & 38 deletions

File tree

.github/workflows/ci.yaml

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
test:
1919
name: Python ${{ matrix.python-version }} - ${{ matrix.cdist-group }}/3
2020
runs-on: ubuntu-latest
21-
timeout-minutes: 10
21+
timeout-minutes: 15
2222
strategy:
2323
fail-fast: false
2424
matrix:
@@ -42,6 +42,51 @@ jobs:
4242
- name: Intall dependencies
4343
run: uv sync --frozen --all-extras
4444

45+
# Pre-pull docker images with retry to absorb transient registry failures
46+
# (MCR WAF blocks on mcr.microsoft.com, Docker Hub anonymous rate limits)
47+
# before pytest starts spawning fixture subprocesses. Pull in parallel
48+
# (xargs -P6): sequential pulls of ~30 images cost 10+ min on a runner.
49+
- name: Pre-pull docker images
50+
run: |
51+
set -u
52+
images=(
53+
"mcr.microsoft.com/azure-storage/azurite"
54+
"mcr.microsoft.com/mssql/server:2022-latest"
55+
"postgres:11" "postgres:12" "postgres:13" "postgres:14"
56+
"postgres:15" "postgres:16" "postgres:17" "postgres:18"
57+
"pgvector/pgvector:pg15"
58+
"paradedb/paradedb:0.21.5-pg16"
59+
"google/alloydbomni:16"
60+
"mysql:5.6" "mysql:5.7" "mysql:8"
61+
"mariadb:11.3"
62+
"gvenzl/oracle-free:23-slim-faststart"
63+
"gvenzl/oracle-xe:18-slim-faststart"
64+
"redis:latest"
65+
"valkey/valkey:latest"
66+
"mongo:latest"
67+
"cockroachdb/cockroach:latest"
68+
"software.yugabyte.com/yugabytedb/yugabyte:latest"
69+
"quay.io/minio/minio"
70+
"minio/mc:latest"
71+
"rustfs/rustfs:latest"
72+
"rustfs/rc:latest"
73+
"gcr.io/cloud-spanner-emulator/emulator:latest"
74+
"ghcr.io/goccy/bigquery-emulator:latest"
75+
"gizmodata/gizmosql:latest"
76+
"elasticsearch:7.17.19"
77+
"elasticsearch:8.13.0"
78+
)
79+
printf '%s\n' "${images[@]}" | xargs -n1 -P6 -I{} bash -c '
80+
image="$1"
81+
for attempt in 1 2 3 4 5; do
82+
if docker pull --quiet "$image"; then
83+
exit 0
84+
fi
85+
[ "$attempt" -lt 5 ] && sleep $((attempt * attempt * 5))
86+
done
87+
echo "::warning::Failed to pull $image after 5 attempts"
88+
' _ {}
89+
4590
- if: matrix.python-version == '3.12'
4691
name: Run tests with coverage tracking
4792
run: uv run pytest --cdist-group=${{ matrix.cdist-group }}/3 -k "not elasticsearch"
@@ -83,6 +128,18 @@ jobs:
83128
- name: Intall dependencies
84129
run: uv sync --frozen --all-extras
85130

131+
- name: Pre-pull docker images
132+
run: |
133+
set -u
134+
for image in "elasticsearch:7.17.19" "elasticsearch:8.13.0"; do
135+
for attempt in 1 2 3 4 5; do
136+
if docker pull --quiet "$image"; then
137+
break
138+
fi
139+
[ "$attempt" -eq 5 ] && echo "::warning::Failed to pull $image" || sleep $((attempt * attempt * 5))
140+
done
141+
done
142+
86143
- name: Run tests with coverage tracking
87144
run: uv run pytest -k elasticsearch
88145

src/pytest_databases/_service.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,23 @@ def _stop_all_containers(client: DockerClient) -> None:
6262
ignore_removed=True,
6363
)
6464
for container in containers:
65-
if container.status == "running":
66-
container.kill()
67-
elif container.status in {"stopped", "dead"}:
68-
container.remove()
69-
elif container.status == "removing":
70-
continue
71-
else:
72-
msg = f"Cannot handle container in state {container.status}"
73-
raise RuntimeError(msg)
65+
# Containers may disappear between the list and the kill/remove call -
66+
# they are tagged remove=True so they vacate as soon as they stop, and
67+
# transient teardown can race with this loop. Treat 404 (already gone)
68+
# and 409 (removal already in progress) as success.
69+
try:
70+
if container.status == "running":
71+
container.kill()
72+
elif container.status in {"stopped", "dead"}:
73+
container.remove()
74+
elif container.status == "removing":
75+
continue
76+
else:
77+
msg = f"Cannot handle container in state {container.status}"
78+
raise RuntimeError(msg)
79+
except APIError as exc:
80+
if exc.status_code not in {404, 409}:
81+
raise
7482

7583

7684
class DockerService(AbstractContextManager):
@@ -156,7 +164,17 @@ def run(
156164
try:
157165
self._client.images.get(image)
158166
except ImageNotFound:
159-
self._client.images.pull(*image.rsplit(":", maxsplit=1), **platform_kwarg) # pyright: ignore[reportCallIssue,reportArgumentType]
167+
# Registries can fail transiently: Docker Hub rate-limits
168+
# anonymous pulls with 500s, MCR's WAF returns 404s wrapping
169+
# a block page. Retry up to 3 times with exponential backoff (1s, 2s) before giving up.
170+
for attempt in range(3):
171+
try:
172+
self._client.images.pull(*image.rsplit(":", maxsplit=1), **platform_kwarg) # pyright: ignore[reportCallIssue,reportArgumentType]
173+
break
174+
except (APIError, ImageNotFound):
175+
if attempt == 2:
176+
raise
177+
time.sleep(2**attempt)
160178

161179
if container is None:
162180
container = self._client.containers.run( # pyright: ignore[reportCallIssue,reportArgumentType]

src/pytest_databases/docker/mariadb.py

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -80,23 +80,48 @@ def check(_service: ServiceContainer) -> bool:
8080
pause=1.0,
8181
transient=isolation_level == "server",
8282
) as service:
83-
# Final setup: ensure the worker-specific database exists and permissions are correct
83+
# check() above only verifies that root can run SELECT 1; that succeeds before
84+
# mariadb has fully provisioned the app user with the @'%' grant. Verify
85+
# the app user can actually reach db_name from any host before yielding,
86+
# otherwise tests race into 'Host not allowed' / 'access denied'.
8487
container_name = f"pytest_databases_{name}"
8588
container = docker_service._get_container(container_name)
86-
if container:
87-
setup_sql = (
88-
f"CREATE DATABASE IF NOT EXISTS {db_name}; "
89-
f"GRANT ALL PRIVILEGES ON *.* TO '{user}'@'%'; "
90-
"FLUSH PRIVILEGES;"
89+
if container is None:
90+
msg = f"MariaDB container {container_name!r} disappeared after startup"
91+
raise RuntimeError(msg)
92+
93+
setup_sql = (
94+
f"CREATE DATABASE IF NOT EXISTS {db_name}; "
95+
f"GRANT ALL PRIVILEGES ON *.* TO '{user}'@'%'; "
96+
"FLUSH PRIVILEGES;"
97+
)
98+
verify_cmd = [
99+
"mariadb",
100+
f"--user={user}",
101+
f"--password={password}",
102+
db_name,
103+
"-e",
104+
"SELECT 1",
105+
]
106+
last_err: bytes = b""
107+
for attempt in range(15):
108+
setup_res = container.exec_run(
109+
["mariadb", "--user=root", f"--password={root_password}", "-e", setup_sql],
91110
)
92-
# Retry setup a few times if it fails
93-
for _ in range(5):
94-
res = container.exec_run(
95-
["mariadb", "--user=root", f"--password={root_password}", "-e", setup_sql],
96-
)
97-
if res.exit_code == 0:
111+
if setup_res.exit_code == 0:
112+
verify_res = container.exec_run(verify_cmd)
113+
if verify_res.exit_code == 0:
98114
break
99-
time.sleep(1)
115+
last_err = verify_res.output
116+
else:
117+
last_err = setup_res.output
118+
time.sleep(1 + attempt * 0.5)
119+
else:
120+
msg = (
121+
f"MariaDB fixture {name!r}: user {user!r} could not reach database "
122+
f"{db_name!r} after 15 attempts. Last output: {last_err!r}"
123+
)
124+
raise RuntimeError(msg)
100125

101126
yield MariaDBService(
102127
db=db_name,

src/pytest_databases/docker/mysql.py

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -87,24 +87,48 @@ def check(_service: ServiceContainer) -> bool:
8787
transient=isolation_level == "server",
8888
platform=platform,
8989
) as service:
90-
# Final setup: ensure the worker-specific database exists and permissions are correct
90+
# The check() above only verifies root can SELECT 1; that signal is true
91+
# before mysql has finished provisioning the app user and applying our
92+
# post-start grants. Verify the app user can actually reach db_name
93+
# before yielding so tests don't race the fixture into 'access denied'.
9194
container_name = f"pytest_databases_{name}"
9295
container = docker_service._get_container(container_name)
93-
if container:
94-
# Grant global privileges to the app user so they can create databases in tests if needed
95-
setup_sql = (
96-
f"CREATE DATABASE IF NOT EXISTS {db_name}; "
97-
f"GRANT ALL PRIVILEGES ON *.* TO '{user}'@'%'; "
98-
"FLUSH PRIVILEGES;"
96+
if container is None:
97+
msg = f"MySQL container {container_name!r} disappeared after startup"
98+
raise RuntimeError(msg)
99+
100+
setup_sql = (
101+
f"CREATE DATABASE IF NOT EXISTS {db_name}; "
102+
f"GRANT ALL PRIVILEGES ON *.* TO '{user}'@'%'; "
103+
"FLUSH PRIVILEGES;"
104+
)
105+
verify_cmd = [
106+
"mysql",
107+
f"--user={user}",
108+
f"--password={password}",
109+
db_name,
110+
"-e",
111+
"SELECT 1",
112+
]
113+
last_err: bytes = b""
114+
for attempt in range(15):
115+
setup_res = container.exec_run(
116+
["mysql", "--user=root", f"--password={root_password}", "-e", setup_sql],
99117
)
100-
# Retry setup a few times if it fails
101-
for _ in range(5):
102-
res = container.exec_run(
103-
["mysql", "--user=root", f"--password={root_password}", "-e", setup_sql],
104-
)
105-
if res.exit_code == 0:
118+
if setup_res.exit_code == 0:
119+
verify_res = container.exec_run(verify_cmd)
120+
if verify_res.exit_code == 0:
106121
break
107-
time.sleep(1)
122+
last_err = verify_res.output
123+
else:
124+
last_err = setup_res.output
125+
time.sleep(1 + attempt * 0.5)
126+
else:
127+
msg = (
128+
f"MySQL fixture {name!r}: user {user!r} could not reach database "
129+
f"{db_name!r} after 15 attempts. Last output: {last_err!r}"
130+
)
131+
raise RuntimeError(msg)
108132

109133
yield MySQLService(
110134
db=db_name,

0 commit comments

Comments
 (0)