Skip to content

Commit 252489c

Browse files
committed
Added code that checks whether the compute worker Docker image is up to date. Updated the CircleCI config to also run flake8 on the compute worker code.
1 parent 128acc4 commit 252489c

3 files changed

Lines changed: 193 additions & 3 deletions

File tree

.circleci/config.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ jobs:
6666

6767
- run:
6868
name: "Lint: Check code style with flake8"
69-
command: docker compose exec django flake8 src/
69+
command: |
70+
docker compose exec django flake8 src/
71+
docker compose exec django flake8 compute_worker/
7072
7173
- run:
7274
name: "Tests: Run unit/integration tests (excluding e2e)"

compute_worker/compute_worker.py

Lines changed: 74 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from billiard.exceptions import SoftTimeLimitExceeded
3131

3232
from logs_loguru import configure_logging, colorize_run_args
33+
from docker_image_update_checker import DockerImageStatus, DockerImageUpdateChecker
3334

3435
logger = logging.getLogger(__name__)
3536

@@ -113,6 +114,11 @@ def to_bool(val):
113114

114115
WORKER_BUNDLE_URL_REWRITE = get("WORKER_BUNDLE_URL_REWRITE", "").strip()
115116

117+
# Docker image config
118+
DOCKER_IMAGE_NAMESPACE = get("DOCKER_IMAGE_NAMESPACE", "codalab")
119+
DOCKER_IMAGE_REPOSITORY = get("DOCKER_IMAGE_REPOSITORY", "codabench-compute-worker")
120+
DOCKER_IMAGE_TAG = get("DOCKER_IMAGE_TAG", "latest")
121+
116122

117123
# -----------------------------------------------
118124
# Program Kind
@@ -302,12 +308,78 @@ def rewrite_bundle_url_if_needed(url):
302308
return url
303309

304310

311+
def check_docker_image_update():
    """
    Compare local and remote compute worker Docker images and log the
    synchronization status along with relevant image metadata.

    The check is advisory only: any failure while building the checker or
    running the comparison is logged as a warning and swallowed, so that a
    broken Docker socket or network problem never prevents the caller from
    processing a submission.
    """
    try:
        checker = DockerImageUpdateChecker(
            namespace=Settings.DOCKER_IMAGE_NAMESPACE,
            repository=Settings.DOCKER_IMAGE_REPOSITORY,
            tag=Settings.DOCKER_IMAGE_TAG,
            docker_base_url=Settings.CONTAINER_SOCKET,
        )
        try:
            result = checker.compare_local_vs_remote_images()
        finally:
            # A new API client is created for every check; close it so that
            # its connections are not left open on long-running workers.
            checker.client.close()
    except Exception as exc:
        logger.warning(f"Docker image update check could not be performed: {exc}")
        return

    status = result.get("status")

    # status -> (log level, status line). Every status the checker can return
    # is listed, so the fallback is only hit for genuinely unexpected values.
    outcomes = {
        DockerImageStatus.UP_TO_DATE: (
            logging.INFO,
            "Status: Local image is synchronized with remote",
        ),
        DockerImageStatus.BEHIND: (
            logging.ERROR,
            "Status: Local image is behind remote version. For better submission processing and to avoid any submission errors, fetch the latest image!",
        ),
        DockerImageStatus.NEWER_LOCAL: (
            logging.WARNING,
            "Status: Local image is newer than the remote version",
        ),
        DockerImageStatus.DIFFERENT: (
            logging.WARNING,
            "Status: Local image differs from remote, but which one is newer could not be determined",
        ),
        DockerImageStatus.LOCAL_MISSING: (
            logging.ERROR,
            "Status: Local image is not present. Pull required",
        ),
        DockerImageStatus.REMOTE_UNAVAILABLE: (
            logging.ERROR,
            "Status: Could not fetch remote image metadata",
        ),
        "error": (
            logging.ERROR,
            f"Status: Image check failed: {result.get('error')}",
        ),
    }
    log_level, status_line = outcomes.get(
        status, (logging.ERROR, f"Unknown image status: {status}")
    )

    separator = "=" * 60
    log_lines = [
        "",
        separator,
        "DOCKER IMAGE UPDATE CHECK",
        separator,
        f"Image: {result.get('image_name')}",
    ]

    # "remote" / "local" are only present in the result when the
    # corresponding metadata could be retrieved.
    remote = result.get("remote")
    if remote:
        log_lines.append(f"Remote: digest={remote.get('digest')}, date={remote.get('date')}")

    local = result.get("local")
    if local:
        log_lines.append(f"Local: id={local.get('id')}, date={local.get('date')}")

    log_lines.extend(["-" * 60, status_line, separator])

    logger.log(log_level, "\n".join(log_lines))
372+
373+
305374
# -----------------------------------------------------------------------------
306375
# The main compute worker entrypoint; this is how a job is run at the highest
307376
# level.
308377
# -----------------------------------------------------------------------------
309378
@shared_task(name="compute_worker_run")
310379
def run_wrapper(run_args):
380+
# Check for docker image update
381+
check_docker_image_update()
382+
311383
# We need to convert the UUID given by celery into a byte like object otherwise things will break
312384
run_args.update(secret=str(run_args["secret"]))
313385
logger.info(f"Received run arguments: \n {colorize_run_args(json.dumps(run_args))}")
@@ -338,7 +410,7 @@ def run_wrapper(run_args):
338410
msg = "Submission failed. See logs for more details."
339411
run._update_status(SubmissionStatus.FAILED, extra_information=msg)
340412
raise
341-
except Exception as e:
413+
except Exception:
342414
# Catch any exception to avoid getting stuck in Running status
343415
run._update_status(SubmissionStatus.FAILED, extra_information=traceback.format_exc())
344416
raise
@@ -1303,7 +1375,7 @@ def start(self):
13031375
}
13041376
# Cleanup containers
13051377
containers_to_kill = [
1306-
self.ingestion_container_name,
1378+
self.ingestion_container_name,
13071379
self.program_container_name
13081380
]
13091381
logger.debug(
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
import json
2+
import urllib.request
3+
from datetime import datetime
4+
import docker
5+
6+
7+
class DockerImageStatus:
    """String constants describing how the local image relates to the remote one."""

    UP_TO_DATE = "up_to_date"
    BEHIND = "behind"
    NEWER_LOCAL = "newer_local"
    DIFFERENT = "different"
    LOCAL_MISSING = "local_missing"
    REMOTE_UNAVAILABLE = "remote_unavailable"
    # Returned when the comparison itself raised. The value stays "error" so
    # callers that compare against the plain string keep working.
    ERROR = "error"


class DockerImageUpdateChecker:
    """
    Compare a locally available Docker image with the same tag on Docker Hub.

    Remote metadata is read from the Docker Hub v2 tag endpoint; local
    metadata is read through the Docker API client built from
    ``docker_base_url``.
    """

    # Seconds to wait for Docker Hub before giving up, so that an unreachable
    # registry cannot block the caller indefinitely.
    REQUEST_TIMEOUT = 10.0

    def __init__(self, namespace: str, repository: str, tag: str, docker_base_url):
        self.image_name = f"{namespace}/{repository}:{tag}"
        self.url = f"https://hub.docker.com/v2/namespaces/{namespace}/repositories/{repository}/tags/{tag}"
        self.client = docker.APIClient(base_url=docker_base_url, version="auto")

    @staticmethod
    def _parse_datetime(value: str):
        """
        Parse an ISO 8601 timestamp into a timezone-aware ``datetime``.

        Returns ``None`` when ``value`` is empty, not a string, or cannot be
        parsed. A trailing ``Z`` is treated as UTC, fractional seconds are
        normalized to microsecond precision, and timestamps without an offset
        are assumed to be UTC so that results are always comparable.
        """
        # Local import: the module-level import only brings in `datetime`.
        from datetime import timezone

        if not value or not isinstance(value, str):
            return None

        text = value.strip()
        if text[-1:] in ("Z", "z"):
            text = text[:-1] + "+00:00"

        # Older Python versions only accept 3 or 6 fractional digits, while
        # timestamps may carry fewer digits or nanosecond precision.
        head, dot, tail = text.partition(".")
        if dot:
            digits = tail[: len(tail) - len(tail.lstrip("0123456789"))]
            suffix = tail[len(digits):]
            text = f"{head}.{digits[:6].ljust(6, '0')}{suffix}"

        try:
            parsed = datetime.fromisoformat(text)
        except ValueError:
            return None

        if parsed.tzinfo is None:
            # Mixing naive and aware datetimes raises TypeError on comparison.
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    @staticmethod
    def _get_json(url: str, timeout: float = REQUEST_TIMEOUT):
        """
        Fetch ``url`` and return the decoded JSON body, or ``None`` on failure.

        Deliberately best-effort: network, HTTP and decoding errors all
        result in ``None`` so the caller can report the remote as unavailable.
        """
        try:
            with urllib.request.urlopen(url, timeout=timeout) as response:
                return json.loads(response.read().decode())
        except Exception:
            return None

    def get_remote_info(self):
        """Return ``{"digest", "date"}`` for the remote tag, or ``None`` if unavailable."""
        data = self._get_json(self.url)
        if not data:
            return None

        return {
            "digest": data.get("digest"),
            "date": self._parse_datetime(
                data.get("tag_last_pushed") or data.get("last_updated")
            ),
        }

    def get_local_info(self):
        """
        Return ``{"id", "digests", "date"}`` for the local image.

        Returns ``None`` only when the image does not exist locally. Other
        Docker errors propagate so they are reported as a failed check
        instead of being mistaken for a missing image.
        """
        try:
            image = self.client.inspect_image(self.image_name)
        except docker.errors.NotFound:
            return None

        return {
            "id": image.get("Id"),
            # RepoDigests may be absent or null; normalize to a list.
            "digests": image.get("RepoDigests") or [],
            "date": self._parse_datetime(image.get("Created")),
        }

    def _get_status(self, remote: dict, local: dict):
        """
        Derive a ``DockerImageStatus`` value from remote and local metadata.

        A matching digest means the images are identical. Otherwise the dates
        decide which side is newer; if they are missing or equal the images
        are reported as different.
        """
        remote_digest = remote.get("digest")
        local_digests = local.get("digests") or []

        # Local entries look like "<repository>@<digest>"; compare the digest
        # part exactly rather than by substring.
        if remote_digest and any(
            entry.rpartition("@")[2] == remote_digest for entry in local_digests
        ):
            return DockerImageStatus.UP_TO_DATE

        remote_date = remote.get("date")
        local_date = local.get("date")

        if remote_date and local_date:
            if remote_date > local_date:
                return DockerImageStatus.BEHIND

            if remote_date < local_date:
                return DockerImageStatus.NEWER_LOCAL

        return DockerImageStatus.DIFFERENT

    def compare_local_vs_remote_images(self):
        """
        Compare the local image with the remote tag.

        Returns a dict that always has ``status`` and ``image_name``, plus
        ``remote`` / ``local`` metadata when available, or ``error`` with the
        exception message when the comparison itself failed. Never raises.
        """
        try:
            remote = self.get_remote_info()
            if not remote:
                return {
                    "status": DockerImageStatus.REMOTE_UNAVAILABLE,
                    "image_name": self.image_name,
                }

            local = self.get_local_info()
            if not local:
                return {
                    "status": DockerImageStatus.LOCAL_MISSING,
                    "image_name": self.image_name,
                    "remote": remote,
                }

            return {
                "status": self._get_status(remote, local),
                "image_name": self.image_name,
                "remote": remote,
                "local": local,
            }
        except Exception as exc:
            return {
                "status": DockerImageStatus.ERROR,
                "image_name": self.image_name,
                "error": str(exc),
            }

0 commit comments

Comments
 (0)