Skip to content

Commit 1fbfd66

Browse files
authored
feat: docker-compose parser (#427)
1 parent dcb02be commit 1fbfd66

File tree

10 files changed

+241
-27
lines changed

10 files changed

+241
-27
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ The following dependency file formats are supported:
171171
- `yarn.lock` (v1, v2)
172172
- `pnpm-lock.yaml` (v9)
173173
- `Dockerfile`
174+
- `docker-compose.yml`, `docker-compose.yaml`, `compose.yml`, `compose.yaml` (v1, v2, v3)
174175

175176
### Check dependencies introduced through the CLI
176177

src/twyn/base/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@
3333
"pnpm-lock.yaml": dependency_parser.PnpmLockParser,
3434
"yarn.lock": dependency_parser.YarnLockParser,
3535
"Dockerfile": dependency_parser.DockerfileParser,
36+
"docker-compose.yml": dependency_parser.DockerComposeParser,
37+
"docker-compose.yaml": dependency_parser.DockerComposeParser,
38+
"compose.yml": dependency_parser.DockerComposeParser,
39+
"compose.yaml": dependency_parser.DockerComposeParser,
3640
}
3741
"""Mapping of dependency file names to their parser classes."""
3842

src/twyn/dependency_managers/managers.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33

44
from twyn.dependency_managers.exceptions import NoMatchingDependencyManagerError
55
from twyn.dependency_parser.parsers.constants import (
6+
COMPOSE_YAML,
7+
COMPOSE_YML,
8+
DOCKER_COMPOSE_YAML,
9+
DOCKER_COMPOSE_YML,
610
DOCKERFILE,
711
PACKAGE_LOCK_JSON,
812
PNPM_LOCK_YAML,
@@ -66,7 +70,7 @@ def get_alternative_source(self, sources: dict[str, str]) -> str | None:
6670
dockerhub_dependency_manager = DependencyManager(
6771
name="dockerhub",
6872
trusted_packages_source=TopDockerHubReference,
69-
dependency_files={DOCKERFILE},
73+
dependency_files={DOCKERFILE, DOCKER_COMPOSE_YML, DOCKER_COMPOSE_YAML, COMPOSE_YML, COMPOSE_YAML},
7074
trusted_packages_manager=TrustedDockerHubPackageManager,
7175
)
7276

src/twyn/dependency_parser/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Dependency parsers."""
22

3+
from twyn.dependency_parser.parsers.docker_compose_parser import DockerComposeParser
34
from twyn.dependency_parser.parsers.dockerfile_parser import DockerfileParser
45
from twyn.dependency_parser.parsers.lock_parser import PoetryLockParser, UvLockParser
56
from twyn.dependency_parser.parsers.package_lock_json import PackageLockJsonParser
@@ -15,4 +16,5 @@
1516
"YarnLockParser",
1617
"PnpmLockParser",
1718
"DockerfileParser",
19+
"DockerComposeParser",
1820
]

src/twyn/dependency_parser/dependency_selector.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,10 +42,12 @@ def get_dependency_file_parsers_from_file_name(self) -> list[AbstractParser]:
4242
"""Get parsers for dependency files based on their names."""
4343
parsers = []
4444
for dependency_file in self.dependency_files:
45+
dependency_filename = Path(dependency_file).name
4546
for known_dependency_file_name in DEPENDENCY_FILE_MAPPING:
46-
if dependency_file.endswith(known_dependency_file_name):
47+
if dependency_filename == known_dependency_file_name:
4748
file_parser = DEPENDENCY_FILE_MAPPING[known_dependency_file_name](dependency_file)
4849
parsers.append(file_parser)
50+
break
4951
if not parsers:
5052
raise NoMatchingParserError
5153

src/twyn/dependency_parser/parsers/constants.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,15 @@
1818

1919
DOCKERFILE = "Dockerfile"
2020
"""Filename for Docker container definition files."""
21+
22+
DOCKER_COMPOSE_YML = "docker-compose.yml"
23+
"""Filename for Docker Compose configuration files."""
24+
25+
DOCKER_COMPOSE_YAML = "docker-compose.yaml"
26+
"""Alternative filename for Docker Compose configuration files."""
27+
28+
COMPOSE_YML = "compose.yml"
29+
"""Modern filename for Docker Compose configuration files."""
30+
31+
COMPOSE_YAML = "compose.yaml"
32+
"""Modern alternative filename for Docker Compose configuration files."""
src/twyn/dependency_parser/parsers/docker_compose_parser.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import logging
2+
import re
3+
4+
import yaml
5+
from typing_extensions import override
6+
7+
from twyn.dependency_parser.parsers.abstract_parser import AbstractParser
8+
from twyn.dependency_parser.parsers.constants import DOCKER_COMPOSE_YML
9+
10+
logger = logging.getLogger("twyn")
11+
12+
13+
class DockerComposeParser(AbstractParser):
    """Parser for Docker Compose files that extracts the images used by services.

    Only services that declare an ``image`` key contribute a dependency; services
    that are built locally (``build`` only) are ignored. Tags and digests are
    stripped so that only the image name, including any registry host and port,
    is reported.
    """

    # Compose interpolation syntax: ``$VAR``, ``${VAR}`` and the braced forms that carry a
    # modifier (``:-``/``-`` default, ``:?``/``?`` required, ``:+``/``+`` alternative).
    # Every braced form is matched, even those that cannot be resolved statically, so that
    # ``_has_unresolved_variables`` reliably detects leftover references.
    VARIABLE_PATTERN = re.compile(
        r"\$\{(?P<name>[a-zA-Z_][a-zA-Z0-9_]*)(?:(?P<operator>:?[-?+])(?P<default>[^}]*))?\}"
        r"|\$(?P<short_name>[a-zA-Z_][a-zA-Z0-9_]*)"
    )

    def __init__(self, file_path: str = DOCKER_COMPOSE_YML) -> None:
        """Create a parser for the Compose file located at ``file_path``."""
        super().__init__(file_path)

    @override
    def parse(self) -> set[str]:
        """Parse docker-compose.yml and return image names from services.

        Extracts images from service definitions and handles variable substitution.

        Returns:
            The set of image names (without tag or digest). An empty set is returned
            when the file is empty, is not valid YAML, or is not a mapping.
        """
        with self.file_handler.open("r") as fp:
            try:
                compose_data = yaml.safe_load(fp)
            except yaml.YAMLError as e:
                logger.warning("Failed to parse docker-compose file: %s", e)
                return set()

        if not compose_data:
            return set()

        # A YAML document may legally be a list or a scalar; such a file cannot
        # describe services, so bail out instead of failing on ``.get``.
        if not isinstance(compose_data, dict):
            logger.warning(
                "Unexpected docker-compose file structure: expected a mapping, got %s",
                type(compose_data).__name__,
            )
            return set()

        services = compose_data.get("services")
        if not isinstance(services, dict) or not services:
            # Legacy (v1) format: services are declared directly at the root level.
            services = {k: v for k, v in compose_data.items() if isinstance(v, dict) and "image" in v}

        images: set[str] = set()
        for service_config in services.values():
            if not isinstance(service_config, dict):
                continue

            image = service_config.get("image")
            if not image:
                # Build-only services have no image to check.
                continue

            resolved_image = self._resolve_variables(str(image))
            image_name = self._extract_image_name(resolved_image)
            # A name that still depends on the environment cannot be checked reliably.
            if image_name and not self._has_unresolved_variables(image_name):
                images.add(image_name)

        return images

    def _resolve_variables(self, text: str) -> str:
        """Resolve variable substitutions in text.

        Note: Unlike Dockerfile, docker-compose variables come from the
        environment, so we can only resolve those with default values
        (``${VAR:-default}`` and ``${VAR-default}``). Every other reference
        is left untouched.
        """

        def replace_var(match: re.Match[str]) -> str:
            # Without access to the actual environment the variable is considered
            # unset, which is exactly the case in which the default applies.
            if match.group("operator") in ("-", ":-"):
                return match.group("default")

            # Keep the variable reference if there is no default to fall back on.
            return match.group(0)

        return self.VARIABLE_PATTERN.sub(replace_var, text)

    def _has_unresolved_variables(self, text: str) -> bool:
        """Check if text still contains unresolved variable references."""
        return bool(self.VARIABLE_PATTERN.search(text))

    def _extract_image_name(self, image_with_tag: str) -> str:
        """Extract image name without tag/version/digest from a Docker image reference.

        The tag separator is looked up only in the last path component, so a colon
        that belongs to a registry port is never mistaken for a tag.

        Examples:
            ubuntu:20.04 -> ubuntu
            node:16-alpine -> node
            registry.hub.docker.com/library/nginx:latest -> registry.hub.docker.com/library/nginx
            localhost:5000/myapp:v1.0 -> localhost:5000/myapp
            localhost:5000/myapp -> localhost:5000/myapp
            redis:7 -> redis
            nginx@sha256:23q... -> nginx
        """
        # Strip off the digest FIRST: it contains a colon of its own.
        reference = image_with_tag.partition("@")[0]

        # Everything up to the last "/" (registry host, optional port, namespaces)
        # is kept verbatim; only the final component can carry a tag.
        path_prefix, slash, last_component = reference.rpartition("/")
        name = last_component.partition(":")[0]

        return f"{path_prefix}{slash}{name}"

tests/conftest.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -654,6 +654,54 @@ def package_lock_json_file_with_namespace_typo(tmp_path: Path) -> Iterator[Path]
654654
yield tmp_file
655655

656656

657+
@pytest.fixture
def docker_compose_file(tmp_path: Path) -> Iterator[Path]:
    """Yield a ``docker-compose.yml`` (services format) covering the main image reference shapes.

    The services include a plain tagged image, a registry-qualified image, variables
    with and without defaults, a digest-pinned image, a build-only service and a
    registry that specifies a port.
    """
    compose_path = tmp_path / "docker-compose.yml"
    content = """version: '3.8'

services:
  web:
    image: nginx:latest

  api:
    image: my-registry.io/backend/python:3.11-slim

  custom:
    image: ${CUSTOM_IMAGE:-default-image}:${VERSION:-latest}

  with-digest:
    image: alpine@sha256:123456789abcdef

  build-only:
    build:
      context: ./app
      dockerfile: Dockerfile

  unresolved-var:
    image: ${REGISTRY}/myapp:${TAG}

  private-registry:
    image: internal-registry.company.com:5000/team/service:v1.2.3
"""
    with create_tmp_file(compose_path, content) as created_file:
        yield created_file
689+
690+
691+
@pytest.fixture
def docker_compose_file_legacy(tmp_path: Path) -> Iterator[Path]:
    """Yield a ``docker-compose.yml`` in the legacy v1 layout (services declared at the root)."""
    compose_path = tmp_path / "docker-compose.yml"
    content = """web:
  image: nginx:latest

db:
  image: mysql:8
"""
    with create_tmp_file(compose_path, content) as created_file:
        yield created_file
703+
704+
657705
@pytest.fixture(autouse=True)
658706
def fail_on_requests_get(request) -> Generator[None, Any, None]:
659707
with mock.patch("requests.get") as m_get:

tests/dependency_parser/test_dependency_parser.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import pytest
55
from twyn.dependency_parser import (
6+
DockerComposeParser,
67
PackageLockJsonParser,
78
PnpmLockParser,
89
PoetryLockParser,
@@ -167,3 +168,29 @@ def test_dockefile_parser(self, dockerfile: Path) -> None:
167168
"my-registry/nginx",
168169
"nginx",
169170
}
171+
172+
173+
class TestDockerComposeParser:
    """Tests for extracting service images from Docker Compose files."""

    def test_docker_compose_parser(self, docker_compose_file: Path) -> None:
        found_images = DockerComposeParser(file_path=str(docker_compose_file)).parse()

        # Exactly the services with an explicit, resolvable image are reported.
        # Build-only services and images whose name depends on an unresolved
        # variable must be absent.
        expected_images = {
            "nginx",
            "my-registry.io/backend/python",
            "default-image",  # Variable with default resolved
            "alpine",  # Image with digest
            "internal-registry.company.com:5000/team/service",
        }
        assert found_images == expected_images

    def test_docker_compose_parser_legacy_format(self, docker_compose_file_legacy: Path) -> None:
        found_images = DockerComposeParser(file_path=str(docker_compose_file_legacy)).parse()

        # v1 format: services live at the root, without a `services` key.
        assert found_images == {"nginx", "mysql"}

tests/dependency_parser/test_dependency_selector.py

Lines changed: 17 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,13 @@
11
from pathlib import Path
22

33
import pytest
4-
from twyn.dependency_parser import PoetryLockParser, RequirementsTxtParser, UvLockParser
4+
from twyn.dependency_parser import (
5+
DockerComposeParser,
6+
DockerfileParser,
7+
PoetryLockParser,
8+
RequirementsTxtParser,
9+
UvLockParser,
10+
)
511
from twyn.dependency_parser.dependency_selector import DependencySelector
612
from twyn.dependency_parser.exceptions import (
713
NoMatchingParserError,
@@ -15,17 +21,16 @@ class TestDependencySelector:
1521
@pytest.mark.parametrize(
1622
("file_name", "parser_class"),
1723
[
18-
(
19-
"requirements.txt",
20-
RequirementsTxtParser,
21-
), # because file is specified, we won't autocheck
24+
("requirements.txt", RequirementsTxtParser),
2225
("poetry.lock", PoetryLockParser),
2326
("uv.lock", UvLockParser),
24-
("/some/path/poetry.lock", PoetryLockParser),
25-
("/some/path/uv.lock", UvLockParser),
26-
("/some/path/requirements.txt", RequirementsTxtParser),
27-
("/some/path/yarn.lock", YarnLockParser),
28-
("/some/path/package-lock.json", PackageLockJsonParser),
27+
("yarn.lock", YarnLockParser),
28+
("package-lock.json", PackageLockJsonParser),
29+
("Dockerfile", DockerfileParser),
30+
("docker-compose.yml", DockerComposeParser),
31+
("docker-compose.yaml", DockerComposeParser),
32+
("compose.yml", DockerComposeParser),
33+
("compose.yaml", DockerComposeParser),
2934
],
3035
)
3136
def test_get_dependency_parser(self, file_name: str, parser_class: type[AbstractParser]) -> None:
@@ -35,31 +40,18 @@ def test_get_dependency_parser(self, file_name: str, parser_class: type[Abstract
3540
assert isinstance(parser[0], parser_class)
3641
assert str(parser[0].file_handler.file_path).endswith(file_name)
3742

38-
def test_get_dependency_parser_auto_detect_requirements_file(
39-
self, requirements_txt_file: Path, tmp_path: Path
40-
) -> None:
43+
def test_get_dependency_parser_auto_detects_file(self, requirements_txt_file: Path, tmp_path: Path) -> None:
4144
parser = DependencySelector("", root_path=str(tmp_path)).get_dependency_parsers()
4245
assert isinstance(parser[0], RequirementsTxtParser)
4346

44-
def test_get_dependency_parser_auto_detect_poetry_lock_file(
45-
self, poetry_lock_file_ge_1_5: Path, tmp_path: Path
46-
) -> None:
47-
selector = DependencySelector("", root_path=str(tmp_path))
48-
parser = selector.get_dependency_parsers()
49-
assert isinstance(parser[0], PoetryLockParser)
50-
51-
def test_get_dependency_parser_auto_detect_uv_lock_file(self, uv_lock_file: Path, tmp_path: Path) -> None:
52-
parser = DependencySelector("", root_path=str(tmp_path)).get_dependency_parsers()
53-
assert isinstance(parser[0], UvLockParser)
54-
5547
def test_auto_detect_dependency_file_parser_exceptions(self, tmp_path: Path) -> None:
5648
with pytest.raises(NoMatchingParserError):
5749
DependencySelector(root_path=str(tmp_path)).get_dependency_parsers()
5850

5951
@pytest.mark.parametrize("file_name", ["unknown.txt", ""])
6052
def test_get_dependency_file_parser_unknown_file_type(self, file_name: str) -> None:
6153
with pytest.raises(NoMatchingParserError):
62-
DependencySelector(file_name).get_dependency_file_parsers_from_file_name()
54+
DependencySelector({file_name}).get_dependency_parsers()
6355

6456
def test_auto_detect_dependency_file_parser_scans_subdirectories(self, tmp_path: Path) -> None:
6557
# Create nested directories and dependency files

0 commit comments

Comments
 (0)