diff --git a/README.md b/README.md index c5dcb95..84f33d5 100644 --- a/README.md +++ b/README.md @@ -171,6 +171,7 @@ The following dependency file formats are supported: - `yarn.lock` (v1, v2) - `pnpm-lock.yaml` (v9) - `Dockerfile` +- `docker-compose.yml`, `docker-compose.yaml`, `compose.yml`, `compose.yaml` (v1, v2, v3) ### Check dependencies introduced through the CLI diff --git a/src/twyn/base/constants.py b/src/twyn/base/constants.py index b2f416f..ace9f74 100644 --- a/src/twyn/base/constants.py +++ b/src/twyn/base/constants.py @@ -33,6 +33,10 @@ "pnpm-lock.yaml": dependency_parser.PnpmLockParser, "yarn.lock": dependency_parser.YarnLockParser, "Dockerfile": dependency_parser.DockerfileParser, + "docker-compose.yml": dependency_parser.DockerComposeParser, + "docker-compose.yaml": dependency_parser.DockerComposeParser, + "compose.yml": dependency_parser.DockerComposeParser, + "compose.yaml": dependency_parser.DockerComposeParser, } """Mapping of dependency file names to their parser classes.""" diff --git a/src/twyn/dependency_managers/managers.py b/src/twyn/dependency_managers/managers.py index ed5eaf6..5f8b1d9 100644 --- a/src/twyn/dependency_managers/managers.py +++ b/src/twyn/dependency_managers/managers.py @@ -3,6 +3,10 @@ from twyn.dependency_managers.exceptions import NoMatchingDependencyManagerError from twyn.dependency_parser.parsers.constants import ( + COMPOSE_YAML, + COMPOSE_YML, + DOCKER_COMPOSE_YAML, + DOCKER_COMPOSE_YML, DOCKERFILE, PACKAGE_LOCK_JSON, PNPM_LOCK_YAML, @@ -66,7 +70,7 @@ def get_alternative_source(self, sources: dict[str, str]) -> str | None: dockerhub_dependency_manager = DependencyManager( name="dockerhub", trusted_packages_source=TopDockerHubReference, - dependency_files={DOCKERFILE}, + dependency_files={DOCKERFILE, DOCKER_COMPOSE_YML, DOCKER_COMPOSE_YAML, COMPOSE_YML, COMPOSE_YAML}, trusted_packages_manager=TrustedDockerHubPackageManager, ) diff --git a/src/twyn/dependency_parser/__init__.py 
b/src/twyn/dependency_parser/__init__.py index 837c8fa..b604523 100644 --- a/src/twyn/dependency_parser/__init__.py +++ b/src/twyn/dependency_parser/__init__.py @@ -1,5 +1,6 @@ """Dependency parsers.""" +from twyn.dependency_parser.parsers.docker_compose_parser import DockerComposeParser from twyn.dependency_parser.parsers.dockerfile_parser import DockerfileParser from twyn.dependency_parser.parsers.lock_parser import PoetryLockParser, UvLockParser from twyn.dependency_parser.parsers.package_lock_json import PackageLockJsonParser @@ -15,4 +16,5 @@ "YarnLockParser", "PnpmLockParser", "DockerfileParser", + "DockerComposeParser", ] diff --git a/src/twyn/dependency_parser/dependency_selector.py b/src/twyn/dependency_parser/dependency_selector.py index fe12877..1c1c91f 100644 --- a/src/twyn/dependency_parser/dependency_selector.py +++ b/src/twyn/dependency_parser/dependency_selector.py @@ -42,10 +42,12 @@ def get_dependency_file_parsers_from_file_name(self) -> list[AbstractParser]: """Get parsers for dependency files based on their names.""" parsers = [] for dependency_file in self.dependency_files: + dependency_filename = Path(dependency_file).name for known_dependency_file_name in DEPENDENCY_FILE_MAPPING: - if dependency_file.endswith(known_dependency_file_name): + if dependency_filename == known_dependency_file_name: file_parser = DEPENDENCY_FILE_MAPPING[known_dependency_file_name](dependency_file) parsers.append(file_parser) + break if not parsers: raise NoMatchingParserError diff --git a/src/twyn/dependency_parser/parsers/constants.py b/src/twyn/dependency_parser/parsers/constants.py index aa9c1f0..dcbad3b 100644 --- a/src/twyn/dependency_parser/parsers/constants.py +++ b/src/twyn/dependency_parser/parsers/constants.py @@ -18,3 +18,15 @@ DOCKERFILE = "Dockerfile" """Filename for Docker container definition files.""" + +DOCKER_COMPOSE_YML = "docker-compose.yml" +"""Filename for Docker Compose configuration files.""" + +DOCKER_COMPOSE_YAML = "docker-compose.yaml" 
class DockerComposeParser(AbstractParser):
    """Parser for Docker Compose files that collects the images used by services.

    Only the ``image`` entry of each service is inspected; a service that has
    no ``image`` (for example one that only declares ``build``) contributes
    nothing. Returned names have their tag and digest removed.
    """

    # Compose interpolation syntax. Matches the unbraced form ``$VAR`` and the
    # braced forms ``${VAR}``, ``${VAR:-default}``, ``${VAR-default}``,
    # ``${VAR:?error}``, ``${VAR?error}``, ``${VAR:+replacement}`` and
    # ``${VAR+replacement}``.
    VARIABLE_PATTERN = re.compile(
        r"\$\{(?P<name>[a-zA-Z_][a-zA-Z0-9_]*)(?:(?P<operator>:?[-?+])(?P<default>[^}]*))?\}"
        r"|\$(?P<simple>[a-zA-Z_][a-zA-Z0-9_]*)"
    )

    def __init__(self, file_path: str = DOCKER_COMPOSE_YML) -> None:
        super().__init__(file_path)

    @override
    def parse(self) -> set[str]:
        """Parse the compose file and return the image names used by its services.

        Returns:
            The set of image names (without tag or digest). The set is empty
            when the file is empty, is not valid YAML, or does not have a
            mapping at its top level. Images whose *name* still contains a
            variable reference after substitution are left out.
        """
        with self.file_handler.open("r") as fp:
            try:
                compose_data = yaml.safe_load(fp)
            except yaml.YAMLError as e:
                logger.warning("Failed to parse docker-compose file: %s", e)
                return set()

        if not compose_data:
            return set()

        # Valid YAML is not necessarily a mapping (it can be a list or a bare
        # scalar); such a document cannot describe any service.
        if not isinstance(compose_data, dict):
            logger.warning("Unexpected docker-compose file structure: top level is not a mapping")
            return set()

        services = compose_data.get("services")
        if not services or not isinstance(services, dict):
            # Legacy format: services are declared directly at the root level.
            services = {k: v for k, v in compose_data.items() if isinstance(v, dict) and "image" in v}

        images: set[str] = set()
        for service_config in services.values():
            if not isinstance(service_config, dict):
                continue

            image = service_config.get("image")
            if not image:
                continue

            # Substitute what can be substituted, then drop the tag/digest.
            resolved_image = self._resolve_variables(str(image).strip())
            image_name = self._extract_image_name(resolved_image)

            # An unresolved variable in the tag is harmless (the tag is gone by
            # now); one in the name means the real image is unknown.
            if image_name and not self._has_unresolved_variables(image_name):
                images.add(image_name)

        return images

    def _resolve_variables(self, text: str) -> str:
        """Resolve variable substitutions in ``text`` without reading the environment.

        Every variable is treated as unset, so:

        - ``${VAR:-default}`` / ``${VAR-default}`` become ``default``;
        - ``${VAR:+replacement}`` / ``${VAR+replacement}`` become an empty string;
        - ``${VAR:?error}`` / ``${VAR?error}`` become the bare reference ``${VAR}``;
        - ``$VAR`` and ``${VAR}`` are left untouched.

        Args:
            text: The raw value taken from the compose file.

        Returns:
            ``text`` with the substitutions above applied.
        """

        def replace_var(match: re.Match[str]) -> str:
            operator = match.group("operator")

            # ``$VAR`` / ``${VAR}``: nothing to fall back to, keep the reference.
            if operator is None:
                return match.group(0)

            if operator.endswith("-"):
                return match.group("default")

            if operator.endswith("+"):
                return ""

            # Required variable (``?``): keep it detectable as unresolved, but
            # drop the error message, which may contain ':', '/' or '@' and
            # would otherwise confuse the image reference parsing.
            return "${" + match.group("name") + "}"

        return self.VARIABLE_PATTERN.sub(replace_var, text)

    def _has_unresolved_variables(self, text: str) -> bool:
        """Return whether ``text`` still contains a variable reference."""
        return bool(self.VARIABLE_PATTERN.search(text))

    def _extract_image_name(self, image_with_tag: str) -> str:
        """Extract the image name without tag or digest from an image reference.

        Examples:
            ubuntu:20.04 -> ubuntu
            node:16-alpine -> node
            registry.hub.docker.com/library/nginx:latest -> registry.hub.docker.com/library/nginx
            localhost:5000/myapp:v1.0 -> localhost:5000/myapp
            localhost:5000/myapp -> localhost:5000/myapp
            redis:7 -> redis
            nginx@sha256:23q... -> nginx
        """
        # Strip the digest first: it contains a ':' of its own.
        name = image_with_tag.split("@", 1)[0]

        # A ':' introduces a tag only when it sits in the last path component.
        # When a '/' follows it, it separates a registry host from its port.
        last_colon_idx = name.rfind(":")
        if last_colon_idx > name.rfind("/"):
            return name[:last_colon_idx]

        return name
class TestDockerComposeParser:
    def test_docker_compose_parser(self, docker_compose_file: Path) -> None:
        found_images = DockerComposeParser(file_path=str(docker_compose_file)).parse()

        # Comparing against the exact set also covers the exclusions: the
        # build-only service (no image) and the service whose variables
        # cannot be resolved must not show up.
        assert found_images == {
            "nginx",
            "my-registry.io/backend/python",
            "default-image",  # variable with a default gets resolved
            "alpine",  # image pinned by digest
            "internal-registry.company.com:5000/team/service",
        }

    def test_docker_compose_parser_legacy_format(self, docker_compose_file_legacy: Path) -> None:
        found_images = DockerComposeParser(file_path=str(docker_compose_file_legacy)).parse()

        # v1 format: services live at the root, without a "services" key.
        assert found_images == {"nginx", "mysql"}
requirements_txt_file: Path, tmp_path: Path - ) -> None: + def test_get_dependency_parser_auto_detects_file(self, requirements_txt_file: Path, tmp_path: Path) -> None: parser = DependencySelector("", root_path=str(tmp_path)).get_dependency_parsers() assert isinstance(parser[0], RequirementsTxtParser) - def test_get_dependency_parser_auto_detect_poetry_lock_file( - self, poetry_lock_file_ge_1_5: Path, tmp_path: Path - ) -> None: - selector = DependencySelector("", root_path=str(tmp_path)) - parser = selector.get_dependency_parsers() - assert isinstance(parser[0], PoetryLockParser) - - def test_get_dependency_parser_auto_detect_uv_lock_file(self, uv_lock_file: Path, tmp_path: Path) -> None: - parser = DependencySelector("", root_path=str(tmp_path)).get_dependency_parsers() - assert isinstance(parser[0], UvLockParser) - def test_auto_detect_dependency_file_parser_exceptions(self, tmp_path: Path) -> None: with pytest.raises(NoMatchingParserError): DependencySelector(root_path=str(tmp_path)).get_dependency_parsers() @@ -59,7 +51,7 @@ def test_auto_detect_dependency_file_parser_exceptions(self, tmp_path: Path) -> @pytest.mark.parametrize("file_name", ["unknown.txt", ""]) def test_get_dependency_file_parser_unknown_file_type(self, file_name: str) -> None: with pytest.raises(NoMatchingParserError): - DependencySelector(file_name).get_dependency_file_parsers_from_file_name() + DependencySelector({file_name}).get_dependency_parsers() def test_auto_detect_dependency_file_parser_scans_subdirectories(self, tmp_path: Path) -> None: # Create nested directories and dependency files