diff --git a/cwltool/singularity.py b/cwltool/singularity.py index 11380e3ed..3ed85c189 100644 --- a/cwltool/singularity.py +++ b/cwltool/singularity.py @@ -155,16 +155,67 @@ def is_version_3_10_or_newer() -> bool: return version >= Version("3.10") +def _encode_container_image(string: str) -> str: + """ + Produce a unique filename-safe string for a container image. + + The input may be a Docker container specifier like "ubuntu", + "ubuntu:latest", "quay.io/adamnovak/hap.py:v0.3.15", + "quay.io/adamnovak/hap.py@sha256:f483da1b1c3d58be028a9599db4255cf1eb8570d50f901f10c8499fc36a94418", + etc., or it may be something with a protocol like "docker://ubuntu" or + "https://library.sylabs.io/v1/imagefile/library/default/alpine:latest", or + it may be a file path not ending in ":latest". + + (It may not be a file path ending in ":latest" because it is too hard to + work out that the corresponding path without ":latest" should not have + ":latest" appended to it.) + + If the input does not have a tag or digest, and a protocol was not used, + the "latest" tag will be used. + + Distinct strings that refer to the same image (like "library/ubuntu", + "docker.io/library/ubuntu", and "docker://ubuntu:latest") will not + necessarily always produce the same result. + + No two distinct inputs that refer to distinct container images will produce + the same result. The result is human-readable. + """ + # See https://stackoverflow.com/a/43091578 for information on allowed + # patterns of Docker image specifiers. + has_tag_or_digest = re.search( + pattern=r"(:[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}|@[a-z0-9]+([+._-][a-z0-9]+)*:[a-zA-Z0-9=_-]+)$", + string=string, + ) + + has_protocol = re.search(pattern=r"^[a-zA-Z0-9-_]+://", string=string) + + if not has_tag_or_digest and not has_protocol: + # We apply :latest to *everything* that doesn't seem to have a tag or a + # protocol + string += ":latest" + + # First we need to escape existing underscores, and then we need to use + # underscore-based escapes to represent characters not allowed in + # filenames. There's sadly no one obvious way to do this. + return string.replace("_", "___").replace("/", "_s_") + + def _normalize_image_id(string: str) -> str: - if ":" not in string: - string += "_latest" - return string.replace("/", "_") + ".img" + return _encode_container_image(string) + ".img" def _normalize_sif_id(string: str) -> str: - if ":" not in string: - string += "_latest" - return string.replace("/", "_") + ".sif" + """ + Produce a .sif filename for a container image. + + This is guaranteed to be the same as the path at which cwl-docker-extract + from the cwl-utils project will save the .sif within its target directory, + for inputs supported by cwl-docker-extract. + + When two inputs refer to different images, the results for those inputs are + guaranteed to be distinct. + """ + return _encode_container_image(string) + ".sif" @mypyc_attr(allow_interpreted_subclasses=True) diff --git a/tests/debian_image_id.cwl b/tests/debian_image_id.cwl index 79ef9185b..9b2a7ece6 100755 --- a/tests/debian_image_id.cwl +++ b/tests/debian_image_id.cwl @@ -4,7 +4,7 @@ class: CommandLineTool requirements: DockerRequirement: - dockerImageId: 'debian:stable-slim.img' + dockerImageId: 'docker.io_s_debian:stable-slim.img' inputs: message: string diff --git a/tests/debian_image_id2.cwl b/tests/debian_image_id2.cwl index 5982a44c9..cbe90e8e7 100755 --- a/tests/debian_image_id2.cwl +++ b/tests/debian_image_id2.cwl @@ -4,7 +4,7 @@ class: CommandLineTool requirements: DockerRequirement: - dockerImageId: 'docker.io_debian:stable-slim.sif' + dockerImageId: 'docker.io_s_debian:stable-slim.sif' inputs: message: string diff --git a/tests/test_singularity.py b/tests/test_singularity.py index a6a1f7b4f..af66a4ddc 100644 --- a/tests/test_singularity.py +++ b/tests/test_singularity.py @@ -15,6 +15,7 @@ _IMAGES, _IMAGES_LOCK, _inspect_singularity_sandbox_image, + _normalize_sif_id, ) from .util import ( @@ -33,6 +34,47 @@ def clear_singularity_image_cache() -> None: _IMAGES.clear() +def test_normalize_sif_id_resists_collision() -> None: + """Test that tricky image names can't collide.""" + assert _normalize_sif_id("ubuntu:latest") != _normalize_sif_id("ubuntu_latest") + assert _normalize_sif_id("docker://user_name/repo") != _normalize_sif_id( + "docker://user/name_repo" + ) + assert _normalize_sif_id("http://something.com/something.sif") != _normalize_sif_id( + "http:_something.com/something.sif" + ) + + +def test_normalize_sif_id_implies_latest() -> None: + """ + Test that image names that imply latest are equivalent to those that + specify it. + """ + assert _normalize_sif_id("ubuntu") == _normalize_sif_id("ubuntu:latest") + assert _normalize_sif_id("quay.io/adamnovak/hap.py") == _normalize_sif_id( + "quay.io/adamnovak/hap.py:latest" + ) + + +def test_normalize_sif_id_does_not_tag_protocols() -> None: + """ + Test that if an image comes from a string with a protocol, no tag is added. + """ + assert _normalize_sif_id("docker://library/ubuntu") != _normalize_sif_id( + "docker://library/ubuntu:latest" + ) + + +def test_normalize_sif_id_matches_cwl_utils_tests() -> None: + """ + Make sure that the particular values tested in cwl-utils get the same + answers here as there. + """ + + assert _normalize_sif_id("some_name/repo:123") == "some___name_s_repo:123.sif" + assert _normalize_sif_id("some/name_repo:123") == "some_s_name___repo:123.sif" + + @needs_singularity_2_6 def test_singularity_pullfolder(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: """Test singularity respects SINGULARITY_PULLFOLDER.""" @@ -133,6 +175,10 @@ def test_singularity_local(tmp_path: Path) -> None: @needs_singularity_2_6 def test_singularity2_docker_image_id_in_tool(tmp_path: Path) -> None: + """ + Ensure that images end up cached at the expected path and are loadable from + it with Singularity 2. + """ workdir = tmp_path / "working_dir" workdir.mkdir() with working_directory(workdir): @@ -157,6 +203,10 @@ def test_singularity2_docker_image_id_in_tool(tmp_path: Path) -> None: @needs_singularity_3_or_newer def test_singularity3_docker_image_id_in_tool(tmp_path: Path) -> None: + """ + Ensure that images end up cached at the expected path and are loadable from + it with Singularity 3. + """ workdir = tmp_path / "working_dir" workdir.mkdir() with working_directory(workdir): @@ -194,7 +244,7 @@ def test_singularity_dockerfile_no_name_no_cache(tmp_path: Path) -> None: ] ) assert result_code == 0, stderr - assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists() + assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists() @needs_singularity_3_or_newer @@ -217,8 +267,8 @@ def test_singularity_dockerfile_no_name_with_cache( ] ) assert result_code == 0, stderr - assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists() - assert (cachedir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists() + assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists() + assert (cachedir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists() @needs_singularity_3_or_newer @@ -236,8 +286,8 @@ def test_singularity_dockerfile_with_name_no_cache(tmp_path: Path) -> None: ) assert result_code == 0, stderr print(list(workdir.iterdir())) - assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists() - assert not (workdir / "customDebian_latest.sif").exists() + assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists() + assert not (workdir / _normalize_sif_id("customDebian")).exists() @needs_singularity_3_or_newer @@ -262,10 +312,10 @@ def test_singularity_dockerfile_with_name_with_cache( print(list(workdir.iterdir())) print(list(cachedir.iterdir())) assert result_code == 0, stderr - assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists() - assert not (cachedir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists() - assert not (workdir / "customDebian_latest.sif").exists() - assert (cachedir / "customDebian_latest.sif").exists() + assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists() + assert not (cachedir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists() + assert not (workdir / _normalize_sif_id("customDebian")).exists() + assert (cachedir / _normalize_sif_id("customDebian")).exists() @needs_singularity