Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 57 additions & 6 deletions cwltool/singularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,16 +155,67 @@ def is_version_3_10_or_newer() -> bool:
return version >= Version("3.10")


def _encode_container_image(string: str) -> str:
"""
Produce a unique filename-safe string for a container image.

The input may be a Docker container specifier like "ubuntu",
"ubuntu:latest", "quay.io/adamnovak/hap.py:v0.3.15",
"quay.io/adamnovak/hap.py@sha256:f483da1b1c3d58be028a9599db4255cf1eb8570d50f901f10c8499fc36a94418",
etc., or it may be something with a protocol like "docker://ubuntu" or
"https://library.sylabs.io/v1/imagefile/library/default/alpine:latest", or
it may be a file path not ending in ":latest".

(It may not be a file path ending in ":latest" because it is too hard to
work out that the corresponding path without ":latest" should not have
":latest" appended to it.)

If the input does not have a tag or digest, and a protocol was not used,
the "latest" tag will be used.

Distinct strings that refer to the same image (like "library/ubuntu",
"docker.io/library/ubuntu", and "docker://ubuntu:latest") will not
necessarily always produce the same result.

No two distinct inputs that refer to distinct container images will produce
the same result. The result is human-readable.
"""
# See https://stackoverflow.com/a/43091578 for information on allowed
# patterns of Docker image specifiers.
has_tag_or_digest = re.search(
pattern=r"(:[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}|@[a-z0-9]+([+._-][a-z0-9]+)*:[a-zA-Z0-9=_-]+)$",
string=string,
)

has_protocol = re.search(pattern=r"^[a-zA-Z0-9-_]+://", string=string)

if not has_tag_or_digest and not has_protocol:
# We apply :latest to *everything* that doesn't seem to have a tag or a
# protocol
string += ":latest"

# First we need to escape existing underscores, and then we need to use
# underscore-based escapes to represent characters not allowed in
# filenames. There's sadly no one obvious way to do this.
return string.replace("_", "___").replace("/", "_s_")


def _normalize_image_id(string: str) -> str:
if ":" not in string:
string += "_latest"
return string.replace("/", "_") + ".img"
return _encode_container_image(string) + ".img"


def _normalize_sif_id(string: str) -> str:
if ":" not in string:
string += "_latest"
return string.replace("/", "_") + ".sif"
"""
Produce a .sif filename for a container image.

This is guaranteed to be the same as the path at which cwl-docker-extract
from the cwl-utils project will save the .sif within its target directory,
for inputs supported by cwl-docker-extract.

When two inputs refer to different images, the results for those inputs are
guaranteed to be distinct.
"""
return _encode_container_image(string) + ".sif"


@mypyc_attr(allow_interpreted_subclasses=True)
Expand Down
2 changes: 1 addition & 1 deletion tests/debian_image_id.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
dockerImageId: 'debian:stable-slim.img'
dockerImageId: 'docker.io_s_debian:stable-slim.img'

inputs:
message: string
Expand Down
2 changes: 1 addition & 1 deletion tests/debian_image_id2.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ class: CommandLineTool

requirements:
DockerRequirement:
dockerImageId: 'docker.io_debian:stable-slim.sif'
dockerImageId: 'docker.io_s_debian:stable-slim.sif'

inputs:
message: string
Expand Down
68 changes: 59 additions & 9 deletions tests/test_singularity.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
_IMAGES,
_IMAGES_LOCK,
_inspect_singularity_sandbox_image,
_normalize_sif_id,
)

from .util import (
Expand All @@ -33,6 +34,47 @@ def clear_singularity_image_cache() -> None:
_IMAGES.clear()


def test_normalize_sif_id_resists_collision() -> None:
"""Test that tricky image names can't collide."""
assert _normalize_sif_id("ubuntu:latest") != _normalize_sif_id("ubuntu_latest")
assert _normalize_sif_id("docker://user_name/repo") != _normalize_sif_id(
"docker://user/name_repo"
)
assert _normalize_sif_id("http://something.com/something.sif") != _normalize_sif_id(
"http:_something.com/something.sif"
)


def test_normalize_sif_id_implies_latest() -> None:
"""
Test that image names that imply latest are equivalent to those that
specify it.
"""
assert _normalize_sif_id("ubuntu") == _normalize_sif_id("ubuntu:latest")
assert _normalize_sif_id("quay.io/adamnovak/hap.py") == _normalize_sif_id(
"quay.io/adamnovak/hap.py:latest"
)


def test_normalize_sif_id_does_not_tag_protocols() -> None:
"""
Test that if an image comes from a string with a protocol, no tag is added.
"""
assert _normalize_sif_id("docker://library/ubuntu") != _normalize_sif_id(
"docker://library/ubuntu:latest"
)


def test_normalize_sif_id_matches_cwl_utils_tests() -> None:
"""
Make sure that the particular values tested in cwl-utils get the same
answers here as there.
"""

assert _normalize_sif_id("some_name/repo:123") == "some___name_s_repo:123.sif"
assert _normalize_sif_id("some/name_repo:123") == "some_s_name___repo:123.sif"


@needs_singularity_2_6
def test_singularity_pullfolder(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
"""Test singularity respects SINGULARITY_PULLFOLDER."""
Expand Down Expand Up @@ -133,6 +175,10 @@ def test_singularity_local(tmp_path: Path) -> None:

@needs_singularity_2_6
def test_singularity2_docker_image_id_in_tool(tmp_path: Path) -> None:
"""
Ensure that images end up cached at the expected path and are loadable from
it with Singularity 2.
"""
workdir = tmp_path / "working_dir"
workdir.mkdir()
with working_directory(workdir):
Expand All @@ -157,6 +203,10 @@ def test_singularity2_docker_image_id_in_tool(tmp_path: Path) -> None:

@needs_singularity_3_or_newer
def test_singularity3_docker_image_id_in_tool(tmp_path: Path) -> None:
"""
Ensure that images end up cached at the expected path and are loadable from
it with Singularity 3.
"""
workdir = tmp_path / "working_dir"
workdir.mkdir()
with working_directory(workdir):
Expand Down Expand Up @@ -194,7 +244,7 @@ def test_singularity_dockerfile_no_name_no_cache(tmp_path: Path) -> None:
]
)
assert result_code == 0, stderr
assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists()
assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists()


@needs_singularity_3_or_newer
Expand All @@ -217,8 +267,8 @@ def test_singularity_dockerfile_no_name_with_cache(
]
)
assert result_code == 0, stderr
assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists()
assert (cachedir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists()
assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists()
assert (cachedir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists()


@needs_singularity_3_or_newer
Expand All @@ -236,8 +286,8 @@ def test_singularity_dockerfile_with_name_no_cache(tmp_path: Path) -> None:
)
assert result_code == 0, stderr
print(list(workdir.iterdir()))
assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists()
assert not (workdir / "customDebian_latest.sif").exists()
assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists()
assert not (workdir / _normalize_sif_id("customDebian")).exists()


@needs_singularity_3_or_newer
Expand All @@ -262,10 +312,10 @@ def test_singularity_dockerfile_with_name_with_cache(
print(list(workdir.iterdir()))
print(list(cachedir.iterdir()))
assert result_code == 0, stderr
assert not (workdir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists()
assert not (cachedir / "bea92b9b6910cbbd2ae602f5bb0f0f27_latest.sif").exists()
assert not (workdir / "customDebian_latest.sif").exists()
assert (cachedir / "customDebian_latest.sif").exists()
assert not (workdir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists()
assert not (cachedir / _normalize_sif_id("bea92b9b6910cbbd2ae602f5bb0f0f27")).exists()
assert not (workdir / _normalize_sif_id("customDebian")).exists()
assert (cachedir / _normalize_sif_id("customDebian")).exists()


@needs_singularity
Expand Down