From 56b5058245b08c4f236c2daf82164d7e27ad29f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Tosser?= Date: Tue, 24 Mar 2026 15:04:36 +0100 Subject: [PATCH 1/4] feat: add files.unarchive operation Extract archive files on remote systems. Supports tar (.tar, .tar.gz, .tar.bz2, .tar.xz, .tar.zst) and zip formats. Archives can be local (uploaded automatically) or already on the remote host. Includes idempotency via `creates` parameter and optional chown support. Closes #1546 --- src/pyinfra/operations/files.py | 119 ++++++++++++++++++ .../files.unarchive/dest_not_directory.json | 16 +++ .../files.unarchive/extract_local_tar_gz.json | 16 +++ .../extract_remote_tar_gz.json | 18 +++ .../files.unarchive/extract_remote_zip.json | 18 +++ .../files.unarchive/extract_with_chown.json | 21 ++++ .../extract_with_extra_opts.json | 19 +++ .../remote_archive_missing.json | 19 +++ .../files.unarchive/skip_creates_exists.json | 15 +++ .../files.unarchive/unsupported_format.json | 16 +++ 10 files changed, 277 insertions(+) create mode 100644 tests/operations/files.unarchive/dest_not_directory.json create mode 100644 tests/operations/files.unarchive/extract_local_tar_gz.json create mode 100644 tests/operations/files.unarchive/extract_remote_tar_gz.json create mode 100644 tests/operations/files.unarchive/extract_remote_zip.json create mode 100644 tests/operations/files.unarchive/extract_with_chown.json create mode 100644 tests/operations/files.unarchive/extract_with_extra_opts.json create mode 100644 tests/operations/files.unarchive/remote_archive_missing.json create mode 100644 tests/operations/files.unarchive/skip_creates_exists.json create mode 100644 tests/operations/files.unarchive/unsupported_format.json diff --git a/src/pyinfra/operations/files.py b/src/pyinfra/operations/files.py index b252a28ad..d7588da5d 100644 --- a/src/pyinfra/operations/files.py +++ b/src/pyinfra/operations/files.py @@ -2076,3 +2076,122 @@ def block( else: cmd = StringCommand(f"awk 
'/{mark_1}/,/{mark_2}/ {{next}} 1'") yield StringCommand(out_prep, cmd, q_path, "> $OUT", real_out) + + +_ARCHIVE_FORMATS = { + ".tar": "tar xf", + ".tar.gz": "tar xzf", + ".tgz": "tar xzf", + ".tar.bz2": "tar xjf", + ".tbz2": "tar xjf", + ".tar.xz": "tar xJf", + ".txz": "tar xJf", + ".tar.zst": "tar --zstd -xf", + ".zip": "unzip -o", +} + + +def _get_archive_command(src: str) -> str | None: + lower = src.lower() + for ext, cmd in _ARCHIVE_FORMATS.items(): + if lower.endswith(ext): + return cmd + return None + + +@operation() +def unarchive( + src: str, + dest: str, + remote_src: bool = False, + creates: str | None = None, + extra_opts: list[str] | None = None, + user: str | None = None, + group: str | None = None, +): + """ + Extract archive files on the remote system. + + + src: path to the archive file (local or remote depending on ``remote_src``) + + dest: remote directory to extract into (must exist) + + remote_src: set to ``True`` if the archive is already on the remote system + + creates: if this path already exists, the operation is skipped (idempotency) + + extra_opts: list of additional arguments to pass to the extract command + + user: user to own the extracted files + + group: group to own the extracted files + + Supported formats: + ``.tar``, ``.tar.gz``/``.tgz``, ``.tar.bz2``/``.tbz2``, + ``.tar.xz``/``.txz``, ``.tar.zst``, ``.zip`` + + **Examples:** + + .. 
code:: python + + # Extract a remote archive + files.unarchive( + name="Extract app tarball", + src="/tmp/app.tar.gz", + dest="/opt/app", + remote_src=True, + ) + + # Upload and extract a local archive + files.unarchive( + name="Deploy release", + src="releases/app-v1.0.tar.gz", + dest="/opt/app", + creates="/opt/app/bin/start", + ) + """ + + # Idempotency: skip if creates path already exists + if creates: + if host.get_fact(File, path=creates) is not None: + host.noop("archive already extracted ({0} exists)".format(creates)) + return + + # Validate destination exists and is a directory + dest_info = host.get_fact(Directory, path=dest) + if not dest_info: + raise OperationError("Destination {0} is not an existing directory".format(dest)) + + extract_cmd = _get_archive_command(src) + if extract_cmd is None: + raise OperationValueError( + "Unsupported archive format for {0}. " + "Supported: {1}".format(src, ", ".join(_ARCHIVE_FORMATS.keys())) + ) + + if not remote_src: + # Upload the local archive to a temp location on the remote + temp_archive = host.get_temp_filename(src) + yield FileUploadCommand(src, temp_archive) + archive_path = temp_archive + else: + # Validate the remote archive exists + if host.get_fact(File, path=src) is None: + raise OperationError("Remote archive {0} does not exist".format(src)) + archive_path = src + + # Build extract command + cmd_parts = extract_cmd.split() + if extra_opts: + cmd_parts.extend(extra_opts) + + if extract_cmd.startswith("tar"): + cmd_parts.extend([QuoteString(archive_path), "-C", QuoteString(dest)]) + else: + # unzip: unzip -o -d + cmd_parts.extend([QuoteString(archive_path), "-d", QuoteString(dest)]) + + yield StringCommand(*cmd_parts) + + # Clean up uploaded temp file + if not remote_src: + yield StringCommand("rm", "-f", QuoteString(temp_archive)) + + # Set ownership if requested + if user or group: + ownership = "{0}:{1}".format(user or "", group or "") + yield StringCommand("chown", "-R", ownership, 
QuoteString(dest)) diff --git a/tests/operations/files.unarchive/dest_not_directory.json b/tests/operations/files.unarchive/dest_not_directory.json new file mode 100644 index 000000000..747e6809b --- /dev/null +++ b/tests/operations/files.unarchive/dest_not_directory.json @@ -0,0 +1,16 @@ +{ + "kwargs": { + "src": "/tmp/app.tar.gz", + "dest": "/opt/missing", + "remote_src": true + }, + "facts": { + "files.Directory": { + "path=/opt/missing": null + } + }, + "exception": { + "name": "OperationError", + "message": "Destination /opt/missing is not an existing directory" + } +} diff --git a/tests/operations/files.unarchive/extract_local_tar_gz.json b/tests/operations/files.unarchive/extract_local_tar_gz.json new file mode 100644 index 000000000..b3b68a9c7 --- /dev/null +++ b/tests/operations/files.unarchive/extract_local_tar_gz.json @@ -0,0 +1,16 @@ +{ + "kwargs": { + "src": "releases/app.tar.gz", + "dest": "/opt/app" + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + } + }, + "commands": [ + ["upload", "releases/app.tar.gz", "_tempfile_"], + "tar xzf _tempfile_ -C /opt/app", + "rm -f _tempfile_" + ] +} diff --git a/tests/operations/files.unarchive/extract_remote_tar_gz.json b/tests/operations/files.unarchive/extract_remote_tar_gz.json new file mode 100644 index 000000000..fa720220e --- /dev/null +++ b/tests/operations/files.unarchive/extract_remote_tar_gz.json @@ -0,0 +1,18 @@ +{ + "kwargs": { + "src": "/tmp/app.tar.gz", + "dest": "/opt/app", + "remote_src": true + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + "path=/tmp/app.tar.gz": true + } + }, + "commands": [ + "tar xzf /tmp/app.tar.gz -C /opt/app" + ] +} diff --git a/tests/operations/files.unarchive/extract_remote_zip.json b/tests/operations/files.unarchive/extract_remote_zip.json new file mode 100644 index 000000000..b4664e765 --- /dev/null +++ b/tests/operations/files.unarchive/extract_remote_zip.json @@ -0,0 +1,18 @@ +{ + "kwargs": { + "src": 
"/tmp/app.zip", + "dest": "/opt/app", + "remote_src": true + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + "path=/tmp/app.zip": true + } + }, + "commands": [ + "unzip -o /tmp/app.zip -d /opt/app" + ] +} diff --git a/tests/operations/files.unarchive/extract_with_chown.json b/tests/operations/files.unarchive/extract_with_chown.json new file mode 100644 index 000000000..2518a0a7b --- /dev/null +++ b/tests/operations/files.unarchive/extract_with_chown.json @@ -0,0 +1,21 @@ +{ + "kwargs": { + "src": "/tmp/app.tar.gz", + "dest": "/opt/app", + "remote_src": true, + "user": "www-data", + "group": "www-data" + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + "path=/tmp/app.tar.gz": true + } + }, + "commands": [ + "tar xzf /tmp/app.tar.gz -C /opt/app", + "chown -R www-data:www-data /opt/app" + ] +} diff --git a/tests/operations/files.unarchive/extract_with_extra_opts.json b/tests/operations/files.unarchive/extract_with_extra_opts.json new file mode 100644 index 000000000..ed4909ebd --- /dev/null +++ b/tests/operations/files.unarchive/extract_with_extra_opts.json @@ -0,0 +1,19 @@ +{ + "kwargs": { + "src": "/tmp/app.tar.gz", + "dest": "/opt/app", + "remote_src": true, + "extra_opts": ["--strip-components=1"] + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + "path=/tmp/app.tar.gz": true + } + }, + "commands": [ + "tar xzf --strip-components=1 /tmp/app.tar.gz -C /opt/app" + ] +} diff --git a/tests/operations/files.unarchive/remote_archive_missing.json b/tests/operations/files.unarchive/remote_archive_missing.json new file mode 100644 index 000000000..468bdbec2 --- /dev/null +++ b/tests/operations/files.unarchive/remote_archive_missing.json @@ -0,0 +1,19 @@ +{ + "kwargs": { + "src": "/tmp/missing.tar.gz", + "dest": "/opt/app", + "remote_src": true + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + 
"path=/tmp/missing.tar.gz": null + } + }, + "exception": { + "name": "OperationError", + "message": "Remote archive /tmp/missing.tar.gz does not exist" + } +} diff --git a/tests/operations/files.unarchive/skip_creates_exists.json b/tests/operations/files.unarchive/skip_creates_exists.json new file mode 100644 index 000000000..2c1641d62 --- /dev/null +++ b/tests/operations/files.unarchive/skip_creates_exists.json @@ -0,0 +1,15 @@ +{ + "kwargs": { + "src": "/tmp/app.tar.gz", + "dest": "/opt/app", + "remote_src": true, + "creates": "/opt/app/bin/start" + }, + "facts": { + "files.File": { + "path=/opt/app/bin/start": true + } + }, + "commands": [], + "noop_description": "archive already extracted (/opt/app/bin/start exists)" +} diff --git a/tests/operations/files.unarchive/unsupported_format.json b/tests/operations/files.unarchive/unsupported_format.json new file mode 100644 index 000000000..d432bb880 --- /dev/null +++ b/tests/operations/files.unarchive/unsupported_format.json @@ -0,0 +1,16 @@ +{ + "kwargs": { + "src": "/tmp/app.rar", + "dest": "/opt/app", + "remote_src": true + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + } + }, + "exception": { + "name": "OperationValueError", + "message": "Unsupported archive format for /tmp/app.rar. 
Supported: .tar, .tar.gz, .tgz, .tar.bz2, .tbz2, .tar.xz, .txz, .tar.zst, .zip" + } +} From 8fd11610dd3af24579b65d51890ea33a1f92092c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Tosser?= Date: Tue, 24 Mar 2026 15:21:14 +0100 Subject: [PATCH 2/4] style: apply linter formatting --- src/pyinfra/operations/files.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pyinfra/operations/files.py b/src/pyinfra/operations/files.py index d7588da5d..0f5088491 100644 --- a/src/pyinfra/operations/files.py +++ b/src/pyinfra/operations/files.py @@ -2159,8 +2159,9 @@ def unarchive( extract_cmd = _get_archive_command(src) if extract_cmd is None: raise OperationValueError( - "Unsupported archive format for {0}. " - "Supported: {1}".format(src, ", ".join(_ARCHIVE_FORMATS.keys())) + "Unsupported archive format for {0}. Supported: {1}".format( + src, ", ".join(_ARCHIVE_FORMATS.keys()) + ) ) if not remote_src: From f7e0888bfaa5096b81655d9e3a260edb696ff1b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Tosser?= Date: Tue, 24 Mar 2026 15:24:54 +0100 Subject: [PATCH 3/4] fix: resolve mypy errors in files.unarchive Pass QuoteString args directly to StringCommand instead of extending a list[str] with them. 
--- src/pyinfra/operations/files.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/pyinfra/operations/files.py b/src/pyinfra/operations/files.py index 0f5088491..c13ee5f58 100644 --- a/src/pyinfra/operations/files.py +++ b/src/pyinfra/operations/files.py @@ -2176,17 +2176,15 @@ def unarchive( archive_path = src # Build extract command - cmd_parts = extract_cmd.split() + cmd_parts: list[str] = extract_cmd.split() if extra_opts: cmd_parts.extend(extra_opts) if extract_cmd.startswith("tar"): - cmd_parts.extend([QuoteString(archive_path), "-C", QuoteString(dest)]) + yield StringCommand(*cmd_parts, QuoteString(archive_path), "-C", QuoteString(dest)) else: # unzip: unzip -o -d - cmd_parts.extend([QuoteString(archive_path), "-d", QuoteString(dest)]) - - yield StringCommand(*cmd_parts) + yield StringCommand(*cmd_parts, QuoteString(archive_path), "-d", QuoteString(dest)) # Clean up uploaded temp file if not remote_src: From 0774e051eaff5f3f71c974a6b06d20c592e4714c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Tosser?= Date: Tue, 21 Apr 2026 07:46:26 +0200 Subject: [PATCH 4/4] fix(operations.files.unarchive): keep tar -f adjacent to archive and use file_utils.chown - Split -f from the tar format flags so extra_opts can no longer be mistaken for the archive path. Command shape is now 'tar <flags> <extras> -f <archive> -C <dest>'. - Delegate ownership to file_utils.chown instead of a hand-rolled StringCommand, matching the rest of the files operations. - Add fixtures for .tar.zst (multi-flag format) and unzip with extra_opts. 
--- src/pyinfra/operations/files.py | 73 ++++++++++++------- .../files.unarchive/extract_local_tar_gz.json | 2 +- .../extract_remote_tar_gz.json | 2 +- .../extract_remote_tar_zst.json | 18 +++++ .../files.unarchive/extract_with_chown.json | 2 +- .../extract_with_extra_opts.json | 2 +- .../extract_zip_with_extra_opts.json | 19 +++++ 7 files changed, 87 insertions(+), 31 deletions(-) create mode 100644 tests/operations/files.unarchive/extract_remote_tar_zst.json create mode 100644 tests/operations/files.unarchive/extract_zip_with_extra_opts.json diff --git a/src/pyinfra/operations/files.py b/src/pyinfra/operations/files.py index c13ee5f58..c37f64e67 100644 --- a/src/pyinfra/operations/files.py +++ b/src/pyinfra/operations/files.py @@ -2078,24 +2078,28 @@ def block( yield StringCommand(out_prep, cmd, q_path, "> $OUT", real_out) -_ARCHIVE_FORMATS = { - ".tar": "tar xf", - ".tar.gz": "tar xzf", - ".tgz": "tar xzf", - ".tar.bz2": "tar xjf", - ".tbz2": "tar xjf", - ".tar.xz": "tar xJf", - ".txz": "tar xJf", - ".tar.zst": "tar --zstd -xf", - ".zip": "unzip -o", +_TAR_FORMATS = { + ".tar": ["-x"], + ".tar.gz": ["-xz"], + ".tgz": ["-xz"], + ".tar.bz2": ["-xj"], + ".tbz2": ["-xj"], + ".tar.xz": ["-xJ"], + ".txz": ["-xJ"], + ".tar.zst": ["-x", "--zstd"], } +_ZIP_FORMATS = (".zip",) +_ARCHIVE_EXTENSIONS = tuple(_TAR_FORMATS.keys()) + _ZIP_FORMATS -def _get_archive_command(src: str) -> str | None: +def _get_archive_format(src: str) -> tuple[str, list[str]] | None: lower = src.lower() - for ext, cmd in _ARCHIVE_FORMATS.items(): + for ext, flags in _TAR_FORMATS.items(): if lower.endswith(ext): - return cmd + return "tar", flags + for ext in _ZIP_FORMATS: + if lower.endswith(ext): + return "unzip", ["-o"] return None @@ -2156,14 +2160,16 @@ def unarchive( if not dest_info: raise OperationError("Destination {0} is not an existing directory".format(dest)) - extract_cmd = _get_archive_command(src) - if extract_cmd is None: + archive_format = _get_archive_format(src) + if 
archive_format is None: raise OperationValueError( "Unsupported archive format for {0}. Supported: {1}".format( - src, ", ".join(_ARCHIVE_FORMATS.keys()) + src, ", ".join(_ARCHIVE_EXTENSIONS) ) ) + tool, flags = archive_format + if not remote_src: # Upload the local archive to a temp location on the remote temp_archive = host.get_temp_filename(src) @@ -2175,16 +2181,30 @@ def unarchive( raise OperationError("Remote archive {0} does not exist".format(src)) archive_path = src - # Build extract command - cmd_parts: list[str] = extract_cmd.split() - if extra_opts: - cmd_parts.extend(extra_opts) - - if extract_cmd.startswith("tar"): - yield StringCommand(*cmd_parts, QuoteString(archive_path), "-C", QuoteString(dest)) + extras = list(extra_opts) if extra_opts else [] + + if tool == "tar": + # tar <flags> <extras> -f <archive> -C <dest> + # Keep -f adjacent to the archive path so extras never get mistaken for it. + yield StringCommand( + tool, + *flags, + *extras, + "-f", + QuoteString(archive_path), + "-C", + QuoteString(dest), + ) else: - # unzip: unzip -o -d - yield StringCommand(*cmd_parts, QuoteString(archive_path), "-d", QuoteString(dest)) + # unzip <flags> <extras> <archive> -d <dest> + yield StringCommand( + tool, + *flags, + *extras, + QuoteString(archive_path), + "-d", + QuoteString(dest), + ) # Clean up uploaded temp file if not remote_src: @@ -2192,5 +2212,4 @@ def unarchive( # Set ownership if requested if user or group: - ownership = "{0}:{1}".format(user or "", group or "") - yield StringCommand("chown", "-R", ownership, QuoteString(dest)) + yield file_utils.chown(dest, user, group, recursive=True) diff --git a/tests/operations/files.unarchive/extract_local_tar_gz.json b/tests/operations/files.unarchive/extract_local_tar_gz.json index b3b68a9c7..9a1812495 100644 --- a/tests/operations/files.unarchive/extract_local_tar_gz.json +++ b/tests/operations/files.unarchive/extract_local_tar_gz.json @@ -10,7 +10,7 @@ }, "commands": [ ["upload", "releases/app.tar.gz", "_tempfile_"], - "tar xzf _tempfile_ -C /opt/app", + "tar -xz -f 
_tempfile_ -C /opt/app", "rm -f _tempfile_" ] } diff --git a/tests/operations/files.unarchive/extract_remote_tar_gz.json b/tests/operations/files.unarchive/extract_remote_tar_gz.json index fa720220e..54a039c87 100644 --- a/tests/operations/files.unarchive/extract_remote_tar_gz.json +++ b/tests/operations/files.unarchive/extract_remote_tar_gz.json @@ -13,6 +13,6 @@ } }, "commands": [ - "tar xzf /tmp/app.tar.gz -C /opt/app" + "tar -xz -f /tmp/app.tar.gz -C /opt/app" ] } diff --git a/tests/operations/files.unarchive/extract_remote_tar_zst.json b/tests/operations/files.unarchive/extract_remote_tar_zst.json new file mode 100644 index 000000000..06d2c65b4 --- /dev/null +++ b/tests/operations/files.unarchive/extract_remote_tar_zst.json @@ -0,0 +1,18 @@ +{ + "kwargs": { + "src": "/tmp/app.tar.zst", + "dest": "/opt/app", + "remote_src": true + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + "path=/tmp/app.tar.zst": true + } + }, + "commands": [ + "tar -x --zstd -f /tmp/app.tar.zst -C /opt/app" + ] +} diff --git a/tests/operations/files.unarchive/extract_with_chown.json b/tests/operations/files.unarchive/extract_with_chown.json index 2518a0a7b..880de42ec 100644 --- a/tests/operations/files.unarchive/extract_with_chown.json +++ b/tests/operations/files.unarchive/extract_with_chown.json @@ -15,7 +15,7 @@ } }, "commands": [ - "tar xzf /tmp/app.tar.gz -C /opt/app", + "tar -xz -f /tmp/app.tar.gz -C /opt/app", "chown -R www-data:www-data /opt/app" ] } diff --git a/tests/operations/files.unarchive/extract_with_extra_opts.json b/tests/operations/files.unarchive/extract_with_extra_opts.json index ed4909ebd..e8a6314b2 100644 --- a/tests/operations/files.unarchive/extract_with_extra_opts.json +++ b/tests/operations/files.unarchive/extract_with_extra_opts.json @@ -14,6 +14,6 @@ } }, "commands": [ - "tar xzf --strip-components=1 /tmp/app.tar.gz -C /opt/app" + "tar -xz --strip-components=1 -f /tmp/app.tar.gz -C /opt/app" ] } diff --git 
a/tests/operations/files.unarchive/extract_zip_with_extra_opts.json b/tests/operations/files.unarchive/extract_zip_with_extra_opts.json new file mode 100644 index 000000000..9e0c8e1c9 --- /dev/null +++ b/tests/operations/files.unarchive/extract_zip_with_extra_opts.json @@ -0,0 +1,19 @@ +{ + "kwargs": { + "src": "/tmp/app.zip", + "dest": "/opt/app", + "remote_src": true, + "extra_opts": ["-q"] + }, + "facts": { + "files.Directory": { + "path=/opt/app": true + }, + "files.File": { + "path=/tmp/app.zip": true + } + }, + "commands": [ + "unzip -o -q /tmp/app.zip -d /opt/app" + ] +}