diff --git a/.github/workflows/sync.yml b/.github/workflows/sync.yml index 614facf..c99cbe7 100644 --- a/.github/workflows/sync.yml +++ b/.github/workflows/sync.yml @@ -132,6 +132,10 @@ jobs: - name: Build conflict resolution agent image run: docker build -f .repo-sync/docker/conflict-resolution/Dockerfile -t repo-sync-conflict-resolution .repo-sync + - name: Configure Git LFS + run: | + git lfs install --local + git -C peer lfs install --local - name: Check if SSH key is provided id: check-ssh-key diff --git a/README.md b/README.md index 2747f95..bfc00a4 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,15 @@ jobs: - uses: warpdotdev/repo-sync/actions/validate-markers@main ``` -this validates that all `!repo-sync` markers are properly paired, not nested, and that no symlinks exist in the repo. +this validates that all `!repo-sync` markers are properly paired, not nested, that no symlinks exist in the repo, and that text Git LFS payloads do not contain repo-sync markers. + +to validate only selected paths, pass `paths` as a JSON array so file names containing spaces or shell metacharacters are preserved exactly: + +```yaml +- uses: warpdotdev/repo-sync/actions/validate-markers@main + with: + paths: '["src/file with spaces.txt", "assets/*.bin"]' +``` ### step 3: add the sync workflow (both repos) diff --git a/actions/validate-markers/action.yml b/actions/validate-markers/action.yml index 76996cd..76ec395 100644 --- a/actions/validate-markers/action.yml +++ b/actions/validate-markers/action.yml @@ -1,12 +1,13 @@ name: "Validate repo-sync markers" description: > Validates that all !repo-sync markers are properly paired and non-nested, - and that no symlinks target private/ directories or escape the repository root. + that no symlinks target private/ directories or escape the repository root, + and that text Git LFS payloads do not contain repo-sync markers. inputs: paths: description: > - Space-separated list of relative file paths or globs to validate. + JSON array of relative file paths or globs to validate. Defaults to all files in the repository. required: false default: "" @@ -23,15 +24,17 @@ runs: shell: bash run: pip install "${{ github.action_path }}/../../" + - name: Configure Git LFS + shell: bash + run: git lfs install --local + - name: Run marker validation shell: bash env: PATHS_INPUT: ${{ inputs.paths }} run: | - args=("--validate-only" "${{ github.workspace }}") + args=("--validate-only" "--validate-lfs-payloads" "${{ github.workspace }}") if [ -n "$PATHS_INPUT" ]; then - for p in $PATHS_INPUT; do - args+=("$p") - done + args+=("--paths-json" "$PATHS_INPUT") fi repo-sync-strip "${args[@]}" diff --git a/src/repo_sync/stack/git_ops.py b/src/repo_sync/stack/git_ops.py index c9a8707..b793193 100644 --- a/src/repo_sync/stack/git_ops.py +++ b/src/repo_sync/stack/git_ops.py @@ -4,9 +4,12 @@ import os import subprocess +import tempfile from dataclasses import dataclass +from pathlib import Path from repo_sync.errors import VerboseCalledProcessError +from repo_sync.stack.lfs import parse_lfs_pointer_file @dataclass @@ -51,7 +54,6 @@ def _run(self, args: list[str], check: bool = True) -> CommandResult: stderr=result.stderr.strip(), ) - def rev_parse(self, ref: str) -> str: """Resolve a ref to a full SHA.""" return self._run(["rev-parse", ref]).stdout @@ -322,6 +324,14 @@ def remote_add_or_update(self, name: str, url: str) -> None: else: self._run(["remote", "add", name, url]) + def remote_remove(self, name: str) -> None: + """Remove a remote if it exists.""" + self._run(["remote", "remove", name], check=False) + + def remote_url(self, name: str) -> str: + """Return the configured URL for a remote.""" + return self._run(["remote", "get-url", name]).stdout + def log_shas(self, ref: str = "HEAD") -> list[str]: """Return all commit SHAs reachable from ref (newest first).""" result = self._run(["log", "--format=%H", ref]) @@ -352,3 +362,171 @@ def diff_patch(self, ref_a: str, ref_b: str) -> str: result.stderr, ) return result.stdout.decode("utf-8", errors="replace").strip() + + def diff_name_only(self, ref_a: str, ref_b: str) -> list[str]: + """Return changed paths between two refs.""" + env = {**os.environ, **self._env_additions} if self._env_additions else None + result = subprocess.run( + ["git", "diff", "--name-only", "-z", ref_a, ref_b], + cwd=self.repo_dir, + capture_output=True, + env=env, + ) + if result.returncode != 0: + raise VerboseCalledProcessError( + result.returncode, + ["git", "diff", "--name-only", "-z", ref_a, ref_b], + result.stdout, + result.stderr, + ) + if not result.stdout: + return [] + return [ + path + for path in result.stdout.decode("utf-8", errors="surrogateescape").split("\0") + if path + ] + + def lfs_tracked_paths( + self, + paths: list[str], + source_ref: str | None = None, + ) -> set[str]: + """Return paths that are configured with the Git LFS filter.""" + if not paths: + return set() + + args = ["check-attr", "-z"] + if source_ref is not None: + args.extend(["--source", source_ref]) + args.extend(["--stdin", "filter"]) + + env = {**os.environ, **self._env_additions} if self._env_additions else None + input_text = "".join(f"{path}\0" for path in paths) + result = subprocess.run( + ["git", *args], + cwd=self.repo_dir, + input=input_text, + capture_output=True, + text=True, + env=env, + ) + if result.returncode != 0: + raise VerboseCalledProcessError( + result.returncode, + ["git", *args], + result.stdout, + result.stderr, + ) + + tracked_paths: set[str] = set() + fields = [field for field in result.stdout.split("\0") if field] + for index in range(0, len(fields), 3): + if index + 2 >= len(fields): + break + path, attr, value = fields[index:index + 3] + if attr == "filter" and value == "lfs": + tracked_paths.add(path) + return tracked_paths + + def lfs_fetch_paths( + self, + ref: str, + paths: list[str], + expected_oids: dict[str, str] | None = None, + ) -> None: + """Fetch LFS objects for exact paths at a ref.""" + for path in paths: + with tempfile.NamedTemporaryFile() as output: + self.lfs_write_path( + ref, + path, + output.name, + expected_oid=(expected_oids or {}).get(path), + ) + + def lfs_write_path( + self, + ref: str, + path: str, + output_path: str, + expected_oid: str | None = None, + ) -> None: + """Write the LFS-smudged content for an exact path at a ref.""" + env = { + **os.environ, + **self._env_additions, + "GIT_ATTR_SOURCE": ref, + } + command = ["git", "cat-file", "--filters", f"{ref}:{path}"] + with open(output_path, "wb") as output: + result = subprocess.run( + command, + cwd=self.repo_dir, + stdout=output, + stderr=subprocess.PIPE, + env=env, + ) + if result.returncode != 0: + raise VerboseCalledProcessError( + result.returncode, + command, + b"", + result.stderr, + ) + pointer = parse_lfs_pointer_file(output_path, path) + if pointer is not None and ( + expected_oid is None or pointer.oid == expected_oid + ): + raise VerboseCalledProcessError( + 1, + command, + b"", + ( + "git cat-file --filters returned an LFS pointer instead " + "of payload bytes. Ensure Git LFS filters are configured " + "with `git lfs install --local`." + ), + ) + + def lfs_missing_oids(self, oids: list[str]) -> list[str]: + """Return LFS object IDs that are not present in the local LFS store.""" + if not oids: + return [] + + git_common_dir = self._run(["rev-parse", "--git-common-dir"]).stdout + git_common_path = Path(git_common_dir) + if not git_common_path.is_absolute(): + git_common_path = Path(self.repo_dir) / git_common_path + + missing: list[str] = [] + for oid in oids: + object_path = ( + git_common_path / "lfs" / "objects" / oid[:2] / oid[2:4] / oid + ) + if not object_path.is_file(): + missing.append(oid) + return missing + + def lfs_push_oids(self, remote: str, oids: list[str]) -> None: + """Push specific LFS object IDs to a remote.""" + if not oids: + return + + env = {**os.environ, **self._env_additions} if self._env_additions else None + input_text = "".join(f"{oid}\n" for oid in oids) + result = subprocess.run( + ["git", "lfs", "push", "--object-id", remote, "--stdin"], + cwd=self.repo_dir, + input=input_text, + capture_output=True, + text=True, + env=env, + ) + if result.returncode != 0: + raise VerboseCalledProcessError( + result.returncode, + ["git", "lfs", "push", "--object-id", remote, "--stdin"], + result.stdout, + result.stderr, + ) diff --git a/src/repo_sync/stack/lfs.py b/src/repo_sync/stack/lfs.py new file mode 100644 index 0000000..7f3b01b --- /dev/null +++ b/src/repo_sync/stack/lfs.py @@ -0,0 +1,136 @@ +"""Helpers for detecting Git LFS pointer files in synced trees.""" + +from __future__ import annotations + +import os +import re +import stat +from dataclasses import dataclass +from pathlib import Path +from typing import Iterable + + +_POINTER_VERSION = "version https://git-lfs.github.com/spec/v1" +_OID_RE = re.compile(r"^oid sha256:([0-9a-f]{64})$") +_SIZE_RE = re.compile(r"^size ([0-9]+)$") +_MAX_POINTER_BYTES = 4096 + + +@dataclass(frozen=True) +class LfsPointer: + """A Git LFS pointer found in a synced tree.""" + + path: str + oid: str + size: int + + +def parse_lfs_pointer(data: bytes, path: str) -> LfsPointer | None: + """Parse a Git LFS pointer file, returning None for ordinary files.""" + if len(data) > _MAX_POINTER_BYTES: + return None + + try: + text = data.decode("utf-8") + except UnicodeDecodeError: + return None + + lines = text.splitlines() + if not lines or lines[0] != _POINTER_VERSION: + return None + + oid: str | None = None + size: int | None = None + for line in lines[1:]: + oid_match = _OID_RE.match(line) + if oid_match: + oid = oid_match.group(1) + continue + + size_match = _SIZE_RE.match(line) + if size_match: + size = int(size_match.group(1)) + + if oid is None or size is None: + return None + + return LfsPointer(path=path, oid=oid, size=size) + + +def parse_lfs_pointer_file(file_path: str | os.PathLike[str], path: str) -> LfsPointer | None: + """Parse a Git LFS pointer file without reading large payloads into memory.""" + with open(file_path, "rb") as file: + data = file.read(_MAX_POINTER_BYTES + 1) + return parse_lfs_pointer(data, path) + + +def collect_lfs_pointers( + root: str, + paths: Iterable[str] | None = None, + fail_on_read_error: bool | None = None, +) -> list[LfsPointer]: + """Collect LFS pointers under root, optionally restricted to relative paths.""" + candidates = _candidate_paths(root, paths) + pointers: list[LfsPointer] = [] + fail_on_read_error = ( + paths is not None + if fail_on_read_error is None + else fail_on_read_error + ) + + for relpath, fullpath in candidates: + try: + path_stat = os.lstat(fullpath) + except FileNotFoundError: + continue + except OSError: + if fail_on_read_error: + raise + continue + + if stat.S_ISLNK(path_stat.st_mode) or not stat.S_ISREG(path_stat.st_mode): + continue + + try: + if path_stat.st_size > _MAX_POINTER_BYTES: + continue + data = Path(fullpath).read_bytes() + except OSError: + if fail_on_read_error: + raise + continue + + pointer = parse_lfs_pointer(data, relpath) + if pointer is not None: + pointers.append(pointer) + + return pointers + + +def _candidate_paths( + root: str, + paths: Iterable[str] | None, +) -> list[tuple[str, str]]: + """Return normalized relative and absolute paths to inspect.""" + root_path = Path(root) + if paths is not None: + result: list[tuple[str, str]] = [] + for path in paths: + relpath = os.path.normpath(path) + if os.path.isabs(relpath) or relpath == ".." or relpath.startswith("../"): + continue + result.append((relpath, str(root_path / relpath))) + return result + + result = [] + for dirpath, dirnames, filenames in os.walk(root, followlinks=False): + dirnames[:] = [ + name + for name in dirnames + if not os.path.islink(os.path.join(dirpath, name)) + ] + for filename in filenames: + fullpath = os.path.join(dirpath, filename) + relpath = os.path.relpath(fullpath, root) + result.append((relpath, fullpath)) + return result diff --git a/src/repo_sync/strip/cli.py b/src/repo_sync/strip/cli.py index 2da4e07..ec32497 100644 --- a/src/repo_sync/strip/cli.py +++ b/src/repo_sync/strip/cli.py @@ -12,8 +12,10 @@ from __future__ import annotations import argparse +import json import sys +from repo_sync.strip.lfs import validate_lfs_payloads from repo_sync.strip.tree import StrippingError, strip_tree @@ -35,19 +37,58 @@ def main(argv: list[str] | None = None) -> int: "(only meaningful with --validate-only)." ), ) + parser.add_argument( + "--paths-json", + default="", + help=( + "JSON array of relative file paths to validate " + "(only meaningful with --validate-only)." + ), + ) parser.add_argument( "--validate-only", action="store_true", default=False, help="Only validate markers; do not modify files.", ) + parser.add_argument( + "--validate-lfs-payloads", + action="store_true", + default=False, + help="Validate that Git LFS payloads do not contain repo-sync markers.", + ) args = parser.parse_args(argv) - paths = args.paths if args.paths else None + if args.paths_json: + try: + paths_json = json.loads(args.paths_json) + except json.JSONDecodeError as exc: + print(f"--paths-json must be a JSON array: {exc}", file=sys.stderr) + return 1 + if not isinstance(paths_json, list) or not all( + isinstance(path, str) for path in paths_json + ): + print("--paths-json must be a JSON array of strings", file=sys.stderr) + return 1 + if args.paths: + print( + "--paths-json cannot be combined with positional paths", + file=sys.stderr, + ) + return 1 + paths = paths_json if paths_json else None + else: + paths = args.paths if args.paths else None if args.validate_only: result = strip_tree(args.directory, validate_only=True, paths=paths) + if args.validate_lfs_payloads: + lfs_result = validate_lfs_payloads(args.directory, paths=paths) + result = result._replace( + errors=[*result.errors, *lfs_result.errors], + warnings=[*result.warnings, *lfs_result.warnings], + ) for w in result.warnings: print(f"warning: {w}", file=sys.stderr) if result.errors: @@ -55,6 +96,12 @@ def main(argv: list[str] | None = None) -> int: print(err, file=sys.stderr) return 1 return 0 + if args.validate_lfs_payloads: + print( + "--validate-lfs-payloads requires --validate-only", + file=sys.stderr, + ) + return 1 try: result = strip_tree(args.directory) diff --git a/src/repo_sync/strip/lfs.py b/src/repo_sync/strip/lfs.py new file mode 100644 index 0000000..d387c97 --- /dev/null +++ b/src/repo_sync/strip/lfs.py @@ -0,0 +1,194 @@ +"""Git LFS payload validation for repo-sync markers.""" + +from __future__ import annotations + +import json +import os +from pathlib import Path, PurePosixPath +import subprocess +import tempfile + +from repo_sync.stack.lfs import parse_lfs_pointer_file +from repo_sync.strip.detect import is_binary +from repo_sync.strip.markers import ( + MarkerError, + has_private_file_marker, + strip_private_regions, + validate_markers, +) +from repo_sync.strip.tree import StripResult, _expand_paths + + +def validate_lfs_payload_file(payload_path: str, *, filepath: str) -> list[str]: + """Return errors if an LFS payload contains repo-sync private markers.""" + if is_binary(payload_path): + return [] + + try: + text = Path(payload_path).read_bytes().decode("utf-8") + except UnicodeDecodeError: + return [] + + lines = text.splitlines(keepends=True) + errors = validate_markers(lines, filepath=filepath) + if errors: + return [ + f"{error} (Git LFS payloads cannot contain repo-sync private markers)" + for error in errors + ] + + if has_private_file_marker(lines): + return [ + f"{filepath}: Git LFS payload contains a private-file marker; " + "LFS payloads cannot be stripped during repo sync" + ] + + try: + stripped = strip_private_regions(lines, filepath=filepath) + except MarkerError as exc: + return [str(exc)] + + if stripped != lines: + return [ + f"{filepath}: Git LFS payload contains private region markers; " + "LFS payloads cannot be stripped during repo sync" + ] + + return [] + + +def validate_lfs_payloads( + root: str, + *, + paths: list[str] | None = None, + ref: str = "HEAD", +) -> StripResult: + """Validate repo-sync marker invariants for Git LFS payloads in a repo.""" + errors: list[str] = [] + warnings: list[str] = [] + try: + lfs_oids_by_path = _git_lfs_paths(root, ref) + except subprocess.CalledProcessError as exc: + stderr = ( + exc.stderr + if isinstance(exc.stderr, str) + else (exc.stderr or b"").decode("utf-8", errors="replace") + ) + return StripResult( + [f"failed to list Git LFS files: {stderr.strip() or exc}"], + warnings, + ) + + selected_paths = _select_lfs_paths(root, sorted(lfs_oids_by_path), paths) + if not selected_paths: + return StripResult(errors, warnings) + + with tempfile.TemporaryDirectory(prefix="repo-sync-lfs-validate-") as temp_dir: + for index, relpath in enumerate(selected_paths): + payload_path = os.path.join(temp_dir, f"payload-{index}") + try: + _write_lfs_payload( + root, + ref, + relpath, + payload_path, + expected_oid=lfs_oids_by_path.get(relpath), + ) + except subprocess.CalledProcessError as exc: + stderr = ( + exc.stderr + if isinstance(exc.stderr, str) + else (exc.stderr or b"").decode("utf-8", errors="replace") + ) + errors.append( + f"{relpath}: failed to materialize Git LFS payload: " + f"{stderr.strip() or exc}" + ) + continue + errors.extend( + validate_lfs_payload_file(payload_path, filepath=relpath) + ) + + return StripResult(errors, warnings) + + +def _git_lfs_paths(root: str, ref: str) -> dict[str, str | None]: + """Return Git LFS paths and object IDs present at a ref.""" + result = subprocess.run( + ["git", "lfs", "ls-files", "--json", ref], + cwd=root, + capture_output=True, + text=True, + check=True, + ) + data = json.loads(result.stdout or "{}") + files = data.get("files") or [] + paths: dict[str, str | None] = {} + for entry in files: + path = entry.get("name") or entry.get("path") + oid = entry.get("oid") + if path: + paths[path] = oid if isinstance(oid, str) else None + return paths + + +def _select_lfs_paths( + root: str, + lfs_paths: list[str], + paths: list[str] | None, +) -> list[str]: + """Return the LFS paths selected by the action's path filters.""" + if paths is None: + return lfs_paths + if any( + path == ".gitattributes" or path.endswith("/.gitattributes") + for path in paths + ): + return lfs_paths + + expanded = {os.path.relpath(path, root) for path in _expand_paths(root, paths)} + selected: list[str] = [] + for lfs_path in lfs_paths: + if lfs_path in expanded or _matches_any_pattern(lfs_path, paths): + selected.append(lfs_path) + return selected + + +def _matches_any_pattern(path: str, patterns: list[str]) -> bool: + """Return True if a repo-relative path matches any glob pattern.""" + pure_path = PurePosixPath(path) + return any(pure_path.match(pattern) for pattern in patterns) + + +def _write_lfs_payload( + root: str, + ref: str, + relpath: str, + output_path: str, + expected_oid: str | None = None, +) -> None: + """Write the LFS-smudged payload for a path to a temporary file.""" + command = ["git", "cat-file", "--filters", f"{ref}:{relpath}"] + env = {**os.environ, "GIT_ATTR_SOURCE": ref} + with open(output_path, "wb") as output: + subprocess.run( + command, + cwd=root, + stdout=output, + stderr=subprocess.PIPE, + env=env, + check=True, + ) + pointer = parse_lfs_pointer_file(output_path, relpath) + if pointer is not None and ( + expected_oid is None or pointer.oid == expected_oid + ): + raise subprocess.CalledProcessError( + 1, + command, + stderr=( + "git cat-file --filters returned an LFS pointer instead " + "of payload bytes. Ensure Git LFS filters are configured " + "with `git lfs install --local`." + ), + ) diff --git a/src/repo_sync/workflows/create_sync_prs.py b/src/repo_sync/workflows/create_sync_prs.py index 71448e0..ba45107 100644 --- a/src/repo_sync/workflows/create_sync_prs.py +++ b/src/repo_sync/workflows/create_sync_prs.py @@ -27,11 +27,13 @@ from repo_sync.stack.gh_ops import GhOps from repo_sync.stack.git_ops import GitOps +from repo_sync.stack.lfs import LfsPointer, collect_lfs_pointers from repo_sync.stack.trailers import ( SyncOrigin, append_trailer, format_conflict_trailer, ) +from repo_sync.strip.lfs import validate_lfs_payload_file from repo_sync.workflows.conflict import ( add_conflict_label, assign_conflict_reviewer, @@ -143,6 +145,108 @@ def _run_fixup_script(script_path: str, working_dir: str) -> bool: return False +def _mirror_lfs_objects( + source_git: GitOps, + peer_git: GitOps, + source_ref: str, + snapshot_dir: str, + changed_paths: list[str], + attributes_git: GitOps, + attributes_ref: str, + validate_payload_markers: bool = False, +) -> None: + """Mirror LFS objects referenced by changed pointer files in a snapshot.""" + scan_paths = None if _lfs_attributes_changed(changed_paths) else changed_paths + pointers = collect_lfs_pointers( + snapshot_dir, + scan_paths, + fail_on_read_error=True, + ) + if not pointers: + return + lfs_tracked_paths = attributes_git.lfs_tracked_paths( + sorted({pointer.path for pointer in pointers}), + source_ref=attributes_ref, + ) + pointers = [ + pointer + for pointer in pointers + if pointer.path in lfs_tracked_paths + ] + if not pointers: + return + + paths_by_oid: dict[str, list[str]] = {} + for pointer in pointers: + paths_by_oid.setdefault(pointer.oid, []).append(pointer.path) + + oids = sorted(paths_by_oid) + logger.info( + "Mirroring %d Git LFS object(s) referenced by %d changed pointer file(s).", + len(oids), len(pointers), + ) + for oid in oids: + logger.info( + "Mirroring Git LFS object %s for path(s): %s.", + oid, + ", ".join(sorted(paths_by_oid[oid])), + ) + + fetch_paths = sorted({pointer.path for pointer in pointers}) + expected_oids = {pointer.path: pointer.oid for pointer in pointers} + source_git.lfs_fetch_paths(source_ref, fetch_paths, expected_oids=expected_oids) + missing_oids = source_git.lfs_missing_oids(oids) + if missing_oids: + raise PermanentSyncError( + "Missing Git LFS object(s) after fetch: " + + ", ".join(sorted(missing_oids)) + ) + if validate_payload_markers: + _validate_lfs_payload_markers(source_git, source_ref, pointers) + + target_remote = f"repo_sync_lfs_target_{os.getpid()}" + peer_url = peer_git.remote_url("origin") + source_git.remote_add_or_update(target_remote, peer_url) + try: + source_git.lfs_push_oids(target_remote, oids) + finally: + source_git.remote_remove(target_remote) + + +def _lfs_attributes_changed(changed_paths: list[str]) -> bool: + """Return True when changed paths may affect LFS tracking rules.""" + return any( + path == ".gitattributes" or path.endswith("/.gitattributes") + for path in changed_paths + ) + + +def _validate_lfs_payload_markers( + source_git: GitOps, + source_ref: str, + pointers: list[LfsPointer], +) -> None: + """Reject text LFS payloads that rely on repo-sync stripping markers.""" + errors: list[str] = [] + with tempfile.TemporaryDirectory(prefix="repo-sync-lfs-payload-") as temp_dir: + for index, pointer in enumerate(pointers): + payload_path = os.path.join(temp_dir, f"payload-{index}") + source_git.lfs_write_path( + source_ref, + pointer.path, + payload_path, + expected_oid=pointer.oid, + ) + errors.extend( + validate_lfs_payload_file(payload_path, filepath=pointer.path) + ) + if errors: + raise PermanentSyncError( + "Git LFS payloads cannot contain repo-sync private markers:\n" + + "\n".join(errors) + ) + + # Docker image for the PR description agent. Built locally by the sync # workflow from docker/pr-description/Dockerfile (not pushed to a registry). # Can be overridden via environment variable for testing. @@ -473,6 +577,7 @@ def _sync_private_to_public( return False # Empty diff — all changes were internal-only. snapshot_dir, prev_snapshot_dir, diff_repo, patch_file, diff_commit = diff_result + changed_paths = GitOps(diff_repo).diff_name_only("HEAD~1", "HEAD") try: # Apply the delta to the peer repo by cherry-picking from the temp repo. @@ -542,6 +647,16 @@ def _sync_private_to_public( ) else: peer_git.commit_amend_message("repo-sync: sync from private", origin_trailer) + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref=source_sha, + snapshot_dir=snapshot_dir, + changed_paths=changed_paths, + attributes_git=GitOps(diff_repo), + attributes_ref="HEAD", + validate_payload_markers=True, + ) # Build the PR description (identical for conflict and non-conflict). if pr_desc_cache.source_sha == source_sha: @@ -626,6 +741,7 @@ def _sync_public_to_private( Returns True if a PR was created. Raises TransientSyncError on base-branch-deleted failures. """ + changed_paths = source_git.diff_name_only(f"{source_sha}^", source_sha) # Add the source repo as a remote. Use the absolute path because # peer_git runs with cwd=peer_repo_dir, so a relative path would # resolve to the wrong location. @@ -685,6 +801,20 @@ def _sync_public_to_private( peer_git.commit_amend_message( current_msg, origin_trailer, allow_empty=True ) + lfs_snapshot_dir = tempfile.mkdtemp(prefix=f"repo-sync-lfs-{short_sha}-") + try: + source_git.archive_to_dir(source_sha, lfs_snapshot_dir) + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref=source_sha, + snapshot_dir=lfs_snapshot_dir, + changed_paths=changed_paths, + attributes_git=source_git, + attributes_ref=source_sha, + ) + finally: + shutil.rmtree(lfs_snapshot_dir, ignore_errors=True) # Build the PR description (identical for conflict and non-conflict). source_gh = GhOps(source_repo, token=os.environ.get("GH_TOKEN")) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..98f3c07 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +"""Test package for repo-sync.""" diff --git a/tests/stack/test_git_ops.py b/tests/stack/test_git_ops.py new file mode 100644 index 0000000..d213bf5 --- /dev/null +++ b/tests/stack/test_git_ops.py @@ -0,0 +1,128 @@ +"""Tests for git operation helpers.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +from repo_sync.errors import VerboseCalledProcessError +from repo_sync.stack.git_ops import GitOps + + +def test_lfs_tracked_paths_uses_source_ref(tmp_git_repo: GitOps) -> None: + repo_dir = Path(tmp_git_repo.repo_dir) + (repo_dir / ".gitattributes").write_text("*.bin filter=lfs\n", encoding="utf-8") + tmp_git_repo._run(["add", ".gitattributes"]) + tmp_git_repo._run(["commit", "-m", "track bin files"]) + bin_attrs_ref = tmp_git_repo.rev_parse("HEAD") + + (repo_dir / ".gitattributes").write_text("*.dat filter=lfs\n", encoding="utf-8") + tmp_git_repo._run(["add", ".gitattributes"]) + tmp_git_repo._run(["commit", "-m", "track dat files"]) + + assert tmp_git_repo.lfs_tracked_paths( + ["asset.bin", "asset.dat"], + source_ref=bin_attrs_ref, + ) == {"asset.bin"} + assert tmp_git_repo.lfs_tracked_paths(["asset.bin", "asset.dat"]) == { + "asset.dat" + } + + +def test_lfs_fetch_paths_uses_cat_file_filters_for_exact_paths( + tmp_git_repo: GitOps, +) -> None: + result = MagicMock() + result.returncode = 0 + result.stderr = "" + + with patch("repo_sync.stack.git_ops.subprocess.run", return_value=result) as run: + tmp_git_repo.lfs_fetch_paths("abc123", ["asset,with-comma.bin"]) + + run.assert_called_once() + assert run.call_args.args[0] == [ + "git", + "cat-file", + "--filters", + "abc123:asset,with-comma.bin", + ] + assert run.call_args.kwargs["env"]["GIT_ATTR_SOURCE"] == "abc123" + + +def test_lfs_write_path_fails_when_filters_return_expected_pointer( + tmp_git_repo: GitOps, + tmp_path: Path, +) -> None: + oid = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" + result = MagicMock() + result.returncode = 0 + result.stderr = b"" + output_path = tmp_path / "payload" + + def run(_command: list[str], **kwargs: object) -> MagicMock: + stdout = kwargs["stdout"] + stdout.write( + b"version https://git-lfs.github.com/spec/v1\n" + + f"oid sha256:{oid}\n".encode() + + b"size 1234\n" + ) + return result + + with patch("repo_sync.stack.git_ops.subprocess.run", side_effect=run): + with pytest.raises( + VerboseCalledProcessError, + match="git lfs install --local", + ): + tmp_git_repo.lfs_write_path( + "abc123", + "asset.bin", + str(output_path), + expected_oid=oid, + ) + + +def test_lfs_write_path_uses_attributes_from_ref(tmp_git_repo: GitOps) -> None: + repo_dir = Path(tmp_git_repo.repo_dir) + tmp_git_repo._run(["lfs", "install", "--local"]) + tmp_git_repo._run(["lfs", "track", "*.txt"]) + (repo_dir / "asset.txt").write_text("payload\n", encoding="utf-8") + tmp_git_repo._run(["add", ".gitattributes", "asset.txt"]) + tmp_git_repo._run(["commit", "-m", "add lfs asset"]) + lfs_ref = tmp_git_repo.rev_parse("HEAD") + + (repo_dir / ".gitattributes").unlink() + tmp_git_repo._run(["add", ".gitattributes"]) + tmp_git_repo._run(["commit", "-m", "stop tracking txt files"]) + + output_path = repo_dir / "payload.out" + tmp_git_repo.lfs_write_path(lfs_ref, "asset.txt", str(output_path)) + + assert output_path.read_text(encoding="utf-8") == "payload\n" + + +def test_lfs_write_path_uses_cat_file_filters_for_exact_path( + tmp_git_repo: GitOps, + tmp_path: Path, +) -> None: + result = MagicMock() + result.returncode = 0 + result.stderr = b"" + output_path = tmp_path / "payload" + + with patch("repo_sync.stack.git_ops.subprocess.run", return_value=result) as run: + tmp_git_repo.lfs_write_path( + "abc123", + "asset,with-comma.bin", + str(output_path), + ) + + run.assert_called_once() + assert run.call_args.args[0] == [ + "git", + "cat-file", + "--filters", + "abc123:asset,with-comma.bin", + ] + assert run.call_args.kwargs["env"]["GIT_ATTR_SOURCE"] == "abc123" diff --git a/tests/stack/test_lfs.py b/tests/stack/test_lfs.py new file mode 100644 index 0000000..2373d1d --- /dev/null +++ b/tests/stack/test_lfs.py @@ -0,0 +1,90 @@ +"""Tests for Git LFS pointer detection helpers.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import mock_open, patch + +import pytest + +from repo_sync.stack.lfs import ( + _MAX_POINTER_BYTES, + LfsPointer, + collect_lfs_pointers, + parse_lfs_pointer, + parse_lfs_pointer_file, +) + + +OID = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" + + +def _pointer(oid: str = OID, size: int = 1234) -> bytes: + """Return valid Git LFS pointer file content.""" + return ( + "version https://git-lfs.github.com/spec/v1\n" + f"oid sha256:{oid}\n" + f"size {size}\n" + ).encode("utf-8") + + +def test_parse_lfs_pointer() -> None: + pointer = parse_lfs_pointer(_pointer(), "asset.bin") + + assert pointer == LfsPointer(path="asset.bin", oid=OID, size=1234) + + +def test_parse_lfs_pointer_file_reads_only_pointer_sized_prefix() -> None: + data = _pointer() + b"x" * (_MAX_POINTER_BYTES * 2) + file_open = mock_open(read_data=data) + + with patch("builtins.open", file_open): + assert parse_lfs_pointer_file("payload.bin", "asset.bin") is None + + handle = file_open() + handle.read.assert_called_once_with(_MAX_POINTER_BYTES + 1) + + +def test_parse_lfs_pointer_rejects_ordinary_file() -> None: + assert parse_lfs_pointer(b"hello\n", "hello.txt") is None + + +def test_parse_lfs_pointer_rejects_invalid_oid() -> None: + data = ( + "version https://git-lfs.github.com/spec/v1\n" + "oid sha256:not-a-sha\n" + "size 1234\n" + ).encode("utf-8") + + assert parse_lfs_pointer(data, "asset.bin") is None + + +def test_collect_lfs_pointers_filters_to_changed_paths(tmp_path: Path) -> None: + (tmp_path / "asset.bin").write_bytes(_pointer()) + (tmp_path / "unchanged.bin").write_bytes(_pointer("f" * 64)) + (tmp_path / "ordinary.txt").write_text("hello\n", encoding="utf-8") + + pointers = collect_lfs_pointers( + str(tmp_path), + ["asset.bin", "ordinary.txt", "deleted.bin"], + ) + + assert pointers == [LfsPointer(path="asset.bin", oid=OID, size=1234)] + + +def test_collect_lfs_pointers_raises_read_errors_for_changed_paths( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + (tmp_path / "asset.bin").write_bytes(_pointer()) + original_read_bytes = Path.read_bytes + + def read_bytes(path: Path) -> bytes: + if path.name == "asset.bin": + raise PermissionError("cannot read") + return original_read_bytes(path) + + monkeypatch.setattr(Path, "read_bytes", read_bytes) + + with pytest.raises(PermissionError): + collect_lfs_pointers(str(tmp_path), ["asset.bin"]) diff --git a/tests/test_cli.py b/tests/test_cli.py index 8bc74cf..74ea8ed 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -3,8 +3,10 @@ from __future__ import annotations from pathlib import Path +from unittest.mock import patch from repo_sync.strip.cli import main +from repo_sync.strip.tree import StripResult def _write(root: Path, relpath: str, content: str) -> Path: @@ -62,3 +64,39 @@ def test_validate_only_with_paths(self, tmp_path: Path) -> None: # Only validate good.rs. rc = main(["--validate-only", str(tmp_path), "good.rs"]) assert rc == 0 + + def test_validate_only_with_lfs_payload_errors(self, tmp_path: Path) -> None: + """CLI --validate-only can include Git LFS payload marker validation.""" + with patch( + "repo_sync.strip.cli.validate_lfs_payloads", + return_value=StripResult(["asset.txt: LFS marker error"], []), + ): + rc = main(["--validate-only", "--validate-lfs-payloads", str(tmp_path)]) + + assert rc == 1 + + def test_validate_only_with_paths_json_preserves_spaces( + self, + tmp_path: Path, + ) -> None: + """CLI --paths-json preserves exact file paths with whitespace.""" + _write(tmp_path, "a b.txt", "hello\n") + with patch( + "repo_sync.strip.cli.validate_lfs_payloads", + return_value=StripResult([], []), + ) as validate_lfs_payloads: + rc = main( + [ + "--validate-only", + "--validate-lfs-payloads", + str(tmp_path), + "--paths-json", + '["a b.txt"]', + ] + ) + + assert rc == 0 + validate_lfs_payloads.assert_called_once_with( + str(tmp_path), + paths=["a b.txt"], + ) diff --git a/tests/test_lfs_payload_validation.py b/tests/test_lfs_payload_validation.py new file mode 100644 index 0000000..b1c85b7 --- /dev/null +++ b/tests/test_lfs_payload_validation.py @@ -0,0 +1,114 @@ +"""Tests for validating repo-sync markers in Git LFS payloads.""" + +from __future__ import annotations + +import subprocess +from pathlib import Path +from unittest.mock import patch + +from repo_sync.strip.lfs import validate_lfs_payload_file, validate_lfs_payloads + + +def test_validate_lfs_payload_file_rejects_private_region_markers( + tmp_path: Path, +) -> None: + payload = tmp_path / "payload.txt" + payload.write_text( + "public\n" + "# !repo-sync: private-start\n" + "secret\n" + "# !repo-sync: private-end\n", + encoding="utf-8", + ) + + errors = validate_lfs_payload_file(str(payload), filepath="asset.txt") + + assert len(errors) == 1 + assert "asset.txt" in errors[0] + assert "private region markers" in errors[0] + + +def test_validate_lfs_payload_file_allows_binary_marker_bytes( + tmp_path: Path, +) -> None: + payload = tmp_path / "payload.bin" + payload.write_bytes(b"\x00!repo-sync: private-file") + + assert validate_lfs_payload_file(str(payload), filepath="asset.bin") == [] + + +def test_validate_lfs_payload_file_allows_marker_substrings( + tmp_path: Path, +) -> None: + payload = tmp_path / "payload.txt" + payload.write_text( + "Docs mention !repo-sync: private-start/end as prose.\n", + encoding="utf-8", + ) + + assert validate_lfs_payload_file(str(payload), filepath="asset.txt") == [] + + +def test_validate_lfs_payloads_materializes_lfs_paths( + tmp_path: Path, +) -> None: + def run(command: list[str], **kwargs: object) -> subprocess.CompletedProcess: + if command[:4] == ["git", "lfs", "ls-files", "--json"]: + return subprocess.CompletedProcess( + command, + 0, + stdout='{"files":[{"name":"asset.txt"}]}', + stderr="", + ) + if command[:3] == ["git", "cat-file", "--filters"]: + assert kwargs["env"]["GIT_ATTR_SOURCE"] == "HEAD" + stdout = kwargs["stdout"] + stdout.write( + b"public\n" + b"# !repo-sync: private-start\n" + b"secret\n" + b"# !repo-sync: private-end\n" + ) + return subprocess.CompletedProcess(command, 0, stderr=b"") + raise AssertionError(f"unexpected command: {command}") + + with patch("repo_sync.strip.lfs.subprocess.run", side_effect=run): + result = validate_lfs_payloads(str(tmp_path)) + + assert len(result.errors) == 1 + assert "asset.txt" in result.errors[0] + + +def test_validate_lfs_payloads_fails_when_filters_return_pointer( + tmp_path: Path, +) -> None: + oid = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" + + def run(command: list[str], **kwargs: object) -> subprocess.CompletedProcess: + if command[:4] == ["git", "lfs", "ls-files", "--json"]: + return subprocess.CompletedProcess( + command, + 0, + stdout=( + '{"files":[{"name":"asset.bin",' + f'"oid":"{oid}"' + "}]}" + ), + stderr="", + ) + if command[:3] == ["git", "cat-file", "--filters"]: + stdout = kwargs["stdout"] + stdout.write( + b"version https://git-lfs.github.com/spec/v1\n" + + f"oid sha256:{oid}\n".encode() + + b"size 1234\n" + ) + return subprocess.CompletedProcess(command, 0, stderr=b"") + raise AssertionError(f"unexpected command: {command}") + + with patch("repo_sync.strip.lfs.subprocess.run", side_effect=run): + result = validate_lfs_payloads(str(tmp_path)) + + assert len(result.errors) == 1 + assert "asset.bin" in result.errors[0] + assert "git lfs install --local" in result.errors[0] diff --git a/tests/workflows/test_lfs_mirroring.py b/tests/workflows/test_lfs_mirroring.py new file mode 100644 index 0000000..501ad48 --- /dev/null +++ b/tests/workflows/test_lfs_mirroring.py @@ -0,0 +1,318 @@ +"""Tests for Git LFS object mirroring during sync.""" + +from __future__ import annotations + +import logging +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest +from pytest import LogCaptureFixture + +from repo_sync.stack.git_ops import GitOps +from repo_sync.workflows.create_sync_prs import PermanentSyncError, _mirror_lfs_objects + + +OID = "0123456789abcdef0123456789abcdef0123456789abcdef0123456789abcdef" +PRIVATE_OID = "fedcba9876543210fedcba9876543210fedcba9876543210fedcba9876543210" + + +def _pointer(oid: str) -> str: + """Return valid Git LFS pointer file content.""" + return ( + "version https://git-lfs.github.com/spec/v1\n" + f"oid sha256:{oid}\n" + "size 1234\n" + ) + + +def test_mirror_lfs_objects_pushes_changed_pointer_oids( + tmp_path: Path, + caplog: LogCaptureFixture, +) -> None: + (tmp_path / "asset.bin").write_text(_pointer(OID), encoding="utf-8") + + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + peer_git.remote_url.return_value = "https://github.com/org/peer.git" + attributes_git.lfs_tracked_paths.return_value = {"asset.bin"} + source_git.lfs_missing_oids.return_value = [] + caplog.set_level(logging.INFO, logger="repo_sync.workflows.create_sync_prs") + + with patch("repo_sync.workflows.create_sync_prs.os.getpid", return_value=42): + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=["asset.bin"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + remote = "repo_sync_lfs_target_42" + attributes_git.lfs_tracked_paths.assert_called_once_with( + ["asset.bin"], + source_ref="attrs-ref", + ) + source_git.lfs_fetch_paths.assert_called_once_with( + "abc123", + ["asset.bin"], + expected_oids={"asset.bin": OID}, + ) + source_git.lfs_missing_oids.assert_called_once_with([OID]) + peer_git.remote_url.assert_called_once_with("origin") + source_git.remote_add_or_update.assert_called_once_with( + remote, + "https://github.com/org/peer.git", + ) + source_git.lfs_push_oids.assert_called_once_with(remote, [OID]) + source_git.remote_remove.assert_called_once_with(remote) + assert OID in caplog.text + assert "asset.bin" in caplog.text + + +def test_mirror_lfs_objects_skips_when_no_changed_pointer(tmp_path: Path) -> None: + (tmp_path / "ordinary.txt").write_text("hello\n", encoding="utf-8") + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=["ordinary.txt"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + attributes_git.lfs_tracked_paths.assert_not_called() + source_git.lfs_fetch_paths.assert_not_called() + source_git.lfs_missing_oids.assert_not_called() + peer_git.remote_url.assert_not_called() + source_git.remote_add_or_update.assert_not_called() + source_git.lfs_push_oids.assert_not_called() + source_git.remote_remove.assert_not_called() + + +def test_mirror_lfs_objects_does_not_push_private_stripped_pointer( + tmp_path: Path, +) -> None: + source_tree = tmp_path / "source" + source_tree.joinpath("private").mkdir(parents=True) + source_tree.joinpath("private/secret.bin").write_text( + _pointer(PRIVATE_OID), + encoding="utf-8", + ) + + snapshot = tmp_path / "stripped-snapshot" + snapshot.mkdir() + snapshot.joinpath("asset.bin").write_text(_pointer(OID), encoding="utf-8") + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + peer_git.remote_url.return_value = "https://github.com/org/public.git" + attributes_git.lfs_tracked_paths.return_value = {"asset.bin"} + source_git.lfs_missing_oids.return_value = [] + + with patch("repo_sync.workflows.create_sync_prs.os.getpid", return_value=42): + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(snapshot), + changed_paths=["asset.bin", "private/secret.bin"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + remote = "repo_sync_lfs_target_42" + source_git.lfs_fetch_paths.assert_called_once_with( + "abc123", + ["asset.bin"], + expected_oids={"asset.bin": OID}, + ) + source_git.lfs_push_oids.assert_called_once_with(remote, [OID]) + assert PRIVATE_OID not in source_git.lfs_push_oids.call_args.args[1] + + +def test_mirror_lfs_objects_skips_pointer_shaped_non_lfs_file( + tmp_path: Path, +) -> None: + (tmp_path / "looks-like-pointer.txt").write_text( + _pointer(PRIVATE_OID), + encoding="utf-8", + ) + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + attributes_git.lfs_tracked_paths.return_value = set() + + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=["looks-like-pointer.txt"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + attributes_git.lfs_tracked_paths.assert_called_once_with( + ["looks-like-pointer.txt"], + source_ref="attrs-ref", + ) + source_git.lfs_fetch_paths.assert_not_called() + source_git.lfs_missing_oids.assert_not_called() + source_git.lfs_push_oids.assert_not_called() + + +def test_mirror_lfs_objects_fails_when_fetched_object_is_missing( + tmp_path: Path, +) -> None: + (tmp_path / "asset.bin").write_text(_pointer(OID), encoding="utf-8") + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + attributes_git.lfs_tracked_paths.return_value = {"asset.bin"} + source_git.lfs_missing_oids.return_value = [OID] + + with pytest.raises(PermanentSyncError): + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=["asset.bin"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + source_git.lfs_fetch_paths.assert_called_once_with( + "abc123", + ["asset.bin"], + expected_oids={"asset.bin": OID}, + ) + source_git.lfs_push_oids.assert_not_called() + + +def test_mirror_lfs_objects_fails_before_push_for_lfs_payload_markers( + tmp_path: Path, +) -> None: + (tmp_path / "asset.txt").write_text(_pointer(OID), encoding="utf-8") + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + attributes_git.lfs_tracked_paths.return_value = {"asset.txt"} + source_git.lfs_missing_oids.return_value = [] + + def write_payload( + _ref: str, + _path: str, + output_path: str, + expected_oid: str | None = None, + ) -> None: + Path(output_path).write_text( + "public\n" + "# !repo-sync: private-start\n" + "secret\n" + "# !repo-sync: private-end\n", + encoding="utf-8", + ) + + source_git.lfs_write_path.side_effect = write_payload + + with pytest.raises(PermanentSyncError, match="asset.txt"): + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=["asset.txt"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + validate_payload_markers=True, + ) + + source_git.lfs_fetch_paths.assert_called_once_with( + "abc123", + ["asset.txt"], + expected_oids={"asset.txt": OID}, + ) + source_git.lfs_write_path.assert_called_once() + peer_git.remote_url.assert_not_called() + source_git.remote_add_or_update.assert_not_called() + source_git.lfs_push_oids.assert_not_called() + + +def test_mirror_lfs_objects_fetches_exact_comma_path( + tmp_path: Path, +) -> None: + (tmp_path / "asset,with-comma.bin").write_text( + _pointer(OID), + encoding="utf-8", + ) + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + peer_git.remote_url.return_value = "https://github.com/org/peer.git" + attributes_git.lfs_tracked_paths.return_value = {"asset,with-comma.bin"} + source_git.lfs_missing_oids.return_value = [] + + with patch("repo_sync.workflows.create_sync_prs.os.getpid", return_value=42): + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=["asset,with-comma.bin"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + source_git.lfs_fetch_paths.assert_called_once_with( + "abc123", + ["asset,with-comma.bin"], + expected_oids={"asset,with-comma.bin": OID}, + ) + + +def test_mirror_lfs_objects_scans_all_pointers_when_attributes_change( + tmp_path: Path, +) -> None: + (tmp_path / ".gitattributes").write_text( + "*.bin filter=lfs\n", + encoding="utf-8", + ) + (tmp_path / "existing.bin").write_text(_pointer(OID), encoding="utf-8") + (tmp_path / "ordinary.txt").write_text("hello\n", encoding="utf-8") + source_git = MagicMock(spec=GitOps) + peer_git = MagicMock(spec=GitOps) + attributes_git = MagicMock(spec=GitOps) + peer_git.remote_url.return_value = "https://github.com/org/peer.git" + attributes_git.lfs_tracked_paths.return_value = {"existing.bin"} + source_git.lfs_missing_oids.return_value = [] + + with patch("repo_sync.workflows.create_sync_prs.os.getpid", return_value=42): + _mirror_lfs_objects( + source_git=source_git, + peer_git=peer_git, + source_ref="abc123", + snapshot_dir=str(tmp_path), + changed_paths=[".gitattributes"], + attributes_git=attributes_git, + attributes_ref="attrs-ref", + ) + + attributes_git.lfs_tracked_paths.assert_called_once_with( + ["existing.bin"], + source_ref="attrs-ref", + ) + source_git.lfs_fetch_paths.assert_called_once_with( + "abc123", + ["existing.bin"], + expected_oids={"existing.bin": OID}, + ) diff --git a/tests/workflows/test_sync_workflow_config.py b/tests/workflows/test_sync_workflow_config.py new file mode 100644 index 0000000..87248db --- /dev/null +++ b/tests/workflows/test_sync_workflow_config.py @@ -0,0 +1,16 @@ +"""Tests for reusable sync workflow configuration.""" + +from __future__ import annotations + +from pathlib import Path + + +def test_sync_workflow_configures_lfs_for_both_checkouts() -> None: + workflow = Path(".github/workflows/sync.yml").read_text(encoding="utf-8") + + assert "- name: Configure Git LFS" in workflow + assert "git lfs install --local" in workflow + assert "git -C peer lfs install --local" in workflow + assert workflow.index("- name: Configure Git LFS") < workflow.index( + "- name: Run sync" + )