Skip to content

Commit 19d29e7

Browse files
Add script to refresh local git repositories
1 parent 1b8189b commit 19d29e7

1 file changed

Lines changed: 257 additions & 0 deletions

File tree

scripts/daily_refresh.py

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
"""Utilities for refreshing multiple Git repositories at once.
2+
3+
This module provides a small command line application that searches for Git
4+
repositories under a given directory and executes ``git fetch --all --prune``
5+
for each of them. The goal is to make it easy to plug the script into a cron
6+
job (or any other scheduler) so that a server can refresh all of its local
7+
checkouts on a daily basis.
8+
9+
Example usage that fetches every repository under ``/srv/git``::
10+
11+
$ python -m scripts.daily_refresh /srv/git
12+
13+
For safety the script performs fast-forward pulls only when the
14+
``--pull`` flag is supplied. The ``--dry-run`` option can be used to inspect
15+
the commands that would be executed without touching the repositories.
16+
"""
17+
18+
from __future__ import annotations
19+
20+
import argparse
21+
import logging
22+
import shlex
23+
import subprocess
24+
from collections.abc import Iterable, Sequence
25+
from dataclasses import dataclass
26+
from pathlib import Path
27+
28+
29+
# Layout of every log line: timestamp, level name (left-aligned and padded to
# eight characters), then the message, separated by " | ".
LOG_FORMAT = "%(asctime)s | %(levelname)-8s | %(message)s"
30+
31+
32+
@dataclass(slots=True)
33+
class RefreshResult:
34+
"""Container describing the outcome of refreshing a repository."""
35+
36+
repository: Path
37+
command: Sequence[str]
38+
returncode: int
39+
stdout: str
40+
stderr: str
41+
42+
@property
43+
def succeeded(self) -> bool:
44+
"""Return ``True`` when the git command was successful."""
45+
46+
return self.returncode == 0
47+
48+
49+
def _discover_repositories(root: Path, excluded: set[Path]) -> list[Path]:
    """Return every Git repository rooted at ``root``.

    A directory counts as a repository when it contains a ``.git``
    directory.  The search does not descend into a repository once it has
    been found and it never follows symbolic links.  Directories that cannot
    be inspected (for example because of missing permissions) are logged as
    warnings and skipped, so that a single unreadable directory does not
    abort the whole scan.

    Parameters
    ----------
    root:
        Directory that should be scanned.  The path is resolved to avoid
        surprises with symbolic links.
    excluded:
        Absolute paths that should not be traversed while searching.  A
        directory is considered excluded when it is equal to one of the
        supplied paths or when it is a child of one.

    Returns
    -------
    list[Path]
        The repositories that were found.  The entries of every directory
        are visited in sorted order, so the result is deterministic.
    """

    repositories: list[Path] = []
    root = root.resolve()

    def is_excluded(path: Path) -> bool:
        return any(path == item or item in path.parents for item in excluded)

    def walk(directory: Path) -> None:
        if is_excluded(directory):
            logging.debug("Skipping excluded directory %s", directory)
            return

        try:
            is_repository = (directory / ".git").is_dir()
            # The listing is materialised here so that errors raised while
            # reading the directory are caught, and sorted so that the scan
            # order does not depend on the file system.
            children = [] if is_repository else sorted(directory.iterdir())
        except OSError as error:
            logging.warning(
                "Skipping unreadable directory %s: %s", directory, error
            )
            return

        if is_repository:
            repositories.append(directory)
            logging.debug("Found git repository in %s", directory)
            return

        for child in children:
            try:
                is_plain_directory = child.is_dir() and not child.is_symlink()
            except OSError as error:
                logging.warning("Skipping unreadable entry %s: %s", child, error)
                continue
            if is_plain_directory:
                walk(child)

    walk(root)
    return repositories
87+
88+
89+
def _run_git_command(
    repository: Path, command: Sequence[str], *, dry_run: bool
) -> RefreshResult:
    """Run ``git -C <repository> <command...>`` and describe the outcome.

    Parameters
    ----------
    repository:
        Directory of the repository; passed to git through ``-C``.
    command:
        The git sub-command and its arguments, e.g. ``("fetch", "--all")``.
    dry_run:
        When ``True`` the command is only logged and a successful result
        with empty output is returned without starting a process.

    Returns
    -------
    RefreshResult
        The full command line, the exit status and the stripped captured
        output.  When the git executable cannot be started at all, the
        result carries exit status 127 and the error text as ``stderr``
        instead of an exception being raised.
    """

    full_command = ("git", "-C", str(repository), *command)
    if dry_run:
        logging.info("DRY-RUN %s", shlex.join(full_command))
        return RefreshResult(repository, full_command, 0, "", "")

    logging.info("Running %s", shlex.join(full_command))
    try:
        process = subprocess.run(  # noqa: S603, S607 - `git` is a trusted command.
            full_command,
            capture_output=True,
            text=True,
            # Git output may contain bytes that are not valid in the locale
            # encoding; replace them rather than fail while decoding.
            errors="replace",
            check=False,
        )
    except OSError as error:
        # The process could not be started (git missing, not executable, ...).
        # Report it as an ordinary failure so that the remaining repositories
        # are still processed; 127 is the status shells use for a command
        # that cannot be found.
        return RefreshResult(repository, full_command, 127, "", str(error))

    return RefreshResult(
        repository,
        full_command,
        process.returncode,
        process.stdout.strip(),
        process.stderr.strip(),
    )
111+
112+
113+
def refresh_repository(repository: Path, *, pull: bool, dry_run: bool) -> bool:
    """Fetch updates for ``repository`` and optionally fast-forward it.

    The git commands run one after another; the first failing command is
    logged together with its captured output and stops the sequence.

    Parameters
    ----------
    repository:
        Path to a directory containing a Git checkout.
    pull:
        When ``True`` a ``git pull --ff-only`` follows the fetch.
    dry_run:
        When ``True`` the underlying git commands are not executed.

    Returns
    -------
    bool
        ``True`` when every command succeeded, ``False`` otherwise.
    """

    steps: list[Sequence[str]] = [("fetch", "--all", "--prune")]
    if pull:
        steps.append(("pull", "--ff-only"))

    for git_arguments in steps:
        outcome = _run_git_command(repository, git_arguments, dry_run=dry_run)

        if outcome.succeeded:
            # Output of successful commands is only interesting for debugging.
            for captured in (outcome.stdout, outcome.stderr):
                if captured:
                    logging.debug("%s", captured)
            continue

        logging.error(
            "Failed to refresh %s with %s (exit code %s)",
            repository,
            shlex.join(outcome.command),
            outcome.returncode,
        )
        for template, captured in (
            ("stdout: %s", outcome.stdout),
            ("stderr: %s", outcome.stderr),
        ):
            if captured:
                logging.error(template, captured)
        return False

    return True
151+
152+
153+
def _parse_arguments(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command line options of the application.

    Parameters
    ----------
    argv:
        Argument strings to parse, without the program name.  When ``None``
        (the default) the arguments are taken from ``sys.argv``.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``root`` (a ``Path``, the current
        working directory by default), ``exclude`` (list of strings),
        ``pull``, ``dry_run`` and ``quiet`` (booleans) and ``verbose`` (the
        number of times ``-v`` was supplied).
    """

    parser = argparse.ArgumentParser(
        description="Refresh every git repository found under a directory.",
    )
    parser.add_argument(
        "root",
        type=Path,
        nargs="?",
        default=Path.cwd(),
        help="Directory that should be scanned for repositories.",
    )
    parser.add_argument(
        "-e",
        "--exclude",
        action="append",
        default=[],
        metavar="PATH",
        help=(
            "Directory names (relative to ROOT) or absolute paths that should "
            "be skipped while searching for repositories. The option can be "
            "provided multiple times."
        ),
    )
    parser.add_argument(
        "-p",
        "--pull",
        action="store_true",
        help="Perform a fast-forward pull after fetching.",
    )
    parser.add_argument(
        "-n",
        "--dry-run",
        action="store_true",
        help="Print the git commands without executing them.",
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="count",
        default=0,
        help="Increase logging verbosity (can be supplied multiple times).",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        action="store_true",
        help="Silence informational output.",
    )
    return parser.parse_args(argv)
202+
203+
204+
def _prepare_excluded(root: Path, entries: list[str]) -> set[Path]:
    """Turn the raw exclusion entries into a set of resolved paths.

    Parameters
    ----------
    root:
        Directory that relative entries are interpreted against.
    entries:
        Relative or absolute paths as strings.

    Returns
    -------
    set[Path]
        One resolved path per distinct entry.
    """

    def normalise(entry: str) -> Path:
        candidate = Path(entry)
        anchored = candidate if candidate.is_absolute() else root / candidate
        return anchored.resolve()

    return {normalise(entry) for entry in entries}
214+
215+
216+
def main() -> int:
    """Run the command line application.

    Parses the command line, configures logging, discovers the repositories
    under the requested root directory and refreshes each of them.

    Returns
    -------
    int
        ``0`` when every repository was refreshed or none was found, ``1``
        when the root is not a directory or at least one refresh failed.
    """

    args = _parse_arguments()

    # ``--quiet`` takes precedence over ``--verbose``.  Any number of ``-v``
    # flags enables debug output; previously a single ``-v`` had no effect.
    if args.quiet:
        log_level = logging.WARNING
    elif args.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO

    logging.basicConfig(level=log_level, format=LOG_FORMAT)

    root = args.root.resolve()
    if not root.is_dir():
        logging.error("%s is not a directory", root)
        return 1

    excluded = _prepare_excluded(root, args.exclude)
    repositories = _discover_repositories(root, excluded)

    if not repositories:
        logging.warning("No Git repositories found under %s", root)
        return 0

    logging.info(
        "Refreshing %s repositories under %s", len(repositories), root
    )

    # Keep going after a failure so that one broken repository does not
    # prevent the others from being refreshed.
    failures = 0
    for repository in repositories:
        if not refresh_repository(repository, pull=args.pull, dry_run=args.dry_run):
            failures += 1

    if failures:
        logging.error("Failed to refresh %s repositories", failures)
        return 1

    logging.info("Successfully refreshed all repositories")
    return 0
254+
255+
256+
if __name__ == "__main__":  # pragma: no cover - CLI entry point.
    # Hand the status returned by main() to the interpreter as exit code.
    exit_status = main()
    raise SystemExit(exit_status)

0 commit comments

Comments
 (0)