diff --git a/dfetch/log.py b/dfetch/log.py index 340190bd..8dc83dd4 100644 --- a/dfetch/log.py +++ b/dfetch/log.py @@ -1,30 +1,53 @@ """Logging related items.""" +from __future__ import annotations + import logging import os import sys import types from contextlib import nullcontext -from typing import Any, cast +from logging import LogRecord +from typing import TYPE_CHECKING, Any, cast -from rich._log_render import LogRender # type: ignore[import-untyped] -from rich.console import Console +from rich.console import Console, ConsoleRenderable from rich.highlighter import NullHighlighter from rich.logging import RichHandler from rich.markup import escape as markup_escape from rich.status import Status +from rich.table import Table from dfetch import __version__ +if TYPE_CHECKING: + from rich.traceback import Traceback + + +class _NoExpandRichHandler(RichHandler): + """RichHandler that disables table expansion to prevent blank lines in asciicasts. -class _NoExpandLogRender(LogRender): # pylint: disable=too-few-public-methods - """LogRender that disables table expansion to prevent blank lines in asciicasts.""" + Rich's LogRender uses expand=True on its Table.grid, which pads every + log message with trailing spaces to fill the full terminal width. When + asciinema records the output the padded line fills the terminal exactly, + causing the subsequent newline to produce a blank line in the cast + player. Overriding render to set expand=False removes the trailing + spaces and avoids the spurious blank lines. + """ - def __call__(self, *args: Any, **kwargs: Any) -> Any: + def render( + self, + *, + record: LogRecord, + traceback: Traceback | None, + message_renderable: ConsoleRenderable, + ) -> ConsoleRenderable: """Render log entry without expanding the table to the full terminal width.""" - table = super().__call__(*args, **kwargs) - table.expand = False - return table + renderable = super().render( + record=record, traceback=traceback, message_renderable=message_renderable + ) + if isinstance(renderable, Table): + renderable.expand = False + return renderable def make_console(no_color: bool = False) -> Console: @@ -40,7 +63,8 @@ def configure_root_logger(console: Console | None = None) -> None: """Configure the root logger with RichHandler using the provided Console.""" console = console or make_console() - handler = RichHandler( + handler_class = _NoExpandRichHandler if os.getenv("ASCIINEMA_REC") else RichHandler + handler = handler_class( console=console, show_time=False, show_path=False, @@ -50,17 +74,6 @@ def configure_root_logger(console: Console | None = None) -> None: highlighter=NullHighlighter(), ) - if os.getenv("ASCIINEMA_REC"): - # Rich's LogRender uses expand=True on its Table.grid, which pads every - # log message with trailing spaces to fill the full terminal width. When - # asciinema records the output the padded line fills the terminal exactly, - # causing the subsequent newline to produce a blank line in the cast - # player. Wrapping _log_render so it returns a non-expanding table - # removes the trailing spaces and avoids the spurious blank lines. - handler._log_render = _NoExpandLogRender( # pylint: disable=protected-access - show_time=False, show_level=False, show_path=False - ) - logging.basicConfig( level=logging.INFO, format="%(message)s", diff --git a/dfetch/util/license.py b/dfetch/util/license.py index 31236533..dc335fc4 100644 --- a/dfetch/util/license.py +++ b/dfetch/util/license.py @@ -119,8 +119,7 @@ def guess_license_in_file( probable_licenses = infer_license.api.probabilities(license_text) - return ( - None - if not probable_licenses - else License.from_inferred(*probable_licenses[0], text=license_text) - ) + if not probable_licenses: + return None + inferred, probability = probable_licenses[0] + return License.from_inferred(inferred, probability, text=license_text) diff --git a/dfetch/vcs/git.py b/dfetch/vcs/git.py index 9d6806f1..0bc762b4 100644 --- a/dfetch/vcs/git.py +++ b/dfetch/vcs/git.py @@ -420,9 +420,7 @@ def _apply_src_and_ignore( ) -> list[Submodule]: """Apply src filter and ignore patterns, returning surviving submodules.""" if src: - for submodule in submodules: - submodule.path = strip_glob_prefix(submodule.path, src) - self._move_src_folder_up(remote, src) + submodules = self._filter_submodules_by_src(remote, src, submodules) for ignore_path in ignore or []: paths = [ @@ -434,6 +432,50 @@ def _apply_src_and_ignore( return [s for s in submodules if os.path.exists(s.path)] + def _filter_submodules_by_src( + self, remote: str, src: str, submodules: list[Submodule] + ) -> list[Submodule]: + """Keep only submodules within *src*, remove others, then promote *src* to root.""" + within_src = [] + to_remove: set[str] = set() + for submodule in submodules: + if submodule.path == src: + # Submodule IS the src directory itself; keep it in-scope without + # altering its path and let _move_src_folder_up handle promotion. + within_src.append(submodule) + continue + new_path = strip_glob_prefix(submodule.path, src) + if new_path != submodule.path: + submodule.path = new_path + within_src.append(submodule) + else: + if Path(src).is_relative_to(Path(submodule.path)): + continue + to_remove.add(submodule.path) + for path in to_remove: + safe_rm(path, within=".") + GitLocalRepo._remove_empty_parents(to_remove) + self._move_src_folder_up(remote, src) + return within_src + + @staticmethod + def _remove_empty_parents(paths: set[str]) -> None: + """Remove empty ancestor directories left after removing out-of-scope submodule dirs. + + git submodule update may create a parent directory for a submodule even when + sparse-checkout excludes it; after safe_rm removes the exact submodule path the + parent can be left as an empty directory. os.rmdir is used because it is atomic + and raises OSError when the directory is not empty, which stops the upward walk. + """ + for path in paths: + parent = Path(path).parent + while parent != Path("."): + try: + parent.rmdir() + except OSError: + break + parent = parent.parent + @staticmethod def _collect_safe_paths(src: str, repo_root: Path, remote: str) -> list[str]: """Return glob-matched paths for *src* that are within *repo_root*. @@ -451,6 +493,12 @@ def _collect_safe_paths(src: str, repo_root: Path, remote: str) -> list[str]: @staticmethod def _apply_move(chosen: Path, repo_root: Path, remote: str) -> None: """Move the contents of *chosen* to the repo root and remove the empty parent.""" + # Pre-remove git metadata at the root of *chosen* before promoting its contents. + # When *chosen* is itself a cloned submodule it contains a .git file that would + # collide with the parent repo's .git directory; the caller cleans these up + # recursively after checkout anyway. + for name in (GitLocalRepo.METADATA_DIR, GitLocalRepo.GIT_MODULES_FILE): + safe_rm(chosen / name, within=chosen) try: move_directory_contents(str(chosen), ".") except FileNotFoundError: diff --git a/features/fetch-git-repo-with-submodule.feature b/features/fetch-git-repo-with-submodule.feature index 8cd3eb18..c349a3c6 100644 --- a/features/fetch-git-repo-with-submodule.feature +++ b/features/fetch-git-repo-with-submodule.feature @@ -154,3 +154,98 @@ Feature: Fetch projects with nested VCS dependencies test-repo/ README.md """ + + Scenario: A submodule within a plain src directory is fetched + Given a git-repository "PlainSrcProject.git" with the following submodules + | path | url | revision | + | src_folder/ext/sub-repo | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: plain-src-project + url: some-remote-server/PlainSrcProject.git + src: src_folder + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.13.0) + plain-src-project: + > Found & fetched submodule "./ext/sub-repo" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + plain-src-project/ + .dfetch_data.yaml + ext/ + sub-repo/ + README.md + """ + + Scenario: A submodule outside the src folder is not fetched when src is specified + Given a git-repository "MixedSubmoduleProject.git" with the following submodules + | path | url | revision | + | src_folder/ext/inside | some-remote-server/TestRepo.git | master | + | other_ext/outside | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: mixed-project + url: some-remote-server/MixedSubmoduleProject.git + src: src_folder + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.13.0) + mixed-project: + > Found & fetched submodule "./ext/inside" (some-remote-server/TestRepo.git @ master - 79698c99152e4a4b7b759c9def50a130bc91a2ff) + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + mixed-project/ + .dfetch_data.yaml + ext/ + inside/ + README.md + """ + + Scenario: A sibling submodule at the same top-level dir as src is not fetched + Given a git-repository "SiblingSubmoduleProject.git" with the following submodules + | path | url | revision | + | apps/lib | some-remote-server/TestRepo.git | master | + | apps/widget | some-remote-server/TestRepo.git | master | + Given the manifest 'dfetch.yaml' in MyProject + """ + manifest: + version: 0.0 + projects: + - name: sibling-project + url: some-remote-server/SiblingSubmoduleProject.git + src: apps/lib + """ + When I run "dfetch update" + Then the output shows + """ + Dfetch (0.13.0) + sibling-project: + > Fetched master - e1fda19a57b873eb8e6ae37780594cbb77b70f1a + """ + Then 'MyProject' looks like: + """ + MyProject/ + dfetch.yaml + sibling-project/ + .dfetch_data.yaml + README.md + """ diff --git a/pyproject.toml b/pyproject.toml index 55a4d287..e3f038f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -145,6 +145,13 @@ ignore_missing_imports = true strict = true warn_unused_ignores = false +[[tool.mypy.overrides]] +module = "dfetch.log" +# RichHandler is untyped in the pre-commit mypy environment (rich not installed there), +# so mypy sees it as Any and raises [misc] when subclassing. Disabling this check for +# this module avoids the false positive without suppressing other errors. +disallow_subclassing_any = false + [tool.doc8] ignore-path = "doc/_build,doc/static/uml/styles/plantuml-c4" max-line-length = 120 diff --git a/tests/test_git_vcs.py b/tests/test_git_vcs.py index 891d5a0e..33b08d14 100644 --- a/tests/test_git_vcs.py +++ b/tests/test_git_vcs.py @@ -16,6 +16,7 @@ GitRemote, _build_git_ssh_command, ) +from dfetch.vcs.git_types import Submodule # --------------------------------------------------------------------------- # unique_parent_dirs (dfetch.util.util) @@ -104,6 +105,120 @@ def test_move_src_folder_up_rejects_traversal_src(tmp_path): mock_move.assert_not_called() +# --------------------------------------------------------------------------- +# GitLocalRepo._filter_submodules_by_src +# --------------------------------------------------------------------------- + + +def _make_submodule(path: str, url: str = "some-url") -> Submodule: + return Submodule( + name=path, + toplevel="", + path=path, + sha="abc123", + url=url, + branch="master", + tag="", + ) + + +def test_filter_submodules_ancestor_of_src_not_removed(tmp_path, monkeypatch): + """A submodule whose path is an ancestor of src must not be deleted. + + When src='apps/myapp' and a submodule exists at 'apps', the old logic + incorrectly added 'apps' to to_remove (because Path('apps/myapp').is_relative_to('apps') + was True with sub_top='apps'). The fix checks the full submodule.path so that an + ancestor submodule is skipped and _move_src_folder_up can promote its content. + """ + (tmp_path / "apps" / "myapp").mkdir(parents=True) + (tmp_path / "apps" / "myapp" / "README.md").write_text("content") + (tmp_path / "outside").mkdir() + (tmp_path / "outside" / "file.txt").write_text("content") + + monkeypatch.chdir(tmp_path) + repo = GitLocalRepo(str(tmp_path)) + result = repo._filter_submodules_by_src( + "remote-url", + "apps/myapp", + [_make_submodule("apps"), _make_submodule("outside")], + ) + + assert ( + tmp_path / "README.md" + ).exists(), "apps/myapp content should be promoted to root" + assert not (tmp_path / "outside").exists(), "outside/ submodule should be removed" + assert not any( + s.path == "apps" for s in result + ), "ancestor submodule must not appear in result" + assert not any( + s.path == "outside" for s in result + ), "out-of-scope submodule must not appear in result" + + +def test_filter_submodules_disjoint_submodule_removed(tmp_path, monkeypatch): + """A submodule whose path is disjoint from src must be removed. + + The fix stores the full submodule.path (not its top-level component) in + to_remove, so only the exact submodule directory is deleted. + """ + (tmp_path / "src_folder" / "ext" / "inside").mkdir(parents=True) + (tmp_path / "src_folder" / "ext" / "inside" / "README.md").write_text("content") + (tmp_path / "other_ext" / "outside").mkdir(parents=True) + (tmp_path / "other_ext" / "outside" / "README.md").write_text("content") + + monkeypatch.chdir(tmp_path) + repo = GitLocalRepo(str(tmp_path)) + result = repo._filter_submodules_by_src( + "remote-url", + "src_folder", + [ + _make_submodule("src_folder/ext/inside"), + _make_submodule("other_ext/outside"), + ], + ) + + assert not ( + tmp_path / "other_ext" / "outside" + ).exists(), "other_ext/outside submodule dir should be removed" + assert any( + s.path == "ext/inside" for s in result + ), "inside submodule should be promoted" + + +def test_filter_submodules_sibling_of_src_not_removed(tmp_path, monkeypatch): + """A sibling submodule sharing the same top-level dir as src must not destroy src content. + + When src='apps/lib' and submodules exist at both 'apps/lib' (exact match, the src) + and 'apps/widget' (sibling, outside src), the old logic used parts[0]='apps' and + called safe_rm('apps'), which deleted the entire apps/ directory — including the + already-cloned apps/lib content needed by _move_src_folder_up. + The fix stores the full submodule.path so only apps/widget is targeted, leaving + apps/lib intact for promotion. + """ + (tmp_path / "apps" / "lib").mkdir(parents=True) + (tmp_path / "apps" / "lib" / "README.md").write_text("content") + (tmp_path / "apps" / "widget").mkdir(parents=True) + (tmp_path / "apps" / "widget" / "widget.h").write_text("content") + + monkeypatch.chdir(tmp_path) + repo = GitLocalRepo(str(tmp_path)) + result = repo._filter_submodules_by_src( + "remote-url", + "apps/lib", + [_make_submodule("apps/lib"), _make_submodule("apps/widget")], + ) + + assert ( + tmp_path / "README.md" + ).exists(), "apps/lib content must be promoted to root" + assert not ( + tmp_path / "apps" / "widget" + ).exists(), "sibling apps/widget must be removed" + assert any( + s.path == "apps/lib" for s in result + ), "src submodule should appear in result before final os.path.exists filtering" + + @pytest.mark.parametrize( "name, cmd_result, expectation", [