Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 14 additions & 5 deletions src/cocoindex_code/indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,20 @@ def _normalize_gitignore_lines(lines: Iterable[str], directory: PurePath) -> lis
stripped = line.lstrip()
if not stripped or stripped.startswith("#"):
continue
if line.startswith("\\#") or line.startswith("\\!"):
line = line[1:]
negated = line.startswith("!")
if negated:
line = line[1:]
# A leading "\#" or "\!" escapes a literal '#'/'!' — such a line is
# neither a comment nor a negation. Detect the escape *before* the
# negation check, and KEEP the backslash so the emitted pattern stays
# escaped for pathspec. Stripping it would leave a bare leading "!"/"#"
# — fine when a "**/" prefix is added ("\!foo" -> "**/!foo"), but for a
# path-bearing pattern there is no such prefix ("\!dir/f" -> "!dir/f"),
# and GitIgnoreSpec would then read it back as a negation/comment.
escaped = line.startswith("\\#") or line.startswith("\\!")
if escaped:
negated = False
else:
negated = line.startswith("!")
if negated:
line = line[1:]
Comment on lines +58 to +64
body = line.strip()
if not body:
continue
Expand Down
64 changes: 64 additions & 0 deletions tests/test_indexer_gitignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""Unit tests for .gitignore line normalization in the indexer."""

from __future__ import annotations

from pathlib import PurePath

from pathspec import GitIgnoreSpec

from cocoindex_code.indexer import _normalize_gitignore_lines

# Directory handed to the normalizer by most tests below; with it, the expected
# patterns carry no directory prefix (compare the "sub/dir" case further down).
ROOT = PurePath(".")


def test_plain_pattern_is_globbed() -> None:
    """A bare name such as "build" is emitted with a leading "**/"."""
    normalized = _normalize_gitignore_lines(["build"], ROOT)
    assert normalized == ["**/build"]


def test_negation_is_preserved() -> None:
    """A "!"-prefixed line still starts with "!" after normalization."""
    source_lines = ["build", "!build/keep.txt"]
    expected = ["**/build", "!build/keep.txt"]
    assert _normalize_gitignore_lines(source_lines, ROOT) == expected


def test_escaped_hash_is_literal_not_comment() -> None:
    """An escaped leading "#" keeps its backslash in the emitted pattern."""
    # "\#notacomment" refers to a file literally named "#notacomment". The
    # backslash must survive so GitIgnoreSpec does not treat the emitted
    # pattern as a comment.
    normalized = _normalize_gitignore_lines(["\\#notacomment"], ROOT)
    assert normalized == ["**/\\#notacomment"]


def test_escaped_bang_is_literal_not_negation() -> None:
    """An escaped leading "!" is not turned into a negation pattern."""
    # Regression: "\!important" means "ignore a file literally named
    # '!important'". It is not a negation, so the result must not start
    # with a bare "!".
    normalized = _normalize_gitignore_lines(["\\!important"], ROOT)
    assert normalized == ["**/\\!important"]


def test_escaped_bang_does_not_re_include_unrelated_matches() -> None:
    """An escaped-bang line leaves an earlier "important" rule in effect."""
    # End-to-end check. Before the fix, "\!important" normalized to
    # "!**/important", which re-included every "important" file that the
    # preceding line had ignored.
    patterns = _normalize_gitignore_lines(["important", "\\!important"], ROOT)
    spec = GitIgnoreSpec.from_lines(patterns)

    # The plain rule is still honoured.
    plain_ignored = spec.match_file("important")
    assert plain_ignored is True

    # The literal "!important" file is ignored as well.
    literal_ignored = spec.match_file("!important")
    assert literal_ignored is True


def test_subdirectory_prefix_is_applied() -> None:
    """The directory argument is prepended to the emitted pattern."""
    nested_dir = PurePath("sub/dir")
    normalized = _normalize_gitignore_lines(["\\!keep"], nested_dir)
    assert normalized == ["sub/dir/**/\\!keep"]


def test_escaped_path_bearing_pattern_is_literal() -> None:
    """Escaped patterns containing "/" match only the literal "!"/"#" paths."""
    # A pattern with a "/" is anchored, so no "**/" prefix is added and the
    # leading "!"/"#" would be the first character of the emitted pattern.
    # Keeping the backslash is what stops GitIgnoreSpec from reading it back
    # as a negation ("\!dir/file") or a comment ("\#dir/other").
    raw_lines = ["\\!dir/file", "\\#dir/other"]
    patterns = _normalize_gitignore_lines(raw_lines, ROOT)
    spec = GitIgnoreSpec.from_lines(patterns)

    # The unescaped sibling path is left alone.
    sibling_ignored = spec.match_file("dir/file")
    assert sibling_ignored is False

    # The literal "!dir/file" path is ignored.
    bang_ignored = spec.match_file("!dir/file")
    assert bang_ignored is True

    # The literal "#dir/other" path is ignored.
    hash_ignored = spec.match_file("#dir/other")
    assert hash_ignored is True