Skip to content

Commit 47f898b

Browse files
authored
Merge pull request #216 from calchiwo/refactor/unify-file-ingestion
refactor: unify file ingestion across local and GitHub paths
2 parents baded70 + 55453ae commit 47f898b

8 files changed

Lines changed: 70 additions & 286 deletions

File tree

_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
VERSION = "0.23.0"
1+
VERSION = "0.24.1"

explain_this_repo/_version.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
VERSION = "0.23.0"
1+
VERSION = "0.24.1"

explain_this_repo/cli.py

Lines changed: 2 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -324,61 +324,6 @@ def _extract_file_signals(read_result) -> dict:
324324
"size_bytes": read_result.size_bytes,
325325
}
326326

327-
328-
def _validate_github_file_result(read_result) -> None:
329-
content = getattr(read_result, "content", None)
330-
size_bytes = getattr(read_result, "size_bytes", None)
331-
is_text = getattr(read_result, "is_text", None)
332-
333-
if not getattr(read_result, "path", None):
334-
raise ValueError("GitHub path does not resolve to a file.")
335-
336-
if is_text is not True:
337-
raise ValueError("binary GitHub files are not supported.")
338-
339-
if not isinstance(content, str):
340-
raise ValueError("GitHub file content is unavailable.")
341-
342-
if size_bytes is None:
343-
raise ValueError("GitHub file size is unavailable.")
344-
345-
if int(size_bytes) > _MAX_GITHUB_FILE_BYTES:
346-
raise ValueError(
347-
f"GitHub file too large ({size_bytes} bytes, limit {_MAX_GITHUB_FILE_BYTES} bytes)."
348-
)
349-
350-
351-
def _handle_github_file_error(error: Exception) -> None:
352-
message = str(error).strip()
353-
lowered = message.lower()
354-
355-
if "directory" in lowered and "not a file" in lowered:
356-
print("error: GitHub path resolves to a directory, not a file.")
357-
return
358-
359-
if "404" in message or "not found" in lowered:
360-
print("error: GitHub 404: file not found.")
361-
return
362-
363-
if "binary" in lowered:
364-
print("error: binary GitHub files are not supported.")
365-
return
366-
367-
if "too large" in lowered or "truncated" in lowered:
368-
print("error: GitHub file is too large to explain safely.")
369-
return
370-
371-
if "rate limit" in lowered:
372-
print("error: GitHub API rate limit exceeded.")
373-
return
374-
375-
if "forbidden" in lowered or "permission" in lowered or "private" in lowered:
376-
print("error: GitHub permission denied. Check access for private repositories.")
377-
return
378-
379-
print("error: could not fetch GitHub file.")
380-
381-
382327
def _handle_file_mode(args, llm: str | None) -> None:
383328
if args.stack:
384329
print("error: --stack is not supported for file targets")
@@ -462,15 +407,14 @@ def _handle_github_file_mode(args, llm: str | None) -> None:
462407
try:
463408
owner, repo, file_path = resolve_github_file_target(args.repository)
464409
except ValueError as e:
465-
_handle_github_file_error(e)
410+
print(f"error: {str(e)}")
466411
raise SystemExit(1)
467412

468413
try:
469414
with console.status(f"Fetching {owner}/{repo}/{file_path}...", spinner="dots"):
470415
read_result = fetch_file_result(owner, repo, file_path)
471-
_validate_github_file_result(read_result)
472416
except Exception as e:
473-
_handle_github_file_error(e)
417+
print(f"error: {str(e)}")
474418
raise SystemExit(1)
475419

476420
display_path = f"{owner}/{repo}/{read_result.path}"

explain_this_repo/file_reader.py

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
@dataclass(frozen=True, slots=True)
11-
class LocalFileReadResult:
11+
class FileReadResult:
1212
path: str
1313
name: str
1414
extension: str
@@ -93,7 +93,36 @@ def _decode_text(raw: bytes) -> str:
9393
raise ValueError("file appears to be binary or uses an unsupported text encoding")
9494

9595

96-
def read_local_file(path: str, max_bytes: int = _MAX_DEFAULT_BYTES) -> LocalFileReadResult:
96+
def build_file_read_result(
97+
*,
98+
path: str,
99+
raw: bytes,
100+
size_bytes: int,
101+
max_bytes: int = _MAX_DEFAULT_BYTES,
102+
) -> FileReadResult:
103+
if max_bytes <= 0:
104+
raise ValueError("max_bytes must be greater than 0")
105+
106+
bounded = raw[:max_bytes]
107+
sample = bounded[:_SAMPLE_SIZE]
108+
109+
if _is_probably_binary(sample):
110+
raise ValueError("binary files are not supported")
111+
112+
content = _decode_text(bounded)
113+
114+
file_path = Path(path)
115+
return FileReadResult(
116+
path=path,
117+
name=file_path.name,
118+
extension=file_path.suffix.lower().lstrip("."),
119+
size_bytes=size_bytes,
120+
content=content,
121+
is_text=True,
122+
)
123+
124+
125+
def read_local_file(path: str, max_bytes: int = _MAX_DEFAULT_BYTES) -> FileReadResult:
97126
if max_bytes <= 0:
98127
raise ValueError("max_bytes must be greater than 0")
99128

@@ -112,17 +141,9 @@ def read_local_file(path: str, max_bytes: int = _MAX_DEFAULT_BYTES) -> LocalFile
112141
except OSError as exc:
113142
raise OSError(f"Could not read file '{path}': {exc}") from exc
114143

115-
sample = raw[:_SAMPLE_SIZE]
116-
if _is_probably_binary(sample):
117-
raise ValueError("binary files are not supported")
118-
119-
content = _decode_text(raw)
120-
121-
return LocalFileReadResult(
144+
return build_file_read_result(
122145
path=str(file_path.resolve()),
123-
name=file_path.name,
124-
extension=file_path.suffix.lower().lstrip("."),
146+
raw=raw,
125147
size_bytes=size_bytes,
126-
content=content,
127-
is_text=True,
148+
max_bytes=max_bytes,
128149
)

0 commit comments

Comments (0)