diff --git a/.github/workflows/python-wheels.yml b/.github/workflows/python-wheels.yml new file mode 100644 index 00000000..d3bb3690 --- /dev/null +++ b/.github/workflows/python-wheels.yml @@ -0,0 +1,166 @@ +name: python-wheels + +on: + push: + tags: + - "fff-py-v*" + workflow_dispatch: + inputs: + publish: + description: "Publish to PyPI (requires PYPI_API_TOKEN secret)" + required: false + default: "false" + +permissions: + contents: read + +jobs: + linux: + name: linux ${{ matrix.target }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + target: [x86_64, aarch64] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Build wheels (manylinux) + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --manifest-path crates/fff-py/Cargo.toml + manylinux: auto + working-directory: . + - name: Build sdist (x86_64 only) + if: matrix.target == 'x86_64' + uses: PyO3/maturin-action@v1 + with: + command: sdist + args: --out dist --manifest-path crates/fff-py/Cargo.toml + - uses: actions/upload-artifact@v4 + with: + name: wheels-linux-${{ matrix.target }} + path: dist + + musllinux: + name: musllinux ${{ matrix.target }} + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + target: [x86_64, aarch64] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Build wheels (musllinux) + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --manifest-path crates/fff-py/Cargo.toml + manylinux: musllinux_1_2 + - uses: actions/upload-artifact@v4 + with: + name: wheels-musllinux-${{ matrix.target }} + path: dist + + macos: + name: macos ${{ matrix.target }} + runs-on: ${{ matrix.target == 'aarch64' && 'macos-14' || 'macos-13' }} + strategy: + fail-fast: false + matrix: + target: [x86_64, aarch64] + steps: + - uses: actions/checkout@v4 + - uses: 
actions/setup-python@v5 + with: + python-version: "3.11" + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --manifest-path crates/fff-py/Cargo.toml + - uses: actions/upload-artifact@v4 + with: + name: wheels-macos-${{ matrix.target }} + path: dist + + windows: + name: windows ${{ matrix.target }} + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + target: [x64] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + architecture: ${{ matrix.target == 'x64' && 'x64' || 'x86' }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + target: ${{ matrix.target }} + args: --release --out dist --manifest-path crates/fff-py/Cargo.toml + - uses: actions/upload-artifact@v4 + with: + name: wheels-windows-${{ matrix.target }} + path: dist + + smoke-test: + name: smoke-test ${{ matrix.os }} py${{ matrix.python }} + needs: [linux, macos, windows] + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-13, windows-latest] + python: ["3.9", "3.13"] + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + - uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: dist + - name: Install built wheel + shell: bash + run: | + pip install --no-index --find-links=dist fff-search + - name: Smoke test + shell: bash + run: | + python -c "from fff_search import FileFinder, __version__; print('fff_search', __version__); f = FileFinder.create(base_path='.'); f.wait_for_scan(timeout_ms=10000); print('files indexed:', f.get_scan_progress().scanned_files_count); r = f.file_search('readme'); print('matched:', r.total_matched); f.destroy()" + + release: + name: release to PyPI + needs: [linux, musllinux, macos, windows, smoke-test] + runs-on: ubuntu-latest + if: startsWith(github.ref, 'refs/tags/fff-py-v') || 
github.event.inputs.publish == 'true' + environment: + name: pypi + url: https://pypi.org/p/fff-search + permissions: + id-token: write + steps: + - uses: actions/download-artifact@v4 + with: + pattern: wheels-* + merge-multiple: true + path: dist + - name: Publish via maturin + uses: PyO3/maturin-action@v1 + with: + command: upload + args: --non-interactive --skip-existing dist/* + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.gitignore b/.gitignore index 2457b5a8..8990e133 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,14 @@ doc/tags big-repo target/ + +# Python (fff-py SDK) +.venv-py/ +.venv*/ +__pycache__/ +*.egg-info/ +.pytest_cache/ +*.pyc .archive.lua _*.lua .lazy.lua @@ -27,3 +35,6 @@ scripts/benchmark-results/ # Instruments traces *.trace/ +# Built python extension +*.pyd +*.so diff --git a/Cargo.lock b/Cargo.lock index 8d151b0d..c06b7a7e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -723,6 +723,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "fff-py" +version = "0.7.2" +dependencies = [ + "fff-query-parser", + "fff-search", + "mimalloc", + "pyo3", + "tracing", +] + [[package]] name = "fff-query-parser" version = "0.7.2" @@ -1220,6 +1231,15 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indoc" +version = "2.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706" +dependencies = [ + "rustversion", +] + [[package]] name = "inotify" version = "0.11.1" @@ -1464,6 +1484,15 @@ dependencies = [ "libc", ] +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + [[package]] name = "mimalloc" version = "0.1.48" @@ -1807,6 +1836,12 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + [[package]] name = "potential_utf" version = "0.1.4" @@ -1867,6 +1902,69 @@ dependencies = [ "unarray", ] +[[package]] +name = "pyo3" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "once_cell", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.22.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -2361,6 +2459,12 @@ dependencies = [ "syn", ] +[[package]] +name = "target-lexicon" +version = "0.12.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" + [[package]] name = "tempfile" version = "3.27.0" @@ -2647,6 +2751,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unindent" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3" + [[package]] name = "url" version = "2.5.8" diff --git a/Cargo.toml b/Cargo.toml index ab251f8c..ea0ff8a9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "crates/fff-core", "crates/fff-mcp", "crates/fff-nvim", + "crates/fff-py", "crates/fff-query-parser", "crates/fff-grep", ] diff --git a/README.md b/README.md index cb30457e..fcab25ff 100644 --- a/README.md +++ b/README.md @@ -366,6 +366,32 @@ Every method returns a `Result` (`{ ok: true, value } | { ok: false, error }` TypeScript wrapper over the C library for nodejs and bun. Build custom agent tools, CLIs, or IDE integrations on top of FFF. +
+
+<details>
+<summary>Python SDK</summary>
+
+ +```bash +pip install fff-search +``` + +```python +from fff_search import FileFinder + +with FileFinder.create(base_path=".", ai_mode=True) as f: + f.wait_for_scan(timeout_ms=10_000) + files = f.file_search("incognito profile", page_size=20) + hits = f.grep("GetOffTheRecordProfile", classify_definitions=True) +``` + +Errors raise `FffError`. Type stubs bundled (`py.typed`). Wheels for Linux (x86_64, aarch64), macOS (x86_64, aarch64), Windows (x86_64) on Python 3.9+ via abi3. + +Source: [`crates/fff-py/`](./crates/fff-py/). + +
+</details>
+
+PyO3 bindings on the same Rust core.
+
 <details>
 <summary>Rust crate</summary>
@@ -540,6 +566,7 @@ If you are running one grep from a terminal, `rg` is still the right tool. If yo - `crates/fff-c` - C FFI used by every language binding. - `crates/fff-nvim` - Lua/mlua bindings for the Neovim plugin. - `crates/fff-mcp` - MCP server binary. +- `crates/fff-py` - Python SDK (PyO3, `pip install fff-search`). - `packages/fff-node` - Node.js SDK (`@ff-labs/fff-node`). - `packages/fff-bun` - Bun SDK (`@ff-labs/fff-node`). - `packages/pi-fff` - pi extension (`@ff-labs/pi-fff`). diff --git a/crates/fff-py/Cargo.toml b/crates/fff-py/Cargo.toml new file mode 100644 index 00000000..f9066d30 --- /dev/null +++ b/crates/fff-py/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "fff-py" +version = "0.7.2" +edition = "2024" +description = "Python bindings for fff: fuzzy file finder and content grep" +license = "MIT" +authors = ["Dmitriy Kovalenko "] +repository = "https://github.com/dmtrKovalenko/fff.nvim" +publish = false + +[lib] +name = "fff_search_native" +crate-type = ["cdylib"] + +[features] +default = [] +zlob = ["fff/zlob"] + +[dependencies] +fff = { package = "fff-search", path = "../fff-core", version = "0.7.2" } +fff-query-parser = { path = "../fff-query-parser", version = "0.7.2" } +mimalloc.workspace = true +tracing.workspace = true +pyo3 = { version = "0.22", features = ["extension-module", "abi3-py39"] } diff --git a/crates/fff-py/README.md b/crates/fff-py/README.md new file mode 100644 index 00000000..79ccde67 --- /dev/null +++ b/crates/fff-py/README.md @@ -0,0 +1,39 @@ +# fff-search + +Python bindings for [fff](https://github.com/dmtrKovalenko/fff.nvim). + +```bash +pip install fff-search +``` + +```python +from fff_search import FileFinder + +with FileFinder.create(base_path=".", ai_mode=True) as f: + f.wait_for_scan(timeout_ms=10_000) + files = f.file_search("incognito profile", page_size=20) + hits = f.grep("GetOffTheRecordProfile", classify_definitions=True) +``` + +Errors raise `FffError`. Type stubs bundled. 
+ +Wheels: Linux (x86_64, aarch64; manylinux + musllinux), macOS (x86_64, aarch64), Windows (x86_64). Python 3.9+ via abi3. + +## API + +See [`python/fff_search/_native.pyi`](python/fff_search/_native.pyi). Methods on `FileFinder`: + +- `file_search`, `directory_search`, `mixed_search` +- `grep(query, mode='plain'|'regex'|'fuzzy', ...)`, `multi_grep(patterns, ...)` +- `scan_files`, `refresh_git_status` +- `wait_for_scan`, `wait_for_watcher`, `is_scanning`, `get_scan_progress` +- `track_query`, `get_historical_query` + +## Develop + +```bash +pip install maturin pytest +cd crates/fff-py +maturin develop --release +pytest tests/ +``` diff --git a/crates/fff-py/examples/basic.py b/crates/fff-py/examples/basic.py new file mode 100644 index 00000000..ecabdb1b --- /dev/null +++ b/crates/fff-py/examples/basic.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import os +import sys +import tempfile + +from fff_search import FileFinder + + +def main() -> int: + base = os.path.abspath(sys.argv[1] if len(sys.argv) > 1 else ".") + + with tempfile.TemporaryDirectory() as tmp: + with FileFinder.create( + base_path=base, + frecency_db_path=os.path.join(tmp, "frecency"), + history_db_path=os.path.join(tmp, "history"), + ai_mode=True, + ) as finder: + print(f"Indexing {base!r}…") + ok = finder.wait_for_scan(timeout_ms=10_000) + if not ok: + print("Scan timed out", file=sys.stderr) + return 1 + + print("\n== fuzzy file_search('readme') ==") + r = finder.file_search("readme", page_size=10) + print(r) + for item, score in zip(r.items, r.scores): + print(f" {score.total:>6} {item.relative_path}") + + print("\n== grep('TODO', mode='plain') ==") + g = finder.grep("TODO", mode="plain", page_limit=10) + print(g) + for m in g.items[:10]: + print(f" {m.relative_path}:{m.line_number} {m.line_content[:80]}") + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/crates/fff-py/pyproject.toml b/crates/fff-py/pyproject.toml new file mode 100644 index 
00000000..f9a5a771 --- /dev/null +++ b/crates/fff-py/pyproject.toml @@ -0,0 +1,49 @@ +[build-system] +requires = ["maturin>=1.7,<2.0"] +build-backend = "maturin" + +[project] +name = "fff-search" +description = "Python bindings for fff: fuzzy file finder and content grep." +readme = "README.md" +license = { text = "MIT" } +requires-python = ">=3.9" +authors = [ + { name = "Dmitriy Kovalenko", email = "dmtr.kovalenko@outlook.com" }, +] +keywords = [ + "file-finder", + "fuzzy-search", + "grep", + "ai-agents", + "llm-tools", + "fast", + "rust", +] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Rust", + "Topic :: Software Development", + "Topic :: Text Processing :: Indexing", + "Topic :: Utilities", +] +dynamic = ["version"] + +[project.urls] +Homepage = "https://github.com/dmtrKovalenko/fff.nvim" +Repository = "https://github.com/dmtrKovalenko/fff.nvim" +Issues = "https://github.com/dmtrKovalenko/fff.nvim/issues" + +[tool.maturin] +python-source = "python" +module-name = "fff_search._native" +features = ["pyo3/extension-module"] +strip = true diff --git a/crates/fff-py/python/fff_search/__init__.py b/crates/fff-py/python/fff_search/__init__.py new file mode 100644 index 00000000..c15232fc --- /dev/null +++ b/crates/fff-py/python/fff_search/__init__.py @@ -0,0 +1,35 @@ +"""Python bindings for fff. 
See https://github.com/dmtrKovalenko/fff.nvim.""" + +from ._native import ( + DirItem, + DirSearchResult, + FffError, + FileFinder, + FileItem, + GrepMatch, + GrepResult, + Location, + MixedItem, + MixedSearchResult, + ScanProgress, + Score, + SearchResult, + __version__, +) + +__all__ = [ + "FileFinder", + "FileItem", + "Score", + "Location", + "SearchResult", + "DirItem", + "DirSearchResult", + "MixedItem", + "MixedSearchResult", + "GrepMatch", + "GrepResult", + "ScanProgress", + "FffError", + "__version__", +] diff --git a/crates/fff-py/python/fff_search/_native.pyi b/crates/fff-py/python/fff_search/_native.pyi new file mode 100644 index 00000000..5608e578 --- /dev/null +++ b/crates/fff-py/python/fff_search/_native.pyi @@ -0,0 +1,201 @@ +from __future__ import annotations + +from typing import Optional + +__version__: str + +class FffError(RuntimeError): ... + +class FileItem: + relative_path: str + file_name: str + git_status: str + size: int + modified: int + access_frecency_score: int + modification_frecency_score: int + total_frecency_score: int + is_binary: bool + +class Score: + total: int + base_score: int + filename_bonus: int + special_filename_bonus: int + frecency_boost: int + distance_penalty: int + current_file_penalty: int + combo_match_boost: int + path_alignment_bonus: int + exact_match: bool + match_type: str + +class Location: + kind: str + line: int + col: int + end_line: int + end_col: int + +class SearchResult: + items: list[FileItem] + scores: list[Score] + total_matched: int + total_files: int + location: Optional[Location] + def __len__(self) -> int: ... + +class DirItem: + relative_path: str + dir_name: str + max_access_frecency: int + +class DirSearchResult: + items: list[DirItem] + scores: list[Score] + total_matched: int + total_dirs: int + def __len__(self) -> int: ... 
+ +class MixedItem: + kind: str + file: Optional[FileItem] + directory: Optional[DirItem] + +class MixedSearchResult: + items: list[MixedItem] + scores: list[Score] + total_matched: int + total_files: int + total_dirs: int + location: Optional[Location] + def __len__(self) -> int: ... + +class GrepMatch: + relative_path: str + file_name: str + git_status: str + line_content: str + match_ranges: list[tuple[int, int]] + context_before: list[str] + context_after: list[str] + size: int + modified: int + total_frecency_score: int + access_frecency_score: int + modification_frecency_score: int + line_number: int + byte_offset: int + col: int + fuzzy_score: Optional[int] + is_binary: bool + is_definition: bool + +class GrepResult: + items: list[GrepMatch] + total_matched: int + total_files_searched: int + total_files: int + filtered_file_count: int + next_cursor: Optional[int] + regex_fallback_error: Optional[str] + def __len__(self) -> int: ... + +class ScanProgress: + scanned_files_count: int + is_scanning: bool + is_watcher_ready: bool + is_warmup_complete: bool + +class FileFinder: + is_destroyed: bool + + @staticmethod + def create( + base_path: str, + *, + frecency_db_path: Optional[str] = None, + history_db_path: Optional[str] = None, + disable_mmap_cache: bool = False, + disable_content_indexing: Optional[bool] = None, + disable_watch: bool = False, + ai_mode: bool = False, + log_file_path: Optional[str] = None, + log_level: Optional[str] = None, + cache_budget_max_files: int = 0, + cache_budget_max_bytes: int = 0, + cache_budget_max_file_size: int = 0, + ) -> FileFinder: ... + def destroy(self) -> None: ... + def __enter__(self) -> FileFinder: ... + def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> bool: ... + def wait_for_scan(self, timeout_ms: int) -> bool: ... + def wait_for_watcher(self, timeout_ms: int) -> bool: ... + def is_scanning(self) -> bool: ... + def get_scan_progress(self) -> ScanProgress: ... 
+ def scan_files(self) -> None: ... + def refresh_git_status(self) -> int: ... + def get_base_path(self) -> Optional[str]: ... + def file_search( + self, + query: str, + *, + current_file: Optional[str] = None, + max_threads: int = 0, + page_index: int = 0, + page_size: int = 0, + combo_boost_multiplier: int = 0, + min_combo_count: int = 0, + ) -> SearchResult: ... + def directory_search( + self, + query: str, + *, + current_file: Optional[str] = None, + max_threads: int = 0, + page_index: int = 0, + page_size: int = 0, + ) -> DirSearchResult: ... + def mixed_search( + self, + query: str, + *, + current_file: Optional[str] = None, + max_threads: int = 0, + page_index: int = 0, + page_size: int = 0, + combo_boost_multiplier: int = 0, + min_combo_count: int = 0, + ) -> MixedSearchResult: ... + def grep( + self, + query: str, + *, + mode: str = "plain", + max_file_size: int = 0, + max_matches_per_file: int = 0, + smart_case: bool = True, + cursor: Optional[int] = None, + page_limit: int = 0, + time_budget_ms: int = 0, + before_context: int = 0, + after_context: int = 0, + classify_definitions: bool = False, + ) -> GrepResult: ... + def multi_grep( + self, + patterns: list[str], + *, + constraints: Optional[str] = None, + max_file_size: int = 0, + max_matches_per_file: int = 0, + smart_case: bool = True, + cursor: Optional[int] = None, + page_limit: int = 0, + time_budget_ms: int = 0, + before_context: int = 0, + after_context: int = 0, + classify_definitions: bool = False, + ) -> GrepResult: ... + def track_query(self, query: str, selected_file_path: str) -> bool: ... + def get_historical_query(self, offset: int) -> Optional[str]: ... diff --git a/crates/fff-py/python/fff_search/py.typed b/crates/fff-py/python/fff_search/py.typed new file mode 100644 index 00000000..e69de29b diff --git a/crates/fff-py/src/lib.rs b/crates/fff-py/src/lib.rs new file mode 100644 index 00000000..3cfa3f68 --- /dev/null +++ b/crates/fff-py/src/lib.rs @@ -0,0 +1,1137 @@ +//! 
Python bindings for FFF. + +#![allow(unsafe_op_in_unsafe_fn)] + +use std::path::PathBuf; +use std::time::Duration; + +use pyo3::create_exception; +use pyo3::exceptions::PyRuntimeError; +use pyo3::prelude::*; + +use fff::file_picker::FilePicker; +use fff::frecency::FrecencyTracker; +use fff::git::format_git_status; +use fff::grep::{GrepMatch as CoreGrepMatch, GrepResult as CoreGrepResult, GrepSearchOptions}; +use fff::query_tracker::QueryTracker; +use fff::shared::{SharedFilePicker, SharedFrecency, SharedQueryTracker}; +use fff::types::ContentCacheBudget; +use fff::{ + DirItem as CoreDirItem, DirSearchResult as CoreDirSearchResult, FFFMode, FilePickerOptions, + FileItem as CoreFileItem, FuzzySearchOptions, Location as CoreLocation, + MixedItemRef as CoreMixedItemRef, MixedSearchResult as CoreMixedSearchResult, + PaginationArgs, QueryParser, Score as CoreScore, SearchResult as CoreSearchResult, +}; + +create_exception!(fff_search, FffError, PyRuntimeError); + +fn err(msg: impl Into) -> PyErr { + PyErr::new::(msg.into()) +} + +fn default_or(val: T, default: T) -> T { + if val == T::default() { default } else { val } +} + +fn grep_mode_from_str(s: &str) -> PyResult { + match s { + "plain" | "" => Ok(fff::GrepMode::PlainText), + "regex" => Ok(fff::GrepMode::Regex), + "fuzzy" => Ok(fff::GrepMode::Fuzzy), + other => Err(err(format!( + "invalid grep mode: {other:?} (expected 'plain', 'regex', or 'fuzzy')" + ))), + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct FileItem { + pub relative_path: String, + pub file_name: String, + pub git_status: String, + pub size: u64, + pub modified: u64, + pub access_frecency_score: i64, + pub modification_frecency_score: i64, + pub total_frecency_score: i64, + pub is_binary: bool, +} + +#[pymethods] +impl FileItem { + fn __repr__(&self) -> String { + format!( + "FileItem(relative_path={:?}, size={}, total_frecency_score={}, is_binary={})", + self.relative_path, self.size, 
self.total_frecency_score, self.is_binary + ) + } +} + +impl FileItem { + fn from_core(item: &CoreFileItem, picker: &FilePicker) -> Self { + FileItem { + relative_path: item.relative_path(picker).to_string(), + file_name: item.file_name(picker).to_string(), + git_status: format_git_status(item.git_status).to_string(), + size: item.size, + modified: item.modified, + access_frecency_score: item.access_frecency_score as i64, + modification_frecency_score: item.modification_frecency_score as i64, + total_frecency_score: item.total_frecency_score() as i64, + is_binary: item.is_binary(), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct Score { + pub total: i32, + pub base_score: i32, + pub filename_bonus: i32, + pub special_filename_bonus: i32, + pub frecency_boost: i32, + pub distance_penalty: i32, + pub current_file_penalty: i32, + pub combo_match_boost: i32, + pub path_alignment_bonus: i32, + pub exact_match: bool, + pub match_type: String, +} + +#[pymethods] +impl Score { + fn __repr__(&self) -> String { + format!( + "Score(total={}, match_type={:?}, exact_match={})", + self.total, self.match_type, self.exact_match + ) + } +} + +impl From<&CoreScore> for Score { + fn from(s: &CoreScore) -> Self { + Score { + total: s.total, + base_score: s.base_score, + filename_bonus: s.filename_bonus, + special_filename_bonus: s.special_filename_bonus, + frecency_boost: s.frecency_boost, + distance_penalty: s.distance_penalty, + current_file_penalty: s.current_file_penalty, + combo_match_boost: s.combo_match_boost, + path_alignment_bonus: s.path_alignment_bonus, + exact_match: s.exact_match, + match_type: s.match_type.to_string(), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct Location { + /// "line" | "position" | "range" + pub kind: String, + pub line: i32, + pub col: i32, + pub end_line: i32, + pub end_col: i32, +} + +#[pymethods] +impl Location { + fn 
__repr__(&self) -> String { + match self.kind.as_str() { + "line" => format!("Location(line={})", self.line), + "position" => format!("Location(line={}, col={})", self.line, self.col), + "range" => format!( + "Location({}:{}..{}:{})", + self.line, self.col, self.end_line, self.end_col + ), + _ => "Location(none)".to_string(), + } + } +} + +impl Location { + fn from_core(loc: Option<&CoreLocation>) -> Option { + match loc? { + CoreLocation::Line(line) => Some(Location { + kind: "line".to_string(), + line: *line, + col: 0, + end_line: 0, + end_col: 0, + }), + CoreLocation::Position { line, col } => Some(Location { + kind: "position".to_string(), + line: *line, + col: *col, + end_line: 0, + end_col: 0, + }), + CoreLocation::Range { start, end } => Some(Location { + kind: "range".to_string(), + line: start.0, + col: start.1, + end_line: end.0, + end_col: end.1, + }), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct SearchResult { + pub items: Vec, + pub scores: Vec, + pub total_matched: u32, + pub total_files: u32, + pub location: Option, +} + +#[pymethods] +impl SearchResult { + fn __repr__(&self) -> String { + format!( + "SearchResult(items={}, total_matched={}, total_files={})", + self.items.len(), + self.total_matched, + self.total_files + ) + } + + fn __len__(&self) -> usize { + self.items.len() + } +} + +impl SearchResult { + fn from_core(r: &CoreSearchResult, picker: &FilePicker) -> Self { + SearchResult { + items: r + .items + .iter() + .map(|i| FileItem::from_core(i, picker)) + .collect(), + scores: r.scores.iter().map(Score::from).collect(), + total_matched: r.total_matched as u32, + total_files: r.total_files as u32, + location: Location::from_core(r.location.as_ref()), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct DirItem { + pub relative_path: String, + pub dir_name: String, + pub max_access_frecency: i32, +} + +#[pymethods] +impl 
DirItem { + fn __repr__(&self) -> String { + format!("DirItem(relative_path={:?})", self.relative_path) + } +} + +impl DirItem { + fn from_core(d: &CoreDirItem, picker: &FilePicker) -> Self { + DirItem { + relative_path: d.relative_path(picker).to_string(), + dir_name: d.dir_name(picker).to_string(), + max_access_frecency: d.max_access_frecency(), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct DirSearchResult { + pub items: Vec, + pub scores: Vec, + pub total_matched: u32, + pub total_dirs: u32, +} + +#[pymethods] +impl DirSearchResult { + fn __repr__(&self) -> String { + format!( + "DirSearchResult(items={}, total_matched={}, total_dirs={})", + self.items.len(), + self.total_matched, + self.total_dirs + ) + } + + fn __len__(&self) -> usize { + self.items.len() + } +} + +impl DirSearchResult { + fn from_core(r: &CoreDirSearchResult, picker: &FilePicker) -> Self { + DirSearchResult { + items: r + .items + .iter() + .map(|d| DirItem::from_core(d, picker)) + .collect(), + scores: r.scores.iter().map(Score::from).collect(), + total_matched: r.total_matched as u32, + total_dirs: r.total_dirs as u32, + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct MixedItem { + /// "file" or "directory" + pub kind: String, + pub file: Option, + pub directory: Option, +} + +#[pymethods] +impl MixedItem { + fn __repr__(&self) -> String { + match self.kind.as_str() { + "file" => format!("MixedItem(file={:?})", self.file), + "directory" => format!("MixedItem(directory={:?})", self.directory), + _ => "MixedItem(unknown)".to_string(), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct MixedSearchResult { + pub items: Vec, + pub scores: Vec, + pub total_matched: u32, + pub total_files: u32, + pub total_dirs: u32, + pub location: Option, +} + +#[pymethods] +impl MixedSearchResult { + fn __repr__(&self) -> String { 
+ format!( + "MixedSearchResult(items={}, total_matched={}, total_files={}, total_dirs={})", + self.items.len(), + self.total_matched, + self.total_files, + self.total_dirs + ) + } + + fn __len__(&self) -> usize { + self.items.len() + } +} + +impl MixedSearchResult { + fn from_core(r: &CoreMixedSearchResult, picker: &FilePicker) -> Self { + let items = r + .items + .iter() + .map(|item| match item { + CoreMixedItemRef::File(f) => MixedItem { + kind: "file".to_string(), + file: Some(FileItem::from_core(f, picker)), + directory: None, + }, + CoreMixedItemRef::Dir(d) => MixedItem { + kind: "directory".to_string(), + file: None, + directory: Some(DirItem::from_core(d, picker)), + }, + }) + .collect(); + MixedSearchResult { + items, + scores: r.scores.iter().map(Score::from).collect(), + total_matched: r.total_matched as u32, + total_files: r.total_files as u32, + total_dirs: r.total_dirs as u32, + location: Location::from_core(r.location.as_ref()), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct GrepMatch { + pub relative_path: String, + pub file_name: String, + pub git_status: String, + pub line_content: String, + pub match_ranges: Vec<(u32, u32)>, + pub context_before: Vec, + pub context_after: Vec, + pub size: u64, + pub modified: u64, + pub total_frecency_score: i64, + pub access_frecency_score: i64, + pub modification_frecency_score: i64, + pub line_number: u64, + pub byte_offset: u64, + pub col: u32, + pub fuzzy_score: Option, + pub is_binary: bool, + pub is_definition: bool, +} + +#[pymethods] +impl GrepMatch { + fn __repr__(&self) -> String { + format!( + "GrepMatch({}:{}: {:?})", + self.relative_path, + self.line_number, + if self.line_content.len() > 60 { + format!("{}…", &self.line_content[..60]) + } else { + self.line_content.clone() + } + ) + } +} + +impl GrepMatch { + fn from_core(m: &CoreGrepMatch, file: &CoreFileItem, picker: &FilePicker) -> Self { + GrepMatch { + relative_path: 
file.relative_path(picker).to_string(), + file_name: file.file_name(picker).to_string(), + git_status: format_git_status(file.git_status).to_string(), + line_content: m.line_content.clone(), + match_ranges: m + .match_byte_offsets + .iter() + .map(|r| (r.0, r.1)) + .collect(), + context_before: m.context_before.clone(), + context_after: m.context_after.clone(), + size: file.size, + modified: file.modified, + total_frecency_score: file.total_frecency_score() as i64, + access_frecency_score: file.access_frecency_score as i64, + modification_frecency_score: file.modification_frecency_score as i64, + line_number: m.line_number, + byte_offset: m.byte_offset, + col: m.col as u32, + fuzzy_score: m.fuzzy_score, + is_binary: file.is_binary(), + is_definition: m.is_definition, + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct GrepResult { + pub items: Vec, + pub total_matched: u32, + pub total_files_searched: u32, + pub total_files: u32, + pub filtered_file_count: u32, + /// Pass back as `cursor=` to fetch the next page; `None` when done. 
+ pub next_cursor: Option, + pub regex_fallback_error: Option, +} + +#[pymethods] +impl GrepResult { + fn __repr__(&self) -> String { + format!( + "GrepResult(items={}, total_files_searched={}, next_cursor={:?})", + self.items.len(), + self.total_files_searched, + self.next_cursor + ) + } + + fn __len__(&self) -> usize { + self.items.len() + } +} + +impl GrepResult { + fn from_core(r: &CoreGrepResult<'_>, picker: &FilePicker) -> Self { + let items: Vec = r + .matches + .iter() + .filter_map(|m| { + let file: &CoreFileItem = *r.files.get(m.file_index)?; + Some(GrepMatch::from_core(m, file, picker)) + }) + .collect(); + let total_matched = items.len() as u32; + GrepResult { + items, + total_matched, + total_files_searched: r.total_files_searched as u32, + total_files: r.total_files as u32, + filtered_file_count: r.filtered_file_count as u32, + next_cursor: if r.next_file_offset == 0 { + None + } else { + Some(r.next_file_offset as u32) + }, + regex_fallback_error: r.regex_fallback_error.clone(), + } + } +} + +#[pyclass(module = "fff_search._native", frozen, get_all)] +#[derive(Clone, Debug)] +pub struct ScanProgress { + pub scanned_files_count: u64, + pub is_scanning: bool, + pub is_watcher_ready: bool, + pub is_warmup_complete: bool, +} + +#[pymethods] +impl ScanProgress { + fn __repr__(&self) -> String { + format!( + "ScanProgress(scanned_files_count={}, is_scanning={}, is_watcher_ready={}, is_warmup_complete={})", + self.scanned_files_count, + self.is_scanning, + self.is_watcher_ready, + self.is_warmup_complete + ) + } +} + +#[pyclass(module = "fff_search._native")] +pub struct FileFinder { + picker: SharedFilePicker, + frecency: SharedFrecency, + query_tracker: SharedQueryTracker, + destroyed: bool, +} + +#[pymethods] +impl FileFinder { + /// Create a new file finder rooted at `base_path`. 
+ #[staticmethod] + #[pyo3(signature = ( + base_path, + *, + frecency_db_path = None, + history_db_path = None, + disable_mmap_cache = false, + disable_content_indexing = None, + disable_watch = false, + ai_mode = false, + log_file_path = None, + log_level = None, + cache_budget_max_files = 0, + cache_budget_max_bytes = 0, + cache_budget_max_file_size = 0, + ))] + #[allow(clippy::too_many_arguments)] + fn create( + base_path: String, + frecency_db_path: Option, + history_db_path: Option, + disable_mmap_cache: bool, + disable_content_indexing: Option, + disable_watch: bool, + ai_mode: bool, + log_file_path: Option, + log_level: Option, + cache_budget_max_files: u64, + cache_budget_max_bytes: u64, + cache_budget_max_file_size: u64, + ) -> PyResult { + if base_path.is_empty() { + return Err(err("base_path is required and cannot be empty")); + } + + if let Some(log_path) = log_file_path.as_deref() { + let level = log_level.as_deref(); + fff::log::init_tracing(log_path, level) + .map_err(|e| err(format!("Failed to init tracing: {e}")))?; + } + + let shared_picker = SharedFilePicker::default(); + let shared_frecency = SharedFrecency::default(); + let shared_query_tracker = SharedQueryTracker::default(); + + if let Some(p) = frecency_db_path.as_deref() { + if !p.is_empty() { + if let Some(parent) = PathBuf::from(p).parent() { + let _ = std::fs::create_dir_all(parent); + } + let tracker = FrecencyTracker::open(p) + .map_err(|e| err(format!("Failed to init frecency db: {e}")))?; + shared_frecency + .init(tracker) + .map_err(|e| err(format!("Failed to acquire frecency lock: {e}")))?; + let _ = shared_frecency.spawn_gc(p.to_string()); + } + } + + if let Some(p) = history_db_path.as_deref() { + if !p.is_empty() { + if let Some(parent) = PathBuf::from(p).parent() { + let _ = std::fs::create_dir_all(parent); + } + let tracker = QueryTracker::open(p) + .map_err(|e| err(format!("Failed to init query tracker db: {e}")))?; + shared_query_tracker + .init(tracker) + .map_err(|e| 
err(format!("Failed to acquire query tracker lock: {e}")))?; + } + } + + let mode = if ai_mode { FFFMode::Ai } else { FFFMode::Neovim }; + + let cache_budget = ContentCacheBudget::from_overrides( + cache_budget_max_files as usize, + cache_budget_max_bytes, + cache_budget_max_file_size, + ); + + let enable_mmap_cache = !disable_mmap_cache; + let enable_content_indexing = !disable_content_indexing.unwrap_or(disable_mmap_cache); + let watch = !disable_watch; + + FilePicker::new_with_shared_state( + shared_picker.clone(), + shared_frecency.clone(), + FilePickerOptions { + base_path, + enable_mmap_cache, + enable_content_indexing, + watch, + mode, + cache_budget, + }, + ) + .map_err(|e| err(format!("Failed to init file picker: {e}")))?; + + Ok(FileFinder { + picker: shared_picker, + frecency: shared_frecency, + query_tracker: shared_query_tracker, + destroyed: false, + }) + } + + /// Tear down the picker, watcher, and databases. Idempotent. + fn destroy(&mut self) { + if self.destroyed { + return; + } + if let Ok(mut guard) = self.picker.write() { + if let Some(mut p) = guard.take() { + p.stop_background_monitor(); + } + } + if let Ok(mut g) = self.frecency.write() { + *g = None; + } + if let Ok(mut g) = self.query_tracker.write() { + *g = None; + } + self.destroyed = true; + } + + #[getter] + fn is_destroyed(&self) -> bool { + self.destroyed + } + + fn __enter__<'py>(slf: Bound<'py, Self>) -> Bound<'py, Self> { + slf + } + + fn __exit__( + &mut self, + _exc_type: Option, + _exc_val: Option, + _exc_tb: Option, + ) -> bool { + self.destroy(); + false + } + + /// Block until the initial scan finishes or `timeout_ms` elapses. + fn wait_for_scan(&self, py: Python<'_>, timeout_ms: u64) -> PyResult { + self.ensure_alive()?; + let dur = Duration::from_millis(timeout_ms); + Ok(py.allow_threads(|| self.picker.wait_for_scan(dur))) + } + + /// Block until the background watcher is ready or `timeout_ms` elapses. 
+ fn wait_for_watcher(&self, py: Python<'_>, timeout_ms: u64) -> PyResult { + self.ensure_alive()?; + let dur = Duration::from_millis(timeout_ms); + Ok(py.allow_threads(|| self.picker.wait_for_watcher(dur))) + } + + fn is_scanning(&self) -> PyResult { + self.ensure_alive()?; + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + Ok(guard.as_ref().map(|p| p.is_scan_active()).unwrap_or(false)) + } + + fn get_scan_progress(&self) -> PyResult { + self.ensure_alive()?; + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + let p = guard + .as_ref() + .ok_or_else(|| err("picker not initialized"))?; + let prog = p.get_scan_progress(); + Ok(ScanProgress { + scanned_files_count: prog.scanned_files_count as u64, + is_scanning: prog.is_scanning, + is_watcher_ready: prog.is_watcher_ready, + is_warmup_complete: prog.is_warmup_complete, + }) + } + + /// Trigger an async full rescan. + fn scan_files(&self) -> PyResult<()> { + self.ensure_alive()?; + self.picker + .trigger_full_rescan_async(&self.frecency) + .map_err(|e| err(format!("scan_files failed: {e}")))?; + Ok(()) + } + + /// Refresh git status. Returns the number of files updated. + fn refresh_git_status(&self) -> PyResult { + self.ensure_alive()?; + self.picker + .refresh_git_status(&self.frecency) + .map_err(|e| err(format!("refresh_git_status failed: {e}"))) + } + + fn get_base_path(&self) -> PyResult> { + self.ensure_alive()?; + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + Ok(guard.as_ref().map(|p| p.base_path().display().to_string())) + } + + /// Fuzzy-search indexed files. 
+ #[pyo3(signature = ( + query, + *, + current_file = None, + max_threads = 0, + page_index = 0, + page_size = 0, + combo_boost_multiplier = 0, + min_combo_count = 0, + ))] + #[allow(clippy::too_many_arguments)] + fn file_search( + &self, + py: Python<'_>, + query: &str, + current_file: Option<&str>, + max_threads: u32, + page_index: u32, + page_size: u32, + combo_boost_multiplier: i32, + min_combo_count: u32, + ) -> PyResult { + self.ensure_alive()?; + let page_size = default_or(page_size, 100) as usize; + let min_combo_count = default_or(min_combo_count, 3); + let combo_boost_multiplier = default_or(combo_boost_multiplier, 100); + + py.allow_threads(|| -> PyResult { + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + let picker = guard.as_ref().ok_or_else(|| err("picker not initialized"))?; + let qt_guard = self + .query_tracker + .read() + .map_err(|_| err("query tracker lock"))?; + + let parser = QueryParser::default(); + let parsed = parser.parse(query); + + let results = picker.fuzzy_search( + &parsed, + qt_guard.as_ref(), + FuzzySearchOptions { + max_threads: max_threads as usize, + current_file, + project_path: Some(picker.base_path()), + combo_boost_score_multiplier: combo_boost_multiplier, + min_combo_count, + pagination: PaginationArgs { + offset: page_index as usize, + limit: page_size, + }, + }, + ); + Ok(SearchResult::from_core(&results, picker)) + }) + } + + /// Fuzzy-search indexed directories. 
+ #[pyo3(signature = ( + query, + *, + current_file = None, + max_threads = 0, + page_index = 0, + page_size = 0, + ))] + fn directory_search( + &self, + py: Python<'_>, + query: &str, + current_file: Option<&str>, + max_threads: u32, + page_index: u32, + page_size: u32, + ) -> PyResult { + self.ensure_alive()?; + let page_size = default_or(page_size, 100) as usize; + + py.allow_threads(|| -> PyResult { + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + let picker = guard.as_ref().ok_or_else(|| err("picker not initialized"))?; + + let parser = QueryParser::new(fff_query_parser::DirSearchConfig); + let parsed = parser.parse(query); + + let results = picker.fuzzy_search_directories( + &parsed, + FuzzySearchOptions { + max_threads: max_threads as usize, + current_file, + project_path: Some(picker.base_path()), + combo_boost_score_multiplier: 0, + min_combo_count: 0, + pagination: PaginationArgs { + offset: page_index as usize, + limit: page_size, + }, + }, + ); + Ok(DirSearchResult::from_core(&results, picker)) + }) + } + + /// Files + directories interleaved by score. 
+ #[pyo3(signature = ( + query, + *, + current_file = None, + max_threads = 0, + page_index = 0, + page_size = 0, + combo_boost_multiplier = 0, + min_combo_count = 0, + ))] + #[allow(clippy::too_many_arguments)] + fn mixed_search( + &self, + py: Python<'_>, + query: &str, + current_file: Option<&str>, + max_threads: u32, + page_index: u32, + page_size: u32, + combo_boost_multiplier: i32, + min_combo_count: u32, + ) -> PyResult { + self.ensure_alive()?; + let page_size = default_or(page_size, 100) as usize; + let min_combo_count = default_or(min_combo_count, 3); + let combo_boost_multiplier = default_or(combo_boost_multiplier, 100); + + py.allow_threads(|| -> PyResult { + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + let picker = guard.as_ref().ok_or_else(|| err("picker not initialized"))?; + let qt_guard = self + .query_tracker + .read() + .map_err(|_| err("query tracker lock"))?; + + let parser = QueryParser::new(fff_query_parser::MixedSearchConfig); + let parsed = parser.parse(query); + + let results = picker.fuzzy_search_mixed( + &parsed, + qt_guard.as_ref(), + FuzzySearchOptions { + max_threads: max_threads as usize, + current_file, + project_path: Some(picker.base_path()), + combo_boost_score_multiplier: combo_boost_multiplier, + min_combo_count, + pagination: PaginationArgs { + offset: page_index as usize, + limit: page_size, + }, + }, + ); + Ok(MixedSearchResult::from_core(&results, picker)) + }) + } + + /// Content grep. `mode` is `'plain' | 'regex' | 'fuzzy'`. 
+ #[pyo3(signature = ( + query, + *, + mode = "plain", + max_file_size = 0, + max_matches_per_file = 0, + smart_case = true, + cursor = None, + page_limit = 0, + time_budget_ms = 0, + before_context = 0, + after_context = 0, + classify_definitions = false, + ))] + #[allow(clippy::too_many_arguments)] + fn grep( + &self, + py: Python<'_>, + query: &str, + mode: &str, + max_file_size: u64, + max_matches_per_file: u32, + smart_case: bool, + cursor: Option, + page_limit: u32, + time_budget_ms: u64, + before_context: u32, + after_context: u32, + classify_definitions: bool, + ) -> PyResult { + self.ensure_alive()?; + let mode = grep_mode_from_str(mode)?; + + py.allow_threads(|| -> PyResult { + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + let picker = guard.as_ref().ok_or_else(|| err("picker not initialized"))?; + + let is_ai = picker.mode().is_ai(); + let parsed = if is_ai { + QueryParser::new(fff_query_parser::AiGrepConfig).parse(query) + } else { + fff::grep::parse_grep_query(query) + }; + + let options = GrepSearchOptions { + max_file_size: default_or(max_file_size, 10 * 1024 * 1024), + max_matches_per_file: max_matches_per_file as usize, + smart_case, + file_offset: cursor.unwrap_or(0) as usize, + page_limit: default_or(page_limit, 50) as usize, + mode, + time_budget_ms, + before_context: before_context as usize, + after_context: after_context as usize, + classify_definitions, + trim_whitespace: false, + abort_signal: None, + }; + + let result = picker.grep(&parsed, &options); + Ok(GrepResult::from_core(&result, picker)) + }) + } + + /// Multi-pattern OR grep (Aho-Corasick). `patterns` must be non-empty. 
+ #[pyo3(signature = ( + patterns, + *, + constraints = None, + max_file_size = 0, + max_matches_per_file = 0, + smart_case = true, + cursor = None, + page_limit = 0, + time_budget_ms = 0, + before_context = 0, + after_context = 0, + classify_definitions = false, + ))] + #[allow(clippy::too_many_arguments)] + fn multi_grep( + &self, + py: Python<'_>, + patterns: Vec, + constraints: Option<&str>, + max_file_size: u64, + max_matches_per_file: u32, + smart_case: bool, + cursor: Option, + page_limit: u32, + time_budget_ms: u64, + before_context: u32, + after_context: u32, + classify_definitions: bool, + ) -> PyResult { + self.ensure_alive()?; + if patterns.is_empty() || patterns.iter().all(|p| p.is_empty()) { + return Err(err("patterns must contain at least one non-empty string")); + } + + py.allow_threads(|| -> PyResult { + let guard = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + let picker = guard.as_ref().ok_or_else(|| err("picker not initialized"))?; + + let is_ai = picker.mode().is_ai(); + + let parsed_constraints = constraints.map(|c| { + if is_ai { + QueryParser::new(fff_query_parser::AiGrepConfig).parse(c) + } else { + fff::grep::parse_grep_query(c) + } + }); + let constraint_refs: &[fff::Constraint<'_>] = match &parsed_constraints { + Some(q) => &q.constraints, + None => &[], + }; + + let pattern_refs: Vec<&str> = patterns.iter().map(|s| s.as_str()).collect(); + + let options = GrepSearchOptions { + max_file_size: default_or(max_file_size, 10 * 1024 * 1024), + max_matches_per_file: max_matches_per_file as usize, + smart_case, + file_offset: cursor.unwrap_or(0) as usize, + page_limit: default_or(page_limit, 50) as usize, + mode: fff::GrepMode::PlainText, + time_budget_ms, + before_context: before_context as usize, + after_context: after_context as usize, + classify_definitions, + trim_whitespace: false, + abort_signal: None, + }; + + let result = picker.multi_grep(&pattern_refs, constraint_refs, &options); + 
Ok(GrepResult::from_core(&result, picker)) + }) + } + + /// Record a query→file selection. Requires the query tracker DB. + fn track_query(&self, query: &str, selected_file_path: &str) -> PyResult { + self.ensure_alive()?; + let project_path = { + let pg = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + pg.as_ref().map(|p| p.base_path().to_path_buf()) + }; + let project_path = match project_path { + Some(p) => p, + None => return Ok(false), + }; + let mut qt_guard = self + .query_tracker + .write() + .map_err(|_| err("query tracker lock"))?; + let qt = match qt_guard.as_mut() { + Some(q) => q, + None => return Ok(false), + }; + Ok(qt + .track_query_completion(query, &project_path, std::path::Path::new(selected_file_path)) + .is_ok()) + } + + /// Historical query at `offset` (0 = most recent). + fn get_historical_query(&self, offset: u64) -> PyResult> { + self.ensure_alive()?; + let project_path = { + let pg = self + .picker + .read() + .map_err(|e| err(format!("picker lock: {e}")))?; + pg.as_ref().map(|p| p.base_path().to_path_buf()) + }; + let project_path = match project_path { + Some(p) => p, + None => return Ok(None), + }; + let qt_guard = self + .query_tracker + .read() + .map_err(|_| err("query tracker lock"))?; + let qt = match qt_guard.as_ref() { + Some(q) => q, + None => return Ok(None), + }; + Ok(qt + .get_historical_query(&project_path, offset as usize) + .ok() + .flatten()) + } +} + +impl FileFinder { + fn ensure_alive(&self) -> PyResult<()> { + if self.destroyed { + return Err(err("FileFinder has been destroyed")); + } + Ok(()) + } +} + +#[pymodule] +fn _native(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add("__version__", env!("CARGO_PKG_VERSION"))?; + m.add("FffError", m.py().get_type_bound::())?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + m.add_class::()?; + 
m.add_class::()?; + Ok(()) +} diff --git a/crates/fff-py/tests/conftest.py b/crates/fff-py/tests/conftest.py new file mode 100644 index 00000000..c53ae246 --- /dev/null +++ b/crates/fff-py/tests/conftest.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import shutil +import tempfile +from pathlib import Path +from typing import Iterator + +import pytest + + +@pytest.fixture +def sample_repo() -> Iterator[Path]: + tmp = Path(tempfile.mkdtemp(prefix="fff_py_")) + try: + (tmp / "src").mkdir() + (tmp / "src" / "main.py").write_text( + "def hello():\n return 'world'\n\n" + "class Greeter:\n def greet(self):\n return hello()\n", + encoding="utf-8", + ) + (tmp / "src" / "lib.py").write_text( + "TODO: write the library\n" + "def add(a, b):\n return a + b\n", + encoding="utf-8", + ) + (tmp / "README.md").write_text( + "# Sample Repo\n\nFor fff_search tests.\n", encoding="utf-8" + ) + (tmp / "tests").mkdir() + (tmp / "tests" / "test_basic.py").write_text( + "def test_passes():\n assert True\n", encoding="utf-8" + ) + (tmp / "binary.dat").write_bytes(bytes([0, 1, 2, 3, 0xFF, 0xFE])) + yield tmp + finally: + shutil.rmtree(tmp, ignore_errors=True) diff --git a/crates/fff-py/tests/test_grep.py b/crates/fff-py/tests/test_grep.py new file mode 100644 index 00000000..f4e6f709 --- /dev/null +++ b/crates/fff-py/tests/test_grep.py @@ -0,0 +1,53 @@ +from __future__ import annotations + +from pathlib import Path + +from fff_search import FileFinder + + +def _open(repo: Path) -> FileFinder: + f = FileFinder.create(base_path=str(repo), disable_watch=True) + assert f.wait_for_scan(timeout_ms=10_000) + return f + + +def test_plain_grep_finds_todo(sample_repo: Path): + with _open(sample_repo) as f: + r = f.grep("TODO", mode="plain") + assert any("TODO" in m.line_content for m in r.items) + for m in r.items: + assert m.line_number >= 1 + + +def test_grep_classify_definitions(sample_repo: Path): + with _open(sample_repo) as f: + r = f.grep("def", mode="plain", 
classify_definitions=True, page_limit=20) + assert any(m.is_definition for m in r.items) + + +def test_regex_grep(sample_repo: Path): + with _open(sample_repo) as f: + r = f.grep(r"def\s+\w+", mode="regex", page_limit=20) + assert len(r.items) >= 1 + + +def test_fuzzy_grep(sample_repo: Path): + with _open(sample_repo) as f: + r = f.grep("hllo", mode="fuzzy", page_limit=20) + assert r.total_files_searched >= 1 + + +def test_multi_grep(sample_repo: Path): + with _open(sample_repo) as f: + r = f.multi_grep(["TODO", "hello"]) + assert len(r.items) >= 1 + contents = {m.line_content for m in r.items} + assert any("TODO" in c or "hello" in c for c in contents) + + +def test_grep_with_context(sample_repo: Path): + with _open(sample_repo) as f: + r = f.grep("TODO", mode="plain", before_context=1, after_context=1) + if r.items: + assert isinstance(r.items[0].context_before, list) + assert isinstance(r.items[0].context_after, list) diff --git a/crates/fff-py/tests/test_lifecycle.py b/crates/fff-py/tests/test_lifecycle.py new file mode 100644 index 00000000..820ebb39 --- /dev/null +++ b/crates/fff-py/tests/test_lifecycle.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import tempfile +from pathlib import Path + +import pytest + +from fff_search import FileFinder, FffError + + +def test_create_requires_base_path(): + with pytest.raises(FffError): + FileFinder.create(base_path="") + + +def test_create_and_destroy(sample_repo: Path): + finder = FileFinder.create(base_path=str(sample_repo), ai_mode=False, disable_watch=True) + try: + assert finder.is_destroyed is False + assert finder.get_base_path() is not None + finally: + finder.destroy() + assert finder.is_destroyed is True + finder.destroy() + + +def test_context_manager(sample_repo: Path): + with FileFinder.create(base_path=str(sample_repo), disable_watch=True) as f: + assert f.is_destroyed is False + assert f.is_destroyed is True + + +def test_use_after_destroy_raises(sample_repo: Path): + f = 
FileFinder.create(base_path=str(sample_repo), disable_watch=True) + f.destroy() + with pytest.raises(FffError): + f.file_search("anything") + + +def test_wait_for_scan_then_progress(sample_repo: Path): + with FileFinder.create(base_path=str(sample_repo), disable_watch=True) as f: + completed = f.wait_for_scan(timeout_ms=10_000) + assert completed is True + progress = f.get_scan_progress() + assert progress.is_scanning is False + assert progress.scanned_files_count >= 4 + + +def test_track_query_with_history_db(sample_repo: Path): + with tempfile.TemporaryDirectory() as tmp: + history_db = str(Path(tmp) / "history.db") + with FileFinder.create( + base_path=str(sample_repo), + history_db_path=history_db, + disable_watch=True, + ) as f: + assert f.wait_for_scan(timeout_ms=10_000) + target = str(sample_repo / "README.md") + assert f.track_query("readme", target) is True + # Tracking twice is idempotent enough to still succeed. + assert f.track_query("readme", target) is True + + +def test_get_historical_query_without_db_returns_none(sample_repo: Path): + with FileFinder.create(base_path=str(sample_repo), disable_watch=True) as f: + assert f.get_historical_query(0) is None diff --git a/crates/fff-py/tests/test_search.py b/crates/fff-py/tests/test_search.py new file mode 100644 index 00000000..451fdef8 --- /dev/null +++ b/crates/fff-py/tests/test_search.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +from pathlib import Path + +from fff_search import FileFinder + + +def _open(repo: Path, **kw) -> FileFinder: + f = FileFinder.create(base_path=str(repo), disable_watch=True, **kw) + assert f.wait_for_scan(timeout_ms=10_000) + return f + + +def test_file_search_finds_main(sample_repo: Path): + with _open(sample_repo) as f: + r = f.file_search("main", page_size=20) + assert r.total_matched >= 1 + paths = {item.relative_path for item in r.items} + assert any(p.endswith("main.py") for p in paths) + + +def test_file_search_typo_resistant(sample_repo: Path): + with 
_open(sample_repo) as f: + # "raedme" should still find README.md via fuzzy matching + r = f.file_search("raedme", page_size=10) + assert any(item.file_name.lower().startswith("readme") for item in r.items) + + +def test_file_search_returns_score(sample_repo: Path): + with _open(sample_repo) as f: + r = f.file_search("main.py", page_size=5) + assert len(r.scores) == len(r.items) + if r.items: + assert r.scores[0].total > 0 + + +def test_directory_search(sample_repo: Path): + with _open(sample_repo) as f: + r = f.directory_search("src", page_size=5) + names = {d.dir_name for d in r.items} + assert any("src" in n for n in names) + + +def test_mixed_search_yields_files_and_dirs(sample_repo: Path): + with _open(sample_repo) as f: + r = f.mixed_search("src", page_size=20) + kinds = {it.kind for it in r.items} + assert "file" in kinds or "directory" in kinds + + +def test_search_pagination(sample_repo: Path): + # page_index is a raw item offset (matches Node SDK) + with _open(sample_repo) as f: + first = f.file_search("py", page_index=0, page_size=2) + second = f.file_search("py", page_index=2, page_size=2) + if first.items and second.items: + assert {i.relative_path for i in first.items}.isdisjoint( + {i.relative_path for i in second.items} + ) + + +def test_search_total_matched_consistent(sample_repo: Path): + with _open(sample_repo) as f: + a = f.file_search("py", page_index=0, page_size=2) + b = f.file_search("py", page_index=0, page_size=10) + assert a.total_matched == b.total_matched