diff --git a/.gitignore b/.gitignore index 14bb93c..f11282f 100644 --- a/.gitignore +++ b/.gitignore @@ -33,18 +33,12 @@ CLAUDE.md .DS_Store Thumbs.db -# Bundled native libraries (binary artifacts) -python/paperjam/libpdfium.so - # Test fixtures (generated) tests/fixtures/large_*.pdf # Per-session test artifacts (accuracy reports, etc.) tests/output/ -# Sphinx -_build - # Lock file (library) uv.lock diff --git a/Cargo.toml b/Cargo.toml index 1e4ec4e..689677d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ members = [ ] [workspace.package] -version = "0.1.3" +version = "0.2.0" edition = "2021" rust-version = "1.75" license = "MIT" @@ -46,3 +46,13 @@ roxmltree = "0.20" ureq = { version = "3", default-features = false, features = ["rustls-no-provider"] } rustls = { version = "0.23", default-features = false, features = ["aws_lc_rs", "logging", "std", "tls12"] } tokio = { version = "1", features = ["rt-multi-thread"] } + +[profile.release] +lto = "thin" +codegen-units = 1 +strip = "symbols" + +[profile.release-with-debug] +inherits = "release" +strip = "none" +debug = true diff --git a/README.md b/README.md index 3e39e9f..4c88e41 100644 --- a/README.md +++ b/README.md @@ -79,16 +79,18 @@ steps: ``` ```bash -paperjam pipeline run pipeline.yaml +pj pipeline run pipeline.yaml ``` ### CLI usage +The CLI binary installed by `cargo install paperjam-cli` is named `pj`: + ```bash -paperjam extract text report.pdf -paperjam extract tables data.pdf --format csv -paperjam convert report.pdf report.docx -paperjam info document.pdf +pj info document.pdf +pj extract text report.pdf +pj extract tables data.pdf --strategy lattice --format json +pj convert auto report.pdf -o report.docx ``` ### MCP server diff --git a/docs-site/docs/getting-started/installation.md b/docs-site/docs/getting-started/installation.md index 793be4b..487d737 100644 --- a/docs-site/docs/getting-started/installation.md +++ b/docs-site/docs/getting-started/installation.md @@ -25,21 +25,23 @@ pip install "paperjam[pandas]" ### Documentation -To build these docs locally: +The docs site uses [Docusaurus](https://docusaurus.io/). To build it locally: ```bash -pip install "paperjam[docs]" -cd docs -make html +git clone https://github.com/ByteVeda/paperjam +cd paperjam/docs-site +npm ci +npm run start # dev server with hot reload +npm run build # static site under docs-site/build/ ``` ## Installing from source -Building from source requires a Rust toolchain (stable, 1.77+) and [maturin](https://maturin.rs/): +Building from source requires a Rust toolchain (stable, 1.75+) and [maturin](https://maturin.rs/): ```bash pip install maturin -git clone https://github.com/paperjam/paperjam +git clone https://github.com/ByteVeda/paperjam cd paperjam maturin develop --release ``` @@ -52,7 +54,11 @@ Pre-built wheels on PyPI include all features. | Feature | Methods enabled | |---------|----------------| | `render` | `render_page`, `render_pages`, `page.render`, `visual_diff` | -| `signatures` | `signatures`, `verify_signatures`, `sign` | +| `signatures` | `sign_document`, `verify_signatures`, `extract_signatures` | +| `ltv` | LTV timestamp embedding (TSA, OCSP, CRL) for signing | +| `validation` | `validate_pdf_a`, `validate_pdf_ua`, `convert_to_pdf_a` | +| `parallel` | Rayon-based parallel processing (default) | +| `mmap` | Memory-mapped file access for large documents | When building from source you can control features with the `--features` flag: diff --git a/py_src/paperjam/_paperjam.pyi b/py_src/paperjam/_paperjam.pyi index ae54504..a8b3cfd 100644 --- a/py_src/paperjam/_paperjam.pyi +++ b/py_src/paperjam/_paperjam.pyi @@ -234,6 +234,33 @@ def fill_form( document: RustDocument, values: dict[str, str], need_appearances: bool = True, + generate_appearances: bool = False, +) -> tuple[RustDocument, dict[str, Any]]: ... +def modify_form_field( + document: RustDocument, + field_name: str, + *, + value: str | None = None, + default_value: str | None = None, + read_only: bool | None = None, + required: bool | None = None, + max_length: int | None = None, + options: list[dict[str, str]] | None = None, +) -> tuple[RustDocument, dict[str, Any]]: ... +def add_form_field( + document: RustDocument, + name: str, + field_type: str, + page: int, + rect: tuple[float, float, float, float], + value: str | None = None, + default_value: str | None = None, + read_only: bool = False, + required: bool = False, + max_length: int | None = None, + options: list[dict[str, str]] | None = None, + font_size: float = 0.0, + generate_appearance: bool = True, ) -> tuple[RustDocument, dict[str, Any]]: ... def render_page( document: RustDocument, diff --git a/pyproject.toml b/pyproject.toml index 9553cf2..3383ad7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,8 +4,9 @@ build-backend = "maturin" [project] name = "paperjam" -version = "0.1.3" -description = "Fast PDF processing powered by Rust" +version = "0.2.0" +description = "Fast multi-format document processing (PDF, DOCX, XLSX, PPTX, HTML, EPUB) powered by Rust" +readme = "README.md" license = { text = "MIT" } requires-python = ">=3.12" classifiers = [ @@ -17,22 +18,33 @@ classifiers = [ "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Rust", + "Topic :: Office/Business", "Topic :: Software Development :: Libraries :: Python Modules", "Topic :: Text Processing", "Typing :: Typed", ] -keywords = ["pdf", "text-extraction", "table-extraction", "rust"] +keywords = [ + "pdf", + "docx", + "xlsx", + "pptx", + "html", + "epub", + "text-extraction", + "table-extraction", + "document-conversion", + "rust", +] + +[project.urls] +Homepage = "https://docs.byteveda.org/paperjam/" +Documentation = "https://docs.byteveda.org/paperjam/" +Repository = "https://github.com/ByteVeda/paperjam" +Issues = "https://github.com/ByteVeda/paperjam/issues" +Changelog = "https://github.com/ByteVeda/paperjam/blob/main/CHANGELOG.md" [project.optional-dependencies] pandas = ["pandas>=2.0"] -docs = [ - "sphinx>=7.0", - "furo>=2024.1.29", - "myst-parser>=2.0", - "sphinx-copybutton>=0.5", - "sphinxcontrib-mermaid>=2.0", - "sphinx-autobuild>=2024.0.0", -] dev = [ "pre-commit>=4.0", "pytest>=8.0",