Skip to content

Commit 37d6657

Browse files
authored
Merge pull request #68 from ByteVeda/chore/audit-cleanup
chore: audit-driven cleanup (stubs, metadata, docs, release profile)
2 parents 069b717 + 8dbb8e6 commit 37d6657

6 files changed

Lines changed: 81 additions & 30 deletions

File tree

.gitignore

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,18 +33,12 @@ CLAUDE.md
3333
.DS_Store
3434
Thumbs.db
3535

36-
# Bundled native libraries (binary artifacts)
37-
python/paperjam/libpdfium.so
38-
3936
# Test fixtures (generated)
4037
tests/fixtures/large_*.pdf
4138

4239
# Per-session test artifacts (accuracy reports, etc.)
4340
tests/output/
4441

45-
# Sphinx
46-
_build
47-
4842
# Lock file (library)
4943
uv.lock
5044

Cargo.toml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ members = [
1919
]
2020

2121
[workspace.package]
22-
version = "0.1.3"
22+
version = "0.2.0"
2323
edition = "2021"
2424
rust-version = "1.75"
2525
license = "MIT"
@@ -46,3 +46,13 @@ roxmltree = "0.20"
4646
ureq = { version = "3", default-features = false, features = ["rustls-no-provider"] }
4747
rustls = { version = "0.23", default-features = false, features = ["aws_lc_rs", "logging", "std", "tls12"] }
4848
tokio = { version = "1", features = ["rt-multi-thread"] }
49+
50+
[profile.release]
51+
lto = "thin"
52+
codegen-units = 1
53+
strip = "symbols"
54+
55+
[profile.release-with-debug]
56+
inherits = "release"
57+
strip = "none"
58+
debug = true

README.md

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -79,16 +79,18 @@ steps:
7979
```
8080
8181
```bash
82-
paperjam pipeline run pipeline.yaml
82+
pj pipeline run pipeline.yaml
8383
```
8484

8585
### CLI usage
8686

87+
The CLI binary installed by `cargo install paperjam-cli` is named `pj`:
88+
8789
```bash
88-
paperjam extract text report.pdf
89-
paperjam extract tables data.pdf --format csv
90-
paperjam convert report.pdf report.docx
91-
paperjam info document.pdf
90+
pj info document.pdf
91+
pj extract text report.pdf
92+
pj extract tables data.pdf --strategy lattice --format json
93+
pj convert auto report.pdf -o report.docx
9294
```
9395

9496
### MCP server

docs-site/docs/getting-started/installation.md

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,23 @@ pip install "paperjam[pandas]"
2525

2626
### Documentation
2727

28-
To build these docs locally:
28+
The docs site uses [Docusaurus](https://docusaurus.io/). To build it locally:
2929

3030
```bash
31-
pip install "paperjam[docs]"
32-
cd docs
33-
make html
31+
git clone https://github.com/ByteVeda/paperjam
32+
cd paperjam/docs-site
33+
npm ci
34+
npm run start # dev server with hot reload
35+
npm run build # static site under docs-site/build/
3436
```
3537

3638
## Installing from source
3739

38-
Building from source requires a Rust toolchain (stable, 1.77+) and [maturin](https://maturin.rs/):
40+
Building from source requires a Rust toolchain (stable, 1.75+) and [maturin](https://maturin.rs/):
3941

4042
```bash
4143
pip install maturin
42-
git clone https://github.com/paperjam/paperjam
44+
git clone https://github.com/ByteVeda/paperjam
4345
cd paperjam
4446
maturin develop --release
4547
```
@@ -52,7 +54,11 @@ Pre-built wheels on PyPI include all features.
5254
| Feature | Methods enabled |
5355
|---------|----------------|
5456
| `render` | `render_page`, `render_pages`, `page.render`, `visual_diff` |
55-
| `signatures` | `signatures`, `verify_signatures`, `sign` |
57+
| `signatures` | `sign_document`, `verify_signatures`, `extract_signatures` |
58+
| `ltv` | Long-term validation (LTV) data embedded when signing: TSA timestamps, OCSP responses, CRLs |
59+
| `validation` | `validate_pdf_a`, `validate_pdf_ua`, `convert_to_pdf_a` |
60+
| `parallel` | Rayon-based parallel processing (default) |
61+
| `mmap` | Memory-mapped file access for large documents |
5662

5763
When building from source you can control features with the `--features` flag:
5864

py_src/paperjam/_paperjam.pyi

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,33 @@ def fill_form(
234234
document: RustDocument,
235235
values: dict[str, str],
236236
need_appearances: bool = True,
237+
generate_appearances: bool = False,
238+
) -> tuple[RustDocument, dict[str, Any]]: ...
239+
def modify_form_field(
240+
document: RustDocument,
241+
field_name: str,
242+
*,
243+
value: str | None = None,
244+
default_value: str | None = None,
245+
read_only: bool | None = None,
246+
required: bool | None = None,
247+
max_length: int | None = None,
248+
options: list[dict[str, str]] | None = None,
249+
) -> tuple[RustDocument, dict[str, Any]]: ...
250+
def add_form_field(
251+
document: RustDocument,
252+
name: str,
253+
field_type: str,
254+
page: int,
255+
rect: tuple[float, float, float, float],
256+
value: str | None = None,
257+
default_value: str | None = None,
258+
read_only: bool = False,
259+
required: bool = False,
260+
max_length: int | None = None,
261+
options: list[dict[str, str]] | None = None,
262+
font_size: float = 0.0,
263+
generate_appearance: bool = True,
237264
) -> tuple[RustDocument, dict[str, Any]]: ...
238265
def render_page(
239266
document: RustDocument,

pyproject.toml

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,9 @@ build-backend = "maturin"
44

55
[project]
66
name = "paperjam"
7-
version = "0.1.3"
8-
description = "Fast PDF processing powered by Rust"
7+
version = "0.2.0"
8+
description = "Fast multi-format document processing (PDF, DOCX, XLSX, PPTX, HTML, EPUB) powered by Rust"
9+
readme = "README.md"
910
license = { text = "MIT" }
1011
requires-python = ">=3.12"
1112
classifiers = [
@@ -17,22 +18,33 @@ classifiers = [
1718
"Programming Language :: Python :: 3.13",
1819
"Programming Language :: Python :: Implementation :: CPython",
1920
"Programming Language :: Rust",
21+
"Topic :: Office/Business",
2022
"Topic :: Software Development :: Libraries :: Python Modules",
2123
"Topic :: Text Processing",
2224
"Typing :: Typed",
2325
]
24-
keywords = ["pdf", "text-extraction", "table-extraction", "rust"]
26+
keywords = [
27+
"pdf",
28+
"docx",
29+
"xlsx",
30+
"pptx",
31+
"html",
32+
"epub",
33+
"text-extraction",
34+
"table-extraction",
35+
"document-conversion",
36+
"rust",
37+
]
38+
39+
[project.urls]
40+
Homepage = "https://docs.byteveda.org/paperjam/"
41+
Documentation = "https://docs.byteveda.org/paperjam/"
42+
Repository = "https://github.com/ByteVeda/paperjam"
43+
Issues = "https://github.com/ByteVeda/paperjam/issues"
44+
Changelog = "https://github.com/ByteVeda/paperjam/blob/main/CHANGELOG.md"
2545

2646
[project.optional-dependencies]
2747
pandas = ["pandas>=2.0"]
28-
docs = [
29-
"sphinx>=7.0",
30-
"furo>=2024.1.29",
31-
"myst-parser>=2.0",
32-
"sphinx-copybutton>=0.5",
33-
"sphinxcontrib-mermaid>=2.0",
34-
"sphinx-autobuild>=2024.0.0",
35-
]
3648
dev = [
3749
"pre-commit>=4.0",
3850
"pytest>=8.0",

0 commit comments

Comments
 (0)