Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,10 @@ members = [
"encodings/zstd",
"encodings/bytebool",
"encodings/parquet-variant",
# Experimental encodings
"encodings/experimental/onpair",
"encodings/experimental/onpair-sys",
"encodings/experimental/onpair-rs",
# Benchmarks
"benchmarks/lance-bench",
"benchmarks/compress-bench",
Expand Down Expand Up @@ -289,6 +293,9 @@ vortex-ipc = { version = "0.1.0", path = "./vortex-ipc", default-features = fals
vortex-layout = { version = "0.1.0", path = "./vortex-layout", default-features = false }
vortex-mask = { version = "0.1.0", path = "./vortex-mask", default-features = false }
vortex-metrics = { version = "0.1.0", path = "./vortex-metrics", default-features = false }
vortex-onpair = { version = "0.1.0", path = "./encodings/experimental/onpair", default-features = false }
vortex-onpair-rs = { version = "0.1.0", path = "./encodings/experimental/onpair-rs", default-features = false }
vortex-onpair-sys = { version = "0.1.0", path = "./encodings/experimental/onpair-sys", default-features = false }
vortex-pco = { version = "0.1.0", path = "./encodings/pco", default-features = false }
vortex-proto = { version = "0.1.0", path = "./vortex-proto", default-features = false }
vortex-runend = { version = "0.1.0", path = "./encodings/runend", default-features = false }
Expand Down
35 changes: 35 additions & 0 deletions encodings/experimental/onpair-rs/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Crate manifest for `vortex-onpair-rs`. Apart from `name`, `description`, and
# `readme`, every package field below is inherited from the workspace root.
[package]
name = "vortex-onpair-rs"
description = "Pure-Rust port of the OnPair short-string compression library"
authors = { workspace = true }
categories = { workspace = true }
edition = { workspace = true }
homepage = { workspace = true }
include = { workspace = true }
keywords = { workspace = true }
license = { workspace = true }
# Crate-local README path, set explicitly instead of being inherited.
readme = "README.md"
repository = { workspace = true }
rust-version = { workspace = true }
version = { workspace = true }

# Use the lint configuration declared at the workspace level.
[lints]
workspace = true

# Runtime dependencies. Each entry resolves to the matching definition in the
# workspace's shared dependency table.
[dependencies]
aho-corasick = { workspace = true }
hashbrown = { workspace = true }
memchr = { workspace = true }
rand = { workspace = true }

# Needed only when building tests, examples, and benchmarks.
# NOTE(review): `vortex-onpair-sys` is presumably listed here so the port can
# be checked against the existing bindings - confirm against the tests/benches.
[dev-dependencies]
arrow-array = { workspace = true }
arrow-schema = { workspace = true }
divan = { workspace = true }
parquet = { workspace = true }
rstest = { workspace = true }
vortex-onpair-sys = { workspace = true }

# `harness = false` turns off the built-in libtest harness, so the `clickbench`
# target has to supply its own `main`.
# NOTE(review): presumably driven by `divan` (a dev-dependency above) - confirm.
[[bench]]
name = "clickbench"
harness = false
11 changes: 11 additions & 0 deletions encodings/experimental/onpair-rs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# vortex-onpair-rs

Pure-Rust port of the training + encoding parts of
[`onpair_cpp`](https://github.com/gargiulofrancesco/onpair_cpp).

Scope is limited to what `vortex-onpair` actually consumes from
`vortex-onpair-sys`: `Column::compress` (BPE-style dictionary training plus
LSB-first bit-packed token encoding) and raw access to the resulting parts
(dictionary bytes/offsets, packed token stream, per-row boundaries). Decode,
LIKE, and EQ predicates are already pure Rust in `vortex-onpair` and reuse the
same `parts()` layout.
Loading
Loading