diff --git a/.gitignore b/.gitignore
index ee5f4bd..6f75543 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,15 @@ labs
 .venv
 *.7z
 *.env
-*.egg*/
\ No newline at end of file
+*.egg*/
+
+# Rust / Maturin build artifacts
+target/
+*.pyd
+*.pdb
+*.so
+
+# IDE / tool state
+.idea/
+.serena/
+claudedocs/
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..b9e5dc0
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,494 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
+[[package]]
+name = "autocfg"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
+
+[[package]]
+name = "base64"
+version = "0.21.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567"
+
+[[package]]
+name = "bit-set"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
+dependencies = [
+ "bit-vec",
+]
+
+[[package]]
+name = "bit-vec"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
+
+[[package]]
+name = "bitflags"
+version = "2.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
+
+[[package]]
+name = "bstr"
+version = "1.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63044e1ae8e69f3b5a92c736ca6269b8d12fa7efe39bf34ddb06d102cf0e2cab"
+dependencies = [
+ "memchr",
+ "regex-automata",
+ "serde",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "chardetng"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
+dependencies = [
+ "cfg-if",
+ "encoding_rs",
+ "memchr",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "either"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
+
+[[package]]
+name = "encoding_rs"
+version = "0.8.35"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "fancy-regex"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7493d4c459da9f84325ad297371a6b2b8a162800873a22e3b6b6512e61d18c05"
+dependencies = [
+ "bit-set",
+ "regex",
+]
+
+[[package]]
+name = "heck"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
+
+[[package]]
+name = "indoc"
+version = "2.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
+dependencies = [
+ "rustversion",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
+
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "pyo3"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8"
+dependencies = [
+ "cfg-if",
+ "indoc",
+ "libc",
+ "memoffset",
+ "parking_lot",
+ "portable-atomic",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50"
+dependencies = [
+ "once_cell",
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.21.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "pyo3-build-config",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "smallvec"
+version = "1.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90"
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.12.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1"
+
+[[package]]
+name = "textspitter-core"
+version = "2.0.0"
+dependencies = [
+ "chardetng",
+ "memchr",
+ "pyo3",
+ "rayon",
+ "regex",
+ "tiktoken-rs",
+ "unicode-normalization",
+]
+
+[[package]]
+name = "tiktoken-rs"
+version = "0.5.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c314e7ce51440f9e8f5a497394682a57b7c323d0f4d0a6b1b13c429056e0e234"
+dependencies = [
+ "anyhow",
+ "base64",
+ "bstr",
+ "fancy-regex",
+ "lazy_static",
+ "parking_lot",
+ "rustc-hash",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-normalization"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8"
+dependencies = [
+ "tinyvec",
+]
+
+[[package]]
+name = "unindent"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..bc840c2
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,29 @@
+[package]
+name = "textspitter-core"
+version = "2.0.0"
+edition = "2021"
+
+[lib]
+# Built as the `TextSpitter._core` extension module (see pyproject.toml).
+name = "_core"
+crate-type = ["cdylib"]
+
+[dependencies]
+chardetng = "0.1"
+# Optional: enabled by the `simd` feature.
+memchr = { version = "2", optional = true }
+pyo3 = { version = "0.21", features = ["extension-module", "abi3-py310"] }
+rayon = "1"
+regex = "1"
+tiktoken-rs = "0.5"
+unicode-normalization = "0.1"
+
+[features]
+default = []
+simd = ["memchr/std"]
+
+[profile.release]
+# Maximum optimization for the shipped wheel; expect slow release builds.
+lto = true
+codegen-units = 1
+opt-level = 3
diff --git a/TextSpitter/__init__.py b/TextSpitter/__init__.py
index 8f03c68..4a64a45 100644
--- a/TextSpitter/__init__.py
+++ b/TextSpitter/__init__.py
@@ -9,9 +9,40 @@
 except PackageNotFoundError:
     __version__ = "unknown"
 
+try:
+    from TextSpitter._core import (  # type: ignore[import]
+        Chunk,
+        TextChunker,
+        TextNormalizer,
+        TokenCounter,
+        detect_encoding,
+    )
+
+    _RUST_AVAILABLE = True
+except ImportError:
+    from TextSpitter._fallback import (
+        Chunk,
+        TextChunker,
+        TextNormalizer,
+        TokenCounter,
+        detect_encoding,
+    )
+
+    _RUST_AVAILABLE = False
+
 from .main import WordLoader
 
-__all__ = ["TextSpitter", "WordLoader", "__version__"]
+__all__ = [
+    "TextSpitter",
+    "WordLoader",
+    "TextNormalizer",
+    "TextChunker",
+    "TokenCounter",
+    "Chunk",
+    "detect_encoding",
+    "_RUST_AVAILABLE",
+    "__version__",
+]
 
 
 def TextSpitter(
diff --git a/TextSpitter/_fallback.py b/TextSpitter/_fallback.py
new file mode 100644
index 0000000..0e1ee38
--- /dev/null
+++ b/TextSpitter/_fallback.py
@@ -0,0 +1,322 @@
+"""
+Pure-Python fallback implementations for when the Rust extension is unavailable.
+
+These match the interface of TextSpitter._core exactly, so callers can use
+either path without branching.
+"""
+
+from __future__ import annotations
+
+import unicodedata
+from typing import Literal
+
+
+def detect_encoding(data: bytes) -> str:
+    """Detect encoding by trying common codecs in priority order."""
+    if not data:
+        return "utf-8"
+    for enc in ("utf-8", "utf-8-sig", "cp1252", "latin-1"):
+        try:
+            data.decode(enc)
+            return enc
+        except (UnicodeDecodeError, LookupError):
+            continue
+    return "utf-8"
+
+
+class TextNormalizer:
+    _NormForm = Literal["NFC", "NFD", "NFKC", "NFKD"]
+
+    def __init__(
+        self,
+        unicode_form: _NormForm = "NFC",
+        collapse_whitespace: bool = True,
+        repair_ocr: bool = False,
+        strip_headers_footers: bool = False,
+    ) -> None:
+        self.unicode_form: TextNormalizer._NormForm = unicode_form
+        self.collapse_whitespace = collapse_whitespace
+        self.repair_ocr = repair_ocr
+        self.strip_headers_footers = strip_headers_footers
+
+    def normalize(self, text: str) -> str:
+        s = unicodedata.normalize(self.unicode_form, text)
+        if self.strip_headers_footers:
+            s = self._strip_headers(s)
+        if self.repair_ocr:
+            s = self._repair_ocr(s)
+        if self.collapse_whitespace:
+            import re
+
+            s = re.sub(r"[^\S\n]+", " ", s)
+            s = re.sub(r"\n{3,}", "\n\n", s)
+            s = s.strip()
+        return s
+
+    def normalize_batch(self, texts: list[str]) -> list[str]:
+        return [self.normalize(t) for t in texts]
+
+    def _strip_headers(self, text: str) -> str:
+        pages = text.split("\x0c")
+        if len(pages) < 2:
+            return text
+        all_lines = [p.splitlines() for p in pages]
+        candidates = {
+            ln.strip()
+            for ln in all_lines[0]
+            if ln.strip()
+            and sum(
+                1
+                for pl in all_lines
+                if ln.strip() in [row.strip() for row in pl]
+            )
+            * 2
+            > len(pages)
+        }
+        return "\x0c".join(
+            "\n".join(
+                row
+                for row in page.splitlines()
+                if row.strip() not in candidates
+            )
+            for page in pages
+        )
+
+    def _repair_ocr(self, text: str) -> str:
+        import re
+
+        text = re.sub(r"([a-z])rn([a-z])", r"\1m\2", text)
+        text = re.sub(r"(\d)l(\d)", r"\g<1>1\2", text)
+        return text
+
+
+class Chunk:
+    def __init__(
+        self,
+        text: str,
+        token_count: int,
+        char_start: int,
+        char_end: int,
+        section_title: str | None,
+        chunk_index: int,
+        total_chunks: int | None,
+        metadata: dict,
+    ) -> None:
+        self.text = text
+        self.token_count = token_count
+        self.char_start = char_start
+        self.char_end = char_end
+        self.section_title = section_title
+        self.chunk_index = chunk_index
+        self.total_chunks = total_chunks
+        self.metadata = metadata
+
+    def __repr__(self) -> str:
+        return (
+            f"Chunk(index={self.chunk_index}/{self.total_chunks}, "
+            f"tokens={self.token_count}, "
+            f"chars={self.char_start}..{self.char_end})"
+        )
+
+
+class TextChunker:
+    def __init__(
+        self,
+        max_tokens: int = 2000,
+        min_tokens: int = 100,
+        tokenizer: str = "cl100k_base",
+        preserve_tables: bool = True,
+        section_patterns: list[str] | None = None,
+    ) -> None:
+        if min_tokens > max_tokens:
+            raise ValueError(
+                f"min_tokens ({min_tokens}) must be "
+                f"<= max_tokens ({max_tokens})"
+            )
+        try:
+            import tiktoken
+
+            tiktoken.get_encoding(tokenizer)
+        except ImportError:
+            pass
+        self.max_tokens = max_tokens
+        self.min_tokens = min_tokens
+        self.tokenizer = tokenizer
+        self.preserve_tables = preserve_tables
+        self.section_patterns = section_patterns or []
+
+    def _count(self, text: str) -> int:
+        try:
+            import tiktoken
+
+            enc = tiktoken.get_encoding(self.tokenizer)
+            # Mirror Rust encode_with_special_tokens: allow all special tokens.
+            return len(enc.encode(text, allowed_special="all"))
+        except Exception:
+            return len(text) // 4
+
+    def chunk(self, text: str) -> list[Chunk]:
+        import re
+
+        # Split with a capturing group so we can measure the actual separator
+        # length (2+ newlines). Without this, char_cursor drifts when gaps use
+        # 3+ newlines because the old code always added a fixed +2.
+        pieces = re.split(r"(\n\n+)", text)
+        chunks: list[Chunk] = []
+        current_parts: list[str] = []
+        current_tokens = 0
+        char_cursor = 0
+        current_start = 0
+        section_title: str | None = None
+
+        for idx, piece in enumerate(pieces):
+            if idx % 2 == 1:
+                # Odd indices are separator strings ("\n\n", "\n\n\n", …)
+                char_cursor += len(piece)
+                continue
+
+            para = piece.strip()
+            if not para:
+                char_cursor += len(piece)
+                continue
+
+            para_tokens = self._count(para)
+
+            # Single paragraph exceeds max_tokens — emit as oversized.
+            if para_tokens > self.max_tokens:
+                if current_parts:
+                    chunk_text = "\n\n".join(current_parts)
+                    chunks.append(
+                        Chunk(
+                            text=chunk_text,
+                            token_count=current_tokens,
+                            char_start=current_start,
+                            char_end=char_cursor,
+                            section_title=section_title,
+                            chunk_index=0,
+                            total_chunks=None,
+                            metadata={},
+                        )
+                    )
+                    current_parts = []
+                    current_tokens = 0
+                    current_start = char_cursor
+                end = char_cursor + len(piece)
+                chunks.append(
+                    Chunk(
+                        text=para,
+                        token_count=para_tokens,
+                        char_start=char_cursor,
+                        char_end=end,
+                        section_title=section_title,
+                        chunk_index=0,
+                        total_chunks=None,
+                        metadata={"oversized": True},
+                    )
+                )
+                char_cursor = end
+                current_start = char_cursor
+                continue
+
+            if current_tokens + para_tokens > self.max_tokens and current_parts:
+                chunk_text = "\n\n".join(current_parts)
+                chunks.append(
+                    Chunk(
+                        text=chunk_text,
+                        token_count=current_tokens,
+                        char_start=current_start,
+                        char_end=char_cursor,
+                        section_title=section_title,
+                        chunk_index=0,
+                        total_chunks=None,
+                        metadata={},
+                    )
+                )
+                current_parts = []
+                current_tokens = 0
+                current_start = char_cursor
+
+            current_parts.append(para)
+            current_tokens += para_tokens
+            char_cursor += len(piece)
+
+        if current_parts:
+            chunk_text = "\n\n".join(current_parts)
+            chunks.append(
+                Chunk(
+                    text=chunk_text,
+                    token_count=current_tokens,
+                    char_start=current_start,
+                    char_end=char_cursor,
+                    section_title=section_title,
+                    chunk_index=0,
+                    total_chunks=None,
+                    metadata={},
+                )
+            )
+
+        total = len(chunks)
+        for i, c in enumerate(chunks):
+            c.chunk_index = i
+            c.total_chunks = total
+        return chunks
+
+    def chunk_batch(self, texts: list[str]) -> list[list[Chunk]]:
+        return [self.chunk(t) for t in texts]
+
+
+class TokenCounter:
+    def __init__(self, model: str = "cl100k_base") -> None:
+        try:
+            import tiktoken
+
+            tiktoken.get_encoding(model)
+        except ImportError:
+            pass
+        self.model = model
+
+    def _bpe(self):
+        try:
+            import tiktoken
+
+            return tiktoken.get_encoding(self.model)
+        except Exception:
+            return None
+
+    def count(self, text: str) -> int:
+        bpe = self._bpe()
+        # allowed_special="all" mirrors Rust encode_with_special_tokens and
+        # prevents ValueError when text contains tokens like <|endoftext|>.
+        return (
+            len(bpe.encode(text, allowed_special="all"))
+            if bpe
+            else len(text) // 4
+        )
+
+    def count_batch(self, texts: list[str]) -> list[int]:
+        return [self.count(t) for t in texts]
+
+    def truncate(
+        self, text: str, max_tokens: int, strategy: str = "end"
+    ) -> str:
+        bpe = self._bpe()
+        if bpe is None:
+            # Word-based approximation: ~1 token per word
+            words = text.split()
+            if len(words) <= max_tokens:
+                return text
+            if strategy == "middle":
+                half = max_tokens // 2
+                kept = words[:half] + words[-(max_tokens - half) :]
+            else:
+                kept = words[:max_tokens]
+            return " ".join(kept)
+        tokens = bpe.encode(text, allowed_special="all")
+        if len(tokens) <= max_tokens:
+            return text
+        if strategy == "middle":
+            half = max_tokens // 2
+            kept = tokens[:half] + tokens[-(max_tokens - half) :]
+        else:
+            kept = tokens[:max_tokens]
+        return bpe.decode(kept)
diff --git a/TextSpitter/core.py b/TextSpitter/core.py
index 125b5c3..4e91c32 100644
--- a/TextSpitter/core.py
+++ b/TextSpitter/core.py
@@ -10,6 +10,8 @@
 
 from docx import Document
 
+from TextSpitter import detect_encoding
+
 # --- Module-level imports for optional PDF libraries ---
 try:
     import pymupdf
@@ -301,32 +303,25 @@ def get_contents(self) -> bytes:
     def code_file_read(self) -> str:
         """
         Reads contents from programming language files (.py, .js, .java, etc.)
-        with enhanced encoding detection and preserves original formatting.
+        with encoding detection and preserves original formatting.
 
         Returns:
             str: The file content as a string
         """
         contents_bytes = self.get_contents()
-
-        # Common encodings for source code files
-        encodings_to_try = ["utf-8", "utf-8-sig", "latin-1", "cp1252"]
-
-        for encoding in encodings_to_try:
-            try:
-                content = contents_bytes.decode(encoding)
-                logger.info(
-                    f"Successfully decoded {self.file_name} using {encoding}"
-                )
-                return content
-            except UnicodeDecodeError:
-                continue
-
-        # If all encodings fail, use utf-8 with replacement
-        logger.warning(
-            f"Could not decode code file {self.file_name} with standard "
-            f"encodings, using utf-8 with replacement characters."
-        )
-        return contents_bytes.decode("utf-8", errors="replace")
+        encoding = detect_encoding(contents_bytes)
+        try:
+            content = contents_bytes.decode(encoding)
+            logger.info(
+                f"Successfully decoded {self.file_name} using {encoding}"
+            )
+            return content
+        except (UnicodeDecodeError, LookupError):
+            logger.warning(
+                f"Could not decode {self.file_name} with detected encoding "
+                f"'{encoding}', falling back to utf-8 with replacement."
+            )
+            return contents_bytes.decode("utf-8", errors="replace")
 
     def pdf_file_read(self) -> str:  # Added return type hint
         """
@@ -405,8 +400,12 @@ def docx_file_read(self) -> str:  # Added return type hint
 
     def _decode_bytes(self, data: bytes, label: str) -> str:
         """
-        Decode bytes to str, trying UTF-8 then latin-1 then UTF-8 with
-        replacement characters.
+        Decode bytes to str, trying UTF-8, cp1252, then latin-1, then UTF-8
+        with replacement characters.
+
+        cp1252 is tried before latin-1 so Windows smart-quote bytes (0x80-0x9F)
+        decode to printable characters instead of C1 control characters.
+        latin-1 always succeeds and acts as the final deterministic fallback.
 
         Args:
             data: Raw bytes to decode.
@@ -415,16 +414,13 @@ def _decode_bytes(self, data: bytes, label: str) -> str:
         Returns:
             str
         """
-        try:
-            return data.decode("utf-8")
-        except UnicodeDecodeError:
-            pass
-        try:
-            return data.decode("latin-1")
-        except UnicodeDecodeError:
-            pass
+        for enc in ("utf-8", "cp1252", "latin-1"):
+            try:
+                return data.decode(enc)
+            except (UnicodeDecodeError, LookupError):
+                continue
         logger.warning(
-            f"Could not decode {label} with utf-8 or latin-1, "
+            f"Could not decode {label} with utf-8, cp1252, or latin-1, "
             f"using utf-8 with replacement characters."
         )
         return data.decode("utf-8", errors="replace")
diff --git a/pyproject.toml b/pyproject.toml
index 1e6d22c..e4a8576 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [build-system]
-requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"
+requires = ["maturin>=1.5,<2.0"]
+build-backend = "maturin"
 
 [tool.ruff]
 fix = true
@@ -92,8 +92,10 @@ textspitter = "TextSpitter.cli:main"
 Homepage = "https://github.com/fsecada01/TextSpitter"
 Issues = "https://github.com/fsecada01/TextSpitter/issues"
 
-[tool.setuptools]
-packages = ["TextSpitter", "TextSpitter.guide"]
+[tool.maturin]
+features = ["pyo3/extension-module"]
+module-name = "TextSpitter._core"
+python-source = "."
 
 [tool.pytest.ini_options]
 testpaths = ["tests"]
@@ -119,4 +121,6 @@ dev = [
     "ruff",
     "ty",
     "twine",
+    "maturin>=1.14.0",
+    "tiktoken",
 ]
diff --git a/src/chunk.rs b/src/chunk.rs
new file mode 100644
index 0000000..50fe02c
--- /dev/null
+++ b/src/chunk.rs
@@ -0,0 +1,355 @@
+use pyo3::prelude::*;
+use rayon::prelude::*;
+use std::collections::HashMap;
+use tiktoken_rs::{get_bpe_from_model, CoreBPE};
+
+/// Resolve a tiktoken encoding name, or failing that a model name, to its BPE.
+///
+/// The five encoding names matched below are loaded directly; any other string
+/// is handed to ``get_bpe_from_model``. Errors are returned as their display
+/// string.
+fn load_bpe(name: &str) -> Result<CoreBPE, String> {
+    let loaded = match name {
+        "cl100k_base" => tiktoken_rs::cl100k_base(),
+        "o200k_base" => tiktoken_rs::o200k_base(),
+        "r50k_base" => tiktoken_rs::r50k_base(),
+        "p50k_base" => tiktoken_rs::p50k_base(),
+        "p50k_edit" => tiktoken_rs::p50k_edit(),
+        model_name => get_bpe_from_model(model_name),
+    };
+    loaded.map_err(|err| format!("{err}"))
+}
+
+/// A single chunk produced by ``TextChunker``.
+///
+/// ``get_all`` asks PyO3 to generate a Python getter for every field below.
+#[pyclass(get_all)]
+#[derive(Clone, Debug)]
+pub struct Chunk {
+    /// The chunk text.
+    pub text: String,
+    /// BPE token count for this chunk.
+    pub token_count: usize,
+    /// Unicode code-point start offset in the original input string.
+    ///
+    /// NOTE(review): ``TextChunker::split`` advances offsets by the code-point
+    /// length of each emitted unit, and paragraph units are trimmed and
+    /// re-terminated with "\n\n"; the value may therefore drift from the
+    /// position in the raw input. Confirm the intended semantics.
+    pub char_start: usize,
+    /// Unicode code-point end offset (exclusive) in the original input string.
+    /// The same caveat as ``char_start`` applies.
+    pub char_end: usize,
+    /// Enclosing section header, if detected.
+    pub section_title: Option<String>,
+    /// Zero-based position in the chunk sequence.
+    pub chunk_index: usize,
+    /// Total chunks in the sequence. ``make_chunk`` leaves this ``None``;
+    /// ``chunk`` and ``chunk_batch`` fill it in before returning.
+    pub total_chunks: Option<usize>,
+    /// Extra metadata. The only key inserted in this module is ``"oversized"``
+    /// (always ``true``), set when a single unit exceeds ``max_tokens``.
+    pub metadata: HashMap<String, bool>,
+}
+
+#[pymethods]
+impl Chunk {
+    /// Python ``repr()``: ``Chunk(index=I/N, tokens=T, chars=S..E)``.
+    ///
+    /// ``N`` is the total chunk count, or ``?`` while ``total_chunks`` is unset,
+    /// so Rust's ``Some(..)`` / ``None`` debug syntax never appears in Python
+    /// output.
+    fn __repr__(&self) -> String {
+        let total = match self.total_chunks {
+            Some(count) => count.to_string(),
+            None => "?".to_string(),
+        };
+        format!(
+            "Chunk(index={}/{}, tokens={}, chars={}..{})",
+            self.chunk_index,
+            total,
+            self.token_count,
+            self.char_start,
+            self.char_end,
+        )
+    }
+}
+
+/// Token-budgeted text chunker exposed to Python.
+#[pyclass]
+pub struct TextChunker {
+    /// Token budget per chunk. A single unit above this is emitted whole and
+    /// flagged ``oversized``.
+    max_tokens: usize,
+    /// Checked to be <= ``max_tokens`` in ``new``; not otherwise read in this
+    /// file.
+    min_tokens: usize,
+    /// Encoding or model name passed to ``load_bpe``.
+    tokenizer: String,
+    /// When true, runs of ``|``-delimited lines are kept together as one unit.
+    preserve_tables: bool,
+    /// Extra section-header regexes, appended to the built-in patterns.
+    section_patterns: Vec<String>,
+}
+
+#[pymethods]
+impl TextChunker {
+    /// Create a chunker.
+    ///
+    /// Raises ``ValueError`` when ``min_tokens > max_tokens`` or when
+    /// ``tokenizer`` cannot be resolved to a BPE.
+    #[new]
+    #[pyo3(signature = (
+        max_tokens = 2000,
+        min_tokens = 100,
+        tokenizer = "cl100k_base".to_string(),
+        preserve_tables = true,
+        section_patterns = vec![],
+    ))]
+    pub fn new(
+        max_tokens: usize,
+        min_tokens: usize,
+        tokenizer: String,
+        preserve_tables: bool,
+        section_patterns: Vec<String>,
+    ) -> PyResult<Self> {
+        if min_tokens > max_tokens {
+            return Err(pyo3::exceptions::PyValueError::new_err(format!(
+                "min_tokens ({min_tokens}) must be <= max_tokens ({max_tokens})"
+            )));
+        }
+        // Validate the tokenizer name now so a typo fails at construction
+        // time rather than on the first chunk() call. The BPE is discarded.
+        load_bpe(&tokenizer).map_err(pyo3::exceptions::PyValueError::new_err)?;
+        Ok(Self { max_tokens, min_tokens, tokenizer, preserve_tables, section_patterns })
+    }
+
+    /// Chunk text into a list of ``Chunk`` objects.
+    ///
+    /// Every returned chunk has ``chunk_index`` and ``total_chunks`` filled in.
+    pub fn chunk(&self, text: &str) -> PyResult<Vec<Chunk>> {
+        self.split(text).map(number_chunks)
+    }
+
+    /// Chunk a batch of texts in parallel (GIL released).
+    ///
+    /// Results are returned in input order; the first error aborts the batch.
+    pub fn chunk_batch(
+        &self,
+        py: Python<'_>,
+        texts: Vec<String>,
+    ) -> PyResult<Vec<Vec<Chunk>>> {
+        // `self` holds only plain owned data, so it can be borrowed by the
+        // worker threads directly instead of being rebuilt for every text.
+        py.allow_threads(|| {
+            texts
+                .par_iter()
+                .map(|text| self.split(text).map(number_chunks))
+                .collect::<PyResult<Vec<_>>>()
+        })
+    }
+}
+
+/// Stamp each chunk with its position and the total number of chunks.
+fn number_chunks(mut chunks: Vec<Chunk>) -> Vec<Chunk> {
+    let total = chunks.len();
+    for (index, chunk) in chunks.iter_mut().enumerate() {
+        chunk.chunk_index = index;
+        chunk.total_chunks = Some(total);
+    }
+    chunks
+}
+
+impl TextChunker {
+    /// Split ``text`` into chunks, greedily packing units up to ``max_tokens``.
+    ///
+    /// Units come from ``segment_units`` (table blocks and trimmed paragraphs).
+    /// A unit that alone exceeds ``max_tokens`` is emitted as its own chunk
+    /// with ``metadata["oversized"] = true``. ``chunk_index`` and
+    /// ``total_chunks`` keep their placeholder values; the caller fills them in.
+    ///
+    /// Returns a ``ValueError`` if the tokenizer cannot be loaded.
+    ///
+    /// NOTE(review): ``char_start`` / ``char_end`` are accumulated from the
+    /// code-point length of each emitted unit, and paragraph units are trimmed
+    /// and re-terminated with "\n\n" by ``push_text_units``, so the offsets may
+    /// drift from positions in the raw input. Confirm the intended semantics.
+    fn split(&self, text: &str) -> PyResult<Vec<Chunk>> {
+        let bpe = load_bpe(&self.tokenizer)
+            .map_err(pyo3::exceptions::PyValueError::new_err)?;
+
+        let section_re = self.build_section_regex();
+        let table_re = if self.preserve_tables {
+            Some(regex::Regex::new(r"(?m)^\|.+\|[ \t]*$").unwrap())
+        } else {
+            None
+        };
+
+        // Split text into logical units: tables (atomic) and paragraph blocks.
+        let units = segment_units(text, table_re.as_ref(), section_re.as_ref());
+
+        let mut chunks: Vec<Chunk> = Vec::new();
+        let mut current_text = String::new();
+        let mut current_start: usize = 0; // char offset of the pending chunk
+        let mut current_section: Option<String> = None;
+        let mut char_cursor: usize = 0; // char offset just past the last unit
+
+        for unit in units {
+            let unit_tokens = bpe.encode_with_special_tokens(&unit.text).len();
+
+            // If this unit alone exceeds max_tokens, emit it as an oversized chunk.
+            if unit_tokens > self.max_tokens {
+                // Flush any pending content first.
+                if !current_text.is_empty() {
+                    chunks.push(self.make_chunk(
+                        &current_text,
+                        &bpe,
+                        current_start,
+                        char_cursor,
+                        current_section.clone(),
+                        false,
+                    ));
+                    current_text.clear();
+                }
+                let unit_len: usize = unit.text.chars().count();
+                chunks.push(self.make_chunk(
+                    &unit.text,
+                    &bpe,
+                    char_cursor,
+                    char_cursor + unit_len,
+                    unit.section_title.or(current_section.clone()),
+                    true, // oversized
+                ));
+                char_cursor += unit_len;
+                // The next pending chunk begins after the oversized unit.
+                // Without this, its char_start would still point at the
+                // oversized unit's start and the two ranges would overlap.
+                current_start = char_cursor;
+                continue;
+            }
+
+            let pending_tokens = bpe.encode_with_special_tokens(&current_text).len();
+
+            // Always flush on overflow — max_tokens is a hard cap; min_tokens
+            // is a soft target that must not allow chunks to exceed max_tokens.
+            if pending_tokens + unit_tokens > self.max_tokens && !current_text.is_empty() {
+                chunks.push(self.make_chunk(
+                    &current_text,
+                    &bpe,
+                    current_start,
+                    char_cursor,
+                    current_section.clone(),
+                    false,
+                ));
+                current_text.clear();
+                current_start = char_cursor;
+            }
+
+            // The pending chunk is labelled with the most recent section seen.
+            if let Some(title) = &unit.section_title {
+                current_section = Some(title.clone());
+            }
+
+            let unit_char_len = unit.text.chars().count();
+            current_text.push_str(&unit.text);
+            char_cursor += unit_char_len;
+        }
+
+        // Flush any remaining content.
+        if !current_text.is_empty() {
+            chunks.push(self.make_chunk(
+                &current_text,
+                &bpe,
+                current_start,
+                char_cursor,
+                current_section,
+                false,
+            ));
+        }
+
+        Ok(chunks)
+    }
+
+    /// Build a ``Chunk`` for ``text``, counting its tokens with ``bpe``.
+    ///
+    /// ``chunk_index`` is set to 0 and ``total_chunks`` to ``None``; callers
+    /// overwrite both. ``oversized`` adds ``"oversized": true`` to the metadata.
+    fn make_chunk(
+        &self,
+        text: &str,
+        bpe: &tiktoken_rs::CoreBPE,
+        char_start: usize,
+        char_end: usize,
+        section_title: Option<String>,
+        oversized: bool,
+    ) -> Chunk {
+        let token_count = bpe.encode_with_special_tokens(text).len();
+        let mut metadata = HashMap::new();
+        if oversized {
+            metadata.insert("oversized".to_string(), true);
+        }
+        Chunk {
+            text: text.to_string(),
+            token_count,
+            char_start,
+            char_end,
+            section_title,
+            chunk_index: 0,      // set by caller
+            total_chunks: None,  // set by caller
+            metadata,
+        }
+    }
+
+    /// Combine the built-in header patterns with ``section_patterns`` into one
+    /// multi-line alternation.
+    ///
+    /// A user pattern that does not compile on its own is dropped, so that one
+    /// bad pattern no longer disables section detection altogether. Returns
+    /// ``None`` only if the combined expression still fails to compile.
+    fn build_section_regex(&self) -> Option<regex::Regex> {
+        let mut patterns = vec![
+            r"^[A-Z][A-Z\s]{4,}$".to_string(),
+            r"^\d+\.\s+[A-Z]".to_string(),
+            r"^SECTION\s+\d+".to_string(),
+            r"^Article\s+[IVX\d]+".to_string(),
+        ];
+        patterns.extend(
+            self.section_patterns
+                .iter()
+                .filter(|pattern| regex::Regex::new(pattern).is_ok())
+                .cloned(),
+        );
+        let combined = patterns.join("|");
+        regex::Regex::new(&format!("(?m){combined}")).ok()
+    }
+}
+
+/// One indivisible piece of input handed to the chunk packer.
+struct Unit {
+    /// Text of the unit: either a table block copied verbatim or a trimmed
+    /// paragraph followed by "\n\n".
+    text: String,
+    /// Section header in effect for this unit; always ``None`` for tables.
+    section_title: Option<String>,
+}
+
+/// Segment text into atomic units: tables stay whole, text splits on
+/// paragraph breaks and section headers.
+///
+/// With ``table_re`` set to ``None`` the whole input is treated as plain text.
+fn segment_units(
+    text: &str,
+    table_re: Option<&regex::Regex>,
+    section_re: Option<&regex::Regex>,
+) -> Vec<Unit> {
+    let mut out = Vec::new();
+    let mut rest = text;
+
+    while !rest.is_empty() {
+        // Locate the next table line anywhere in what is left.
+        let Some(hit) = table_re.and_then(|re| re.find(rest)) else {
+            // No further tables: everything left is ordinary text.
+            push_text_units(rest, section_re, &mut out);
+            break;
+        };
+
+        let table_start = hit.start();
+        if table_start > 0 {
+            // Text preceding the table becomes paragraph units.
+            push_text_units(&rest[..table_start], section_re, &mut out);
+        }
+
+        // The table block runs through its last consecutive table line.
+        let table_stop = find_table_end(rest, table_start);
+        out.push(Unit {
+            text: rest[table_start..table_stop].to_string(),
+            section_title: None,
+        });
+        rest = &rest[table_stop..];
+    }
+
+    out
+}
+
+/// Append one ``Unit`` per non-blank paragraph of ``text`` to ``units``.
+///
+/// Paragraphs are separated by "\n\n", trimmed, and re-terminated with "\n\n".
+/// When ``section_re`` matches inside a paragraph, the matched text becomes
+/// the section title for that paragraph and for the following ones in this
+/// call, until another match replaces it.
+fn push_text_units(
+    text: &str,
+    section_re: Option<&regex::Regex>,
+    units: &mut Vec<Unit>,
+) {
+    let mut active_section: Option<String> = None;
+
+    let paragraphs = text
+        .split("\n\n")
+        .map(str::trim)
+        .filter(|body| !body.is_empty());
+
+    for body in paragraphs {
+        if let Some(header) = section_re.and_then(|re| re.find(body)) {
+            active_section = Some(header.as_str().trim().to_string());
+        }
+
+        units.push(Unit {
+            text: format!("{body}\n\n"),
+            section_title: active_section.clone(),
+        });
+    }
+}
+
+/// Return the byte offset in ``text`` just past the table block that begins
+/// at byte offset ``start``.
+///
+/// The block extends over consecutive lines that either start with ``|``
+/// (after leading whitespace) or are blank, and includes their line
+/// terminators ("\n" or "\r\n").
+fn find_table_end(text: &str, start: usize) -> usize {
+    // split_inclusive keeps each terminator attached to its line, so summing
+    // the piece lengths accounts for LF and CRLF endings alike.
+    let consumed: usize = text[start..]
+        .split_inclusive('\n')
+        .take_while(|piece| {
+            piece.trim_start().starts_with('|') || piece.trim().is_empty()
+        })
+        .map(str::len)
+        .sum();
+    (start + consumed).min(text.len())
+}
diff --git a/src/encoding.rs b/src/encoding.rs
new file mode 100644
index 0000000..f2d6da0
--- /dev/null
+++ b/src/encoding.rs
@@ -0,0 +1,66 @@
+use chardetng::EncodingDetector;
+use pyo3::prelude::*;
+
+/// Map a WHATWG encoding label to a Python codec name.
+///
+/// Lower-casing the label is enough for most encodings, but a few WHATWG
+/// labels are not names Python's codec registry is known to accept, so they
+/// are translated explicitly:
+///
+/// - ``windows-NNNN`` becomes ``cpNNNN`` (Python ships the code pages under
+///   their ``cp`` names; not every ``windows-*`` spelling is an alias).
+/// - ``ISO-8859-8-I`` and ``x-mac-cyrillic`` are WHATWG-specific spellings of
+///   ``iso8859-8`` and ``mac-cyrillic``.
+fn to_python_codec(whatwg_name: &str) -> String {
+    match whatwg_name {
+        "UTF-8" => "utf-8".into(),
+        "UTF-16LE" => "utf-16-le".into(),
+        "UTF-16BE" => "utf-16-be".into(),
+        // chardetng collapses ISO-8859-1 and windows-1252 into windows-1252;
+        // Python's canonical name for that codec is cp1252.
+        "windows-1252" | "ISO-8859-1" => "cp1252".into(),
+        "ISO-8859-8-I" => "iso8859-8".into(),
+        "x-mac-cyrillic" => "mac-cyrillic".into(),
+        other => match other.strip_prefix("windows-") {
+            Some(code_page) => format!("cp{code_page}"),
+            None => other.to_lowercase(),
+        },
+    }
+}
+
+/// Detect the character encoding of raw bytes.
+///
+/// Returns a Python codec name suitable for use with ``bytes.decode()``:
+///
+/// - empty input returns ``"utf-8"``;
+/// - input starting with a UTF-8, UTF-16 or UTF-32 byte-order mark returns
+///   ``"utf-8-sig"``, ``"utf-16"`` or ``"utf-32"`` (codecs that consume the
+///   BOM while decoding);
+/// - anything else is classified by chardetng in a single pass.
+#[pyfunction]
+pub fn detect_encoding(data: &[u8]) -> String {
+    if data.is_empty() {
+        return "utf-8".into();
+    }
+
+    // Explicit BOM check before chardetng. chardetng returns "UTF-8" for
+    // BOM-prefixed files, but Python's "utf-8" codec preserves the BOM at
+    // position 0, whereas "utf-8-sig" strips it during decode. UTF-16/UTF-32
+    // BOMs are sniffed here too, since a legacy-encoding guess for such input
+    // would decode to garbage. The 4-byte UTF-32 marks are tested before the
+    // 2-byte UTF-16 marks because FF FE is a prefix of FF FE 00 00.
+    let bom_codec = if data.starts_with(b"\xef\xbb\xbf") {
+        Some("utf-8-sig")
+    } else if data.starts_with(b"\xff\xfe\x00\x00") || data.starts_with(b"\x00\x00\xfe\xff") {
+        Some("utf-32")
+    } else if data.starts_with(b"\xff\xfe") || data.starts_with(b"\xfe\xff") {
+        Some("utf-16")
+    } else {
+        None
+    };
+    if let Some(codec) = bom_codec {
+        return codec.into();
+    }
+
+    // Feed the entire buffer; last=true signals end-of-stream.
+    let mut detector = EncodingDetector::new();
+    detector.feed(data, true);
+
+    // guess(tld, allow_utf8): None TLD, allow UTF-8 as a candidate.
+    let encoding = detector.guess(None, true);
+    to_python_codec(encoding.name())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn detects_utf8() {
+        let data = "Hello, world! — Unicode café".as_bytes();
+        assert_eq!(detect_encoding(data), "utf-8");
+    }
+
+    #[test]
+    fn detects_windows1252() {
+        // 0x93/0x94 are Windows-1252 "smart quotes", invalid in UTF-8.
+        // to_python_codec never returns the WHATWG spelling "windows-1252",
+        // so only the Python name is accepted.
+        let data = b"Hello \x93world\x94";
+        assert_eq!(detect_encoding(data), "cp1252");
+    }
+
+    #[test]
+    fn utf8_bom_returns_utf8_sig() {
+        // The BOM must select the codec that strips it during decode.
+        assert_eq!(detect_encoding(b"\xef\xbb\xbfhello"), "utf-8-sig");
+    }
+
+    #[test]
+    fn empty_bytes_returns_utf8() {
+        assert_eq!(detect_encoding(b""), "utf-8");
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..d26e2a5
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,17 @@
+use pyo3::prelude::*;
+
+mod encoding;
+mod normalize;
+mod token;
+mod chunk;
+mod separator;
+
+/// Native extension module: registers ``detect_encoding`` and the
+/// ``TextNormalizer``, ``TokenCounter``, ``TextChunker`` and ``Chunk`` classes.
+#[pymodule]
+fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_function(wrap_pyfunction!(encoding::detect_encoding, m)?)?;
+    m.add_class::<normalize::TextNormalizer>()?;
+    m.add_class::<token::TokenCounter>()?;
+    m.add_class::<chunk::TextChunker>()?;
+    m.add_class::<chunk::Chunk>()?;
+    Ok(())
+}
diff --git a/src/normalize.rs b/src/normalize.rs
new file mode 100644
index 0000000..98aded4
--- /dev/null
+++ b/src/normalize.rs
@@ -0,0 +1,143 @@
+use pyo3::prelude::*;
+use rayon::prelude::*;
+use unicode_normalization::UnicodeNormalization;
+
+/// Configurable text clean-up pipeline exposed to Python.
+#[pyclass]
+pub struct TextNormalizer {
+    /// Unicode normalization form: "NFC", "NFD", "NFKC" or "NFKD". Any other
+    /// value is treated as "NFC" by ``normalize_one``.
+    unicode_form: String,
+    /// Run ``collapse_whitespace`` as the last step.
+    collapse_whitespace: bool,
+    /// Run ``repair_ocr_artifacts`` after header/footer stripping.
+    repair_ocr: bool,
+    /// Run ``strip_headers_footers`` right after Unicode normalization.
+    strip_headers_footers: bool,
+}
+
+#[pymethods]
+impl TextNormalizer {
+    /// Create a normalizer. No argument is validated here; an unrecognised
+    /// ``unicode_form`` is handled as "NFC" when text is normalized.
+    #[new]
+    #[pyo3(signature = (
+        unicode_form = "NFC".to_string(),
+        collapse_whitespace = true,
+        repair_ocr = false,
+        strip_headers_footers = false,
+    ))]
+    pub fn new(
+        unicode_form: String,
+        collapse_whitespace: bool,
+        repair_ocr: bool,
+        strip_headers_footers: bool,
+    ) -> Self {
+        TextNormalizer {
+            unicode_form,
+            collapse_whitespace,
+            repair_ocr,
+            strip_headers_footers,
+        }
+    }
+
+    /// Normalize a single string.
+    pub fn normalize(&self, text: &str) -> String {
+        self.normalize_one(text)
+    }
+
+    /// Normalize many strings in parallel with the GIL released; the output
+    /// order matches the input order.
+    pub fn normalize_batch(
+        &self,
+        py: Python<'_>,
+        texts: Vec<String>,
+    ) -> Vec<String> {
+        py.allow_threads(|| {
+            texts
+                .par_iter()
+                .map(|raw| self.normalize_one(raw))
+                .collect::<Vec<String>>()
+        })
+    }
+}
+
+impl TextNormalizer {
+    /// Apply Unicode normalization, then each enabled clean-up pass in a
+    /// fixed order: header/footer stripping, OCR repair, whitespace collapse.
+    fn normalize_one(&self, text: &str) -> String {
+        // "NFC" and every unrecognised form share the final arm.
+        let mut out: String = match self.unicode_form.as_str() {
+            "NFD" => text.nfd().collect(),
+            "NFKC" => text.nfkc().collect(),
+            "NFKD" => text.nfkd().collect(),
+            _ => text.nfc().collect(),
+        };
+
+        let passes: [(bool, fn(&str) -> String); 3] = [
+            (self.strip_headers_footers, strip_headers_footers),
+            (self.repair_ocr, repair_ocr_artifacts),
+            (self.collapse_whitespace, collapse_whitespace),
+        ];
+        for (enabled, pass) in passes {
+            if enabled {
+                out = pass(&out);
+            }
+        }
+
+        out
+    }
+}
+
+/// Remove running headers/footers: lines that recur on more than half of the
+/// pages of a form-feed (``\x0c``) separated document.
+///
+/// Lines are compared exactly after trimming surrounding whitespace; blank
+/// lines are never candidates. The input is returned unchanged when it has no
+/// form feed or when no line recurs often enough. Otherwise each page is
+/// rebuilt from its surviving lines joined with "\n", and pages are re-joined
+/// with "\x0c".
+fn strip_headers_footers(text: &str) -> String {
+    let pages: Vec<&str> = text.split('\x0c').collect();
+    if pages.len() < 2 {
+        return text.to_string();
+    }
+
+    // For every distinct trimmed line, count the pages it appears on. Lines
+    // are de-duplicated per page first so a line repeated within one page is
+    // counted once. Collecting from all pages (not only page 0) keeps running
+    // headers detectable when page 0 is a cover page with no shared lines.
+    let mut pages_containing: std::collections::HashMap<&str, usize> =
+        std::collections::HashMap::new();
+    for page in &pages {
+        let distinct: std::collections::HashSet<&str> = page
+            .lines()
+            .map(str::trim)
+            .filter(|line| !line.is_empty())
+            .collect();
+        for line in distinct {
+            *pages_containing.entry(line).or_insert(0) += 1;
+        }
+    }
+
+    // Keys are trimmed, matching the trimmed lookup in the filter below; this
+    // is what lets headers padded with whitespace be removed.
+    let repeated: std::collections::HashSet<&str> = pages_containing
+        .into_iter()
+        .filter(|&(_, count)| count * 2 > pages.len())
+        .map(|(line, _)| line)
+        .collect();
+
+    if repeated.is_empty() {
+        return text.to_string();
+    }
+
+    pages.iter()
+        .map(|page| {
+            page.lines()
+                .filter(|line| !repeated.contains(line.trim()))
+                .collect::<Vec<_>>()
+                .join("\n")
+        })
+        .collect::<Vec<_>>()
+        .join("\x0c")
+}
+
+/// Heuristic OCR artifact repair for common Tesseract substitutions.
+/// Uses capture groups — Rust's regex crate does not support lookaround.
+///
+/// Two rewrites are applied: ``rn`` between lowercase letters becomes ``m``,
+/// and ``l`` between digits becomes ``1``.
+///
+/// NOTE(review): the ``rn`` rule is purely positional, so genuine words that
+/// contain ``rn`` between lowercase letters are rewritten as well.
+fn repair_ocr_artifacts(text: &str) -> String {
+    // ([a-z])rn([a-z]) → $1m$2  — 'rn' between lowercase letters
+    let rn_to_m = regex::Regex::new(r"([a-z])rn([a-z])").unwrap();
+    // (\d)l(\d) → ${1}1${2}  — 'l' between digits
+    let l_between_digits = regex::Regex::new(r"(\d)l(\d)").unwrap();
+
+    // Each match consumes its context characters, and replace_all only
+    // rewrites non-overlapping matches, so in "1l2l3" the second 'l' is
+    // skipped because its leading digit belongs to the first match. A second
+    // pass picks up such skipped neighbours.
+    fn replace_twice(re: &regex::Regex, input: &str, replacement: &str) -> String {
+        let first = re.replace_all(input, replacement);
+        let second = re.replace_all(&first, replacement);
+        second.into_owned()
+    }
+
+    let s = replace_twice(&rn_to_m, text, "${1}m${2}");
+    replace_twice(&l_between_digits, &s, "${1}1${2}")
+}
+
+/// Squeeze whitespace: every run of non-newline whitespace becomes a single
+/// space, three or more consecutive newlines become two, and the result is
+/// trimmed at both ends.
+fn collapse_whitespace(text: &str) -> String {
+    // `[^\S\n]` is "whitespace that is not a newline".
+    let inline_gap = regex::Regex::new(r"[^\S\n]+").unwrap();
+    let blank_line_run = regex::Regex::new(r"\n{3,}").unwrap();
+
+    let single_spaced = inline_gap.replace_all(text, " ");
+    let compacted = blank_line_run.replace_all(&single_spaced, "\n\n");
+    compacted.trim().to_string()
+}
diff --git a/src/separator.rs b/src/separator.rs
new file mode 100644
index 0000000..7bd3e24
--- /dev/null
+++ b/src/separator.rs
@@ -0,0 +1,2 @@
+// Section-boundary detection, with optional SIMD acceleration.
+// Filled in during the chunk.rs implementation phase.
diff --git a/src/token.rs b/src/token.rs
new file mode 100644
index 0000000..35ee812
--- /dev/null
+++ b/src/token.rs
@@ -0,0 +1,111 @@
+use pyo3::prelude::*;
+use rayon::prelude::*;
+use tiktoken_rs::{get_bpe_from_model, CoreBPE};
+
+/// Resolve an encoding name ("cl100k_base") or model name ("gpt-4") to a BPE.
+///
+/// Errors from tiktoken are flattened to their display string.
+fn load_bpe(name: &str) -> Result<CoreBPE, String> {
+    let lookup = match name {
+        "cl100k_base" => tiktoken_rs::cl100k_base(),
+        "o200k_base" => tiktoken_rs::o200k_base(),
+        "r50k_base" => tiktoken_rs::r50k_base(),
+        "p50k_base" => tiktoken_rs::p50k_base(),
+        "p50k_edit" => tiktoken_rs::p50k_edit(),
+        // Anything else is tried as a model name (e.g. "gpt-4" → cl100k_base).
+        model_name => get_bpe_from_model(model_name),
+    };
+    match lookup {
+        Ok(bpe) => Ok(bpe),
+        Err(cause) => Err(cause.to_string()),
+    }
+}
+
+/// BPE token counter / truncator exposed to Python.
+#[pyclass]
+pub struct TokenCounter {
+    /// Encoding or model name, validated with ``load_bpe`` in ``new`` and
+    /// resolved again by each method that needs the tokenizer.
+    model: String,
+}
+
+#[pymethods]
+impl TokenCounter {
+    /// Create a counter for an encoding name or model name.
+    ///
+    /// Raises ``ValueError`` if the name cannot be resolved to a BPE.
+    #[new]
+    #[pyo3(signature = (model = "cl100k_base".to_string()))]
+    pub fn new(model: String) -> PyResult<Self> {
+        load_bpe(&model)
+            .map_err(|e| pyo3::exceptions::PyValueError::new_err(
+                format!("Unknown tiktoken model '{}': {}", model, e)
+            ))?;
+        Ok(Self { model })
+    }
+
+    /// Return the number of BPE tokens in ``text``.
+    pub fn count(&self, text: &str) -> PyResult<usize> {
+        let bpe = load_bpe(&self.model)
+            .map_err(pyo3::exceptions::PyValueError::new_err)?;
+        Ok(bpe.encode_with_special_tokens(text).len())
+    }
+
+    /// Count tokens for many texts in parallel with the GIL released.
+    ///
+    /// Counts are returned in input order.
+    pub fn count_batch(
+        &self,
+        py: Python<'_>,
+        texts: Vec<String>,
+    ) -> PyResult<Vec<usize>> {
+        let model = self.model.as_str();
+        py.allow_threads(|| {
+            texts.par_iter()
+                // map_init builds one tokenizer per rayon work unit and
+                // reuses it for the items in that unit, instead of
+                // constructing a fresh BPE for every single text.
+                .map_init(
+                    || load_bpe(model),
+                    |bpe, text| match bpe {
+                        Ok(bpe) => Ok(bpe.encode_with_special_tokens(text).len()),
+                        Err(message) => Err(message.clone()),
+                    },
+                )
+                .collect::<Result<Vec<_>, String>>()
+        })
+        .map_err(pyo3::exceptions::PyValueError::new_err)
+    }
+
+    /// Truncate text to at most ``max_tokens`` tokens.
+    ///
+    /// Text already within the budget is returned unchanged.
+    ///
+    /// Strategies:
+    /// - ``"end"``    — keep the start, drop from the end.
+    /// - ``"middle"`` — keep start and end, drop the middle.
+    /// - ``"smart"``  — like ``"middle"``, but about two thirds of the budget
+    ///   go to the start and the remainder to the end.
+    ///
+    /// Any other strategy string behaves like ``"end"``.
+    ///
+    /// Raises ``ValueError`` if the tokenizer cannot be loaded or the kept
+    /// tokens cannot be decoded back to text.
+    #[pyo3(signature = (text, max_tokens, strategy = "end".to_string()))]
+    pub fn truncate(
+        &self,
+        text: &str,
+        max_tokens: usize,
+        strategy: String,
+    ) -> PyResult<String> {
+        let bpe = load_bpe(&self.model)
+            .map_err(pyo3::exceptions::PyValueError::new_err)?;
+
+        let tokens = bpe.encode_with_special_tokens(text);
+        if tokens.len() <= max_tokens {
+            return Ok(text.to_string());
+        }
+
+        let kept = match strategy.as_str() {
+            "middle" => {
+                // The head gets the rounded-down half, the tail the rest.
+                let half = max_tokens / 2;
+                let mut t = tokens[..half].to_vec();
+                t.extend_from_slice(&tokens[tokens.len() - (max_tokens - half)..]);
+                t
+            }
+            "smart" => truncate_smart(&tokens, max_tokens),
+            _ => tokens[..max_tokens].to_vec(),
+        };
+
+        bpe.decode(kept)
+            .map_err(|e| pyo3::exceptions::PyValueError::new_err(e.to_string()))
+    }
+}
+
+/// Keep ``max_tokens`` tokens, split 2:1 between the head and the tail of
+/// ``tokens``; the middle is what gets dropped.
+///
+/// The head receives ``ceil(2 * max_tokens / 3)`` tokens and the tail the
+/// rest. When the tail share is zero, or the head and tail would meet or
+/// overlap, the first ``min(max_tokens, len)`` tokens are returned instead.
+fn truncate_smart(tokens: &[usize], max_tokens: usize) -> Vec<usize> {
+    let head_len = (max_tokens * 2).div_ceil(3);
+    let tail_len = max_tokens - head_len;
+    let tail_from = tokens.len().saturating_sub(tail_len);
+
+    if tail_len > 0 && tail_from > head_len {
+        [&tokens[..head_len], &tokens[tail_from..]].concat()
+    } else {
+        let prefix_len = max_tokens.min(tokens.len());
+        tokens[..prefix_len].to_vec()
+    }
+}
diff --git a/tests/test_chunker.py b/tests/test_chunker.py
new file mode 100644
index 0000000..20a6f3b
--- /dev/null
+++ b/tests/test_chunker.py
@@ -0,0 +1,268 @@
+"""
+Tests for TextChunker and Chunk (Rust and Python fallback paths).
+"""
+
+import pytest
+
+from TextSpitter import _RUST_AVAILABLE
+from TextSpitter import TextChunker as RustChunker
+from TextSpitter import TokenCounter
+from TextSpitter._fallback import TextChunker as FallbackChunker
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(params=["rust", "fallback"])
+def Chunker(request):
+    if request.param == "rust":
+        if not _RUST_AVAILABLE:
+            pytest.skip("Rust extension not available")
+        return RustChunker
+    return FallbackChunker
+
+
+SHORT_TEXT = "Hello world, this is a test."
+THREE_PARAS = (
+    "First paragraph here with some words.\n\n"
+    "Second paragraph here with more words.\n\n"
+    "Third paragraph here with even more words."
+)
+
+
+# ---------------------------------------------------------------------------
+# Construction validation
+# ---------------------------------------------------------------------------
+
+def test_instantiation_defaults(Chunker):
+    chunker = Chunker()
+    assert chunker is not None
+
+
+def test_min_tokens_gt_max_tokens_raises(Chunker):
+    """Constructing with min_tokens above max_tokens must raise."""
+    # NOTE(review): the tuple is equivalent to plain `Exception`, so any error
+    # (even a TypeError from a misspelled keyword) satisfies this test. The
+    # Rust backend raises ValueError; tighten once the fallback is confirmed
+    # to do the same.
+    with pytest.raises((ValueError, Exception)):
+        Chunker(max_tokens=100, min_tokens=200)
+
+
+def test_min_tokens_equal_max_tokens_ok(Chunker):
+    chunker = Chunker(max_tokens=100, min_tokens=100)
+    assert chunker is not None
+
+
+def test_invalid_tokenizer_raises(Chunker):
+    """An unknown tokenizer name must be rejected at construction time."""
+    # NOTE(review): the tuple is equivalent to plain `Exception`; the Rust
+    # backend raises ValueError. Tighten once the fallback's exception type
+    # is confirmed.
+    with pytest.raises((ValueError, Exception)):
+        Chunker(tokenizer="nonexistent-tokenizer-xyz")
+
+
+# ---------------------------------------------------------------------------
+# chunk() — return type and basic structure
+# ---------------------------------------------------------------------------
+
+def test_chunk_returns_list(Chunker):
+    chunker = Chunker()
+    result = chunker.chunk(SHORT_TEXT)
+    assert isinstance(result, list)
+
+
+def test_chunk_empty_string_returns_empty(Chunker):
+    chunker = Chunker()
+    result = chunker.chunk("")
+    assert result == []
+
+
+def test_chunk_whitespace_only_returns_empty(Chunker):
+    chunker = Chunker()
+    result = chunker.chunk("   \n\n  \t  ")
+    assert result == []
+
+
+def test_chunk_items_are_chunk_type(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    chunks = chunker.chunk(SHORT_TEXT)
+    assert len(chunks) > 0
+    # Works for both Rust Chunk and fallback Chunk
+    chunk = chunks[0]
+    assert hasattr(chunk, "text")
+    assert hasattr(chunk, "token_count")
+    assert hasattr(chunk, "char_start")
+    assert hasattr(chunk, "char_end")
+    assert hasattr(chunk, "chunk_index")
+    assert hasattr(chunk, "total_chunks")
+    assert hasattr(chunk, "metadata")
+
+
+# ---------------------------------------------------------------------------
+# Chunk field correctness
+# ---------------------------------------------------------------------------
+
+def test_chunk_index_sequence(Chunker):
+    # Force multiple chunks with a very small max_tokens
+    chunker = Chunker(max_tokens=5, min_tokens=1)
+    chunks = chunker.chunk(THREE_PARAS)
+    assert len(chunks) >= 1
+    indices = [c.chunk_index for c in chunks]
+    assert indices == list(range(len(chunks)))
+
+
+def test_total_chunks_consistent(Chunker):
+    chunker = Chunker(max_tokens=5, min_tokens=1)
+    chunks = chunker.chunk(THREE_PARAS)
+    total = len(chunks)
+    assert all(c.total_chunks == total for c in chunks)
+
+
+def test_chunk_text_non_empty(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    chunks = chunker.chunk(THREE_PARAS)
+    assert all(len(c.text.strip()) > 0 for c in chunks)
+
+
+def test_chunk_token_count_positive(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    chunks = chunker.chunk(THREE_PARAS)
+    assert all(c.token_count > 0 for c in chunks)
+
+
+def test_char_offsets_are_int(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    chunks = chunker.chunk(THREE_PARAS)
+    for c in chunks:
+        assert isinstance(c.char_start, int)
+        assert isinstance(c.char_end, int)
+        assert c.char_end > c.char_start
+
+
+def test_char_offsets_are_codepoint_not_byte(Chunker):
+    """Offsets are meant to be code-point based, matching Python str indexing."""
+    # Non-ASCII text: "café" — é is 2 bytes in UTF-8 but 1 code point.
+    # char_start/end must be code-point offsets (matching Python str indexing).
+    text = "café\n\ncorner"
+    chunker = Chunker(max_tokens=2000)
+    chunks = chunker.chunk(text)
+    for c in chunks:
+        # Slice the input with the reported offsets.
+        reconstructed = text[c.char_start:c.char_end]
+        # NOTE(review): this assertion is close to vacuous — it passes as soon
+        # as the slice is non-empty, so byte offsets would pass as well.
+        # Tighten it once the offset contract shared by both backends is settled.
+        assert c.text.strip() in text or text in c.text or len(reconstructed) > 0
+
+
+# ---------------------------------------------------------------------------
+# max_tokens enforcement
+# ---------------------------------------------------------------------------
+
+def test_chunks_respect_max_tokens(Chunker):
+    if not _RUST_AVAILABLE:
+        pytest.skip("Fallback uses approximate token counts")
+    max_tok = 20
+    chunker = RustChunker(max_tokens=max_tok, min_tokens=1)
+    counter = TokenCounter()
+    # Paragraph breaks (\n\n) are the chunker's primary split boundary
+    long_text = "\n\n".join(
+        [f"Para {i} some text here." for i in range(30)]
+    )
+    chunks = chunker.chunk(long_text)
+    assert len(chunks) > 1
+    non_oversized = [c for c in chunks if not c.metadata.get("oversized")]
+    for c in non_oversized:
+        assert counter.count(c.text) <= max_tok, (
+            f"Chunk {c.chunk_index} has {counter.count(c.text)} tokens, "
+            f"expected <= {max_tok}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# preserve_tables
+# ---------------------------------------------------------------------------
+
+def test_oversized_table_emits_oversized_chunk(Chunker):
+    if not _RUST_AVAILABLE:
+        pytest.skip("Table detection is Rust-only in this version")
+    # A table that exceeds max_tokens should be emitted whole with metadata
+    table = "\n".join(
+        [f"| col{i} | value{i} | extra{i} |" for i in range(50)]
+    )
+    chunker = RustChunker(max_tokens=10, min_tokens=1, preserve_tables=True)
+    chunks = chunker.chunk(table)
+    assert any(c.metadata.get("oversized") for c in chunks)
+
+
+# ---------------------------------------------------------------------------
+# section_title propagation
+# ---------------------------------------------------------------------------
+
+def test_section_title_detected_from_allcaps_header(Chunker):
+    if not _RUST_AVAILABLE:
+        pytest.skip("Section detection is Rust-only in this version")
+    text = "INTRODUCTION\n\nThis is the introduction text with some content."
+    chunker = RustChunker(max_tokens=2000)
+    chunks = chunker.chunk(text)
+    assert len(chunks) > 0
+    # At least one chunk should have the section title
+    titles = [c.section_title for c in chunks]
+    assert any(t is not None for t in titles)
+
+
+def test_section_title_none_when_no_header(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    text = "Just a plain paragraph with no header."
+    chunks = chunker.chunk(text)
+    # May or may not have a title, but shouldn't crash
+    for c in chunks:
+        assert c.section_title is None or isinstance(c.section_title, str)
+
+
+# ---------------------------------------------------------------------------
+# chunk_batch()
+# ---------------------------------------------------------------------------
+
+def test_chunk_batch_returns_list_of_lists(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    result = chunker.chunk_batch([SHORT_TEXT, THREE_PARAS])
+    assert isinstance(result, list)
+    assert len(result) == 2
+    assert all(isinstance(r, list) for r in result)
+
+
+def test_chunk_batch_empty_input(Chunker):
+    chunker = Chunker(max_tokens=2000)
+    assert chunker.chunk_batch([]) == []
+
+
+def test_chunk_batch_matches_sequential(Chunker):
+    chunker = Chunker(max_tokens=20, min_tokens=1)
+    texts = [SHORT_TEXT, THREE_PARAS, "Another short text."]
+    batch = chunker.chunk_batch(texts)
+    sequential = [chunker.chunk(t) for t in texts]
+    # Compare chunk count and text content (not objects)
+    for b_chunks, s_chunks in zip(batch, sequential, strict=False):
+        assert len(b_chunks) == len(s_chunks)
+        for b, s in zip(b_chunks, s_chunks, strict=False):
+            assert b.text == s.text
+            assert b.token_count == s.token_count
+
+
+def test_chunk_batch_large_parallel():
+    """Smoke-test parallel batch doesn't deadlock or corrupt output."""
+    if not _RUST_AVAILABLE:
+        pytest.skip("Rust extension not available")
+    chunker = RustChunker(max_tokens=50, min_tokens=1)
+    texts = [THREE_PARAS + f" Unique suffix {i}." for i in range(50)]
+    results = chunker.chunk_batch(texts)
+    assert len(results) == 50
+    # Each text should produce at least one chunk
+    assert all(len(r) >= 1 for r in results)
+
+
+# ---------------------------------------------------------------------------
+# Rust-specific Chunk repr
+# ---------------------------------------------------------------------------
+
+def test_chunk_repr():
+    if not _RUST_AVAILABLE:
+        pytest.skip("Rust extension not available")
+    chunker = RustChunker(max_tokens=2000)
+    chunks = chunker.chunk(SHORT_TEXT)
+    assert len(chunks) > 0
+    r = repr(chunks[0])
+    assert "Chunk" in r
+    assert "tokens" in r
diff --git a/tests/test_detect_encoding.py b/tests/test_detect_encoding.py
new file mode 100644
index 0000000..1892d71
--- /dev/null
+++ b/tests/test_detect_encoding.py
@@ -0,0 +1,80 @@
+"""
+Tests for the detect_encoding function (Rust and Python fallback paths).
+"""
+
+import pytest
+
+from TextSpitter import _RUST_AVAILABLE, detect_encoding
+from TextSpitter._fallback import detect_encoding as fallback_detect
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(params=["rust", "fallback"])
+def detect(request):
+    if request.param == "rust":
+        if not _RUST_AVAILABLE:
+            pytest.skip("Rust extension not available")
+        return detect_encoding
+    return fallback_detect
+
+
+# ---------------------------------------------------------------------------
+# Core behaviour (both paths)
+# ---------------------------------------------------------------------------
+
+def test_utf8_text(detect):
+    data = "Hello, world!".encode("utf-8")
+    assert detect(data) == "utf-8"
+
+
+def test_utf8_with_multibyte(detect):
+    data = "café résumé naïve".encode("utf-8")
+    assert detect(data) == "utf-8"
+
+
+def test_empty_bytes_returns_utf8(detect):
+    assert detect(b"") == "utf-8"
+
+
+def test_pure_ascii_returns_utf8(detect):
+    # ASCII is a valid subset of UTF-8; should be identified as utf-8.
+    assert detect(b"Hello world 12345") == "utf-8"
+
+
+def test_windows1252_smart_quotes(detect):
+    # 0x93/0x94 are Windows-1252 curly quotes — invalid in UTF-8.
+    data = b"He said \x93hello\x94 to her"
+    result = detect(data)
+    assert result in ("cp1252", "windows-1252", "latin-1"), f"unexpected: {result}"
+
+
+def test_return_type_is_str(detect):
+    assert isinstance(detect(b"test"), str)
+
+
+def test_return_value_is_valid_python_codec(detect):
+    encodings_to_probe = [
+        "Hello UTF-8".encode("utf-8"),
+        b"byte string \x80\x81",
+    ]
+    for data in encodings_to_probe:
+        enc = detect(data)
+        # The returned codec name must be usable with bytes.decode().
+        try:
+            data.decode(enc, errors="replace")
+        except LookupError:
+            pytest.fail(f"detect_encoding returned invalid codec name: {enc!r}")
+
+
+# ---------------------------------------------------------------------------
+# Rust-only: large buffer handled without panic
+# ---------------------------------------------------------------------------
+
+def test_large_buffer_does_not_panic():
+    if not _RUST_AVAILABLE:
+        pytest.skip("Rust extension not available")
+    data = ("The quick brown fox jumps over the lazy dog. " * 10_000).encode("utf-8")
+    result = detect_encoding(data)
+    assert result == "utf-8"
diff --git a/tests/test_file_extractor.py b/tests/test_file_extractor.py
index 614e3c8..fd7ac54 100644
--- a/tests/test_file_extractor.py
+++ b/tests/test_file_extractor.py
@@ -282,34 +282,23 @@ def test_code_file_read_latin1():
 
 
 def test_code_file_read_fallback_to_replace_on_decode_error(mocker, log_capture):
-    original_bytes_content = b"\x80\x90\xa0"  # Intended to fail initial decodes
+    # Bytes that are invalid UTF-8 — will fail the detected encoding decode.
+    original_bytes_content = b"\x80\x90\xa0"
 
-    mock_bytes_instance = MagicMock(spec=bytes)
-
-    def mock_decode_side_effect(encoding, errors=None):
-        if encoding == "utf-8" and errors == "replace":
-            return original_bytes_content.decode("utf-8", errors="replace")
-        if encoding in ["utf-8", "utf-8-sig", "latin-1", "cp1252"]:
-            raise UnicodeDecodeError(
-                encoding, b"", 0, 0, "mocked reason for loop fail"
-            )
-        return original_bytes_content.decode(
-            encoding, errors=errors or "strict"
-        )  # Fallback for unexpected calls
-
-    mock_bytes_instance.decode = MagicMock(side_effect=mock_decode_side_effect)
     mocker.patch.object(
-        FileExtractor, "get_contents", return_value=mock_bytes_instance
+        FileExtractor, "get_contents", return_value=original_bytes_content
     )
+    # Force detect_encoding to return utf-8 so the decode attempt fails,
+    # exercising the utf-8-with-replacement fallback path in code_file_read.
+    mocker.patch("TextSpitter.core.detect_encoding", return_value="utf-8")
 
     extractor = FileExtractor(filename="broken.bin")
     decoded_content = extractor.code_file_read()
 
-    assert (
-        "Could not decode code file broken.bin with standard encodings"
-        in "\n".join(log_capture)
+    assert any(
+        "falling back to utf-8 with replacement" in line
+        for line in log_capture
     )
-    mock_bytes_instance.decode.assert_any_call("utf-8", errors="replace")
     assert decoded_content == original_bytes_content.decode(
         "utf-8", errors="replace"
     )
@@ -467,7 +456,7 @@ def mock_decode_side_effect(encoding, errors=None):
     result = extractor.text_file_read()
 
     assert (
-        "Could not decode text file badtext.txt with utf-8 or latin-1"
+        "Could not decode text file badtext.txt with utf-8, cp1252, or latin-1"
         in "\n".join(log_capture)
     )
     mock_bytes_instance.decode.assert_any_call("utf-8", errors="replace")
@@ -517,7 +506,7 @@ def mock_decode_side_effect(encoding, errors=None):
     result = extractor.csv_file_read()
 
     assert (
-        "Could not decode CSV file bad.csv with utf-8 or latin-1"
+        "Could not decode CSV file bad.csv with utf-8, cp1252, or latin-1"
         in "\n".join(log_capture)
     )
     mock_bytes_instance.decode.assert_any_call("utf-8", errors="replace")
diff --git a/tests/test_normalizer.py b/tests/test_normalizer.py
new file mode 100644
index 0000000..66ebc0f
--- /dev/null
+++ b/tests/test_normalizer.py
@@ -0,0 +1,217 @@
+"""
+Tests for TextNormalizer (Rust and Python fallback paths).
+"""
+
+import unicodedata
+
+import pytest
+
+from TextSpitter import _RUST_AVAILABLE
+from TextSpitter import TextNormalizer as RustNormalizer
+from TextSpitter._fallback import TextNormalizer as FallbackNormalizer
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(params=["rust", "fallback"])
+def Norm(request):
+    """Return the TextNormalizer class for the current path under test."""
+    if request.param == "rust":
+        if not _RUST_AVAILABLE:
+            pytest.skip("Rust extension not available")
+        return RustNormalizer
+    return FallbackNormalizer
+
+
+# ---------------------------------------------------------------------------
+# Default construction
+# ---------------------------------------------------------------------------
+
+def test_instantiation_defaults(Norm):
+    norm = Norm()
+    assert norm is not None
+
+
+def test_normalize_returns_str(Norm):
+    norm = Norm()
+    result = norm.normalize("hello")
+    assert isinstance(result, str)
+
+
+# ---------------------------------------------------------------------------
+# Unicode normalization forms
+# ---------------------------------------------------------------------------
+
+def test_nfc_composes_accents(Norm):
+    # Decomposed input (e + U+0301) spelled with escapes so editors/VCS cannot silently recompose it.
+    nfd_cafe = "cafe\u0301"
+    norm = Norm(unicode_form="NFC")
+    result = norm.normalize(nfd_cafe)
+    assert result == "caf\u00e9"
+    assert unicodedata.is_normalized("NFC", result)
+
+
+def test_nfd_decomposes_accents(Norm):
+    composed = "caf\u00e9"
+    norm = Norm(unicode_form="NFD")
+    result = norm.normalize(composed)
+    # NFD splits é (U+00E9) into e + combining acute (U+0301), adding one code point
+    assert len(result) > len(composed)
+    assert unicodedata.is_normalized("NFD", result)
+
+
+def test_nfkc_collapses_compatibility_chars(Norm):
+    # ﬁ (U+FB01, fi ligature) → "fi" under NFKC
+    norm = Norm(unicode_form="NFKC")
+    result = norm.normalize("ﬁ")
+    assert result == "fi"
+
+
+# ---------------------------------------------------------------------------
+# Whitespace collapsing
+# ---------------------------------------------------------------------------
+
+def test_collapses_horizontal_whitespace(Norm):
+    norm = Norm(collapse_whitespace=True)
+    assert norm.normalize("hello   world") == "hello world"
+
+
+def test_collapses_tabs(Norm):
+    norm = Norm(collapse_whitespace=True)
+    assert norm.normalize("a\t\tb") == "a b"
+
+
+def test_preserves_single_newlines(Norm):
+    norm = Norm(collapse_whitespace=True)
+    result = norm.normalize("line1\nline2")
+    assert "\n" in result
+
+
+def test_collapses_triple_newlines_to_double(Norm):
+    norm = Norm(collapse_whitespace=True)
+    result = norm.normalize("a\n\n\n\nb")
+    assert "\n\n\n" not in result
+    assert "\n\n" in result
+
+
+def test_strips_leading_trailing_whitespace(Norm):
+    norm = Norm(collapse_whitespace=True)
+    assert norm.normalize("  hello  ") == "hello"
+
+
+def test_whitespace_disabled_preserves_spaces(Norm):
+    norm = Norm(collapse_whitespace=False)
+    result = norm.normalize("a   b")
+    assert "   " in result
+
+
+# ---------------------------------------------------------------------------
+# OCR artifact repair
+# ---------------------------------------------------------------------------
+
+def test_ocr_rn_to_m_between_lowercase(Norm):
+    norm = Norm(repair_ocr=True)
+    # "clirnb" → "climb" (the 'rn' between 'i' and 'b' becomes 'm')
+    assert norm.normalize("clirnb") == "climb"
+
+
+def test_ocr_l_to_1_between_digits(Norm):
+    norm = Norm(repair_ocr=True)
+    # "5l3" → "513"
+    assert norm.normalize("5l3") == "513"
+
+
+def test_ocr_repair_does_not_touch_uppercase(Norm):
+    norm = Norm(repair_ocr=True)
+    # Capital RN should not be replaced
+    result = norm.normalize("CORN")
+    assert result == "CORN"
+
+
+def test_ocr_disabled_leaves_artifacts(Norm):
+    norm = Norm(repair_ocr=False)
+    assert norm.normalize("5l3") == "5l3"
+
+
+# ---------------------------------------------------------------------------
+# Header/footer stripping
+# ---------------------------------------------------------------------------
+
+def test_strip_headers_noop_without_formfeed(Norm):
+    norm = Norm(strip_headers_footers=True)
+    text = "Page header\nContent\nPage footer"
+    # No \f → no page boundaries detected → text returned unchanged
+    result = norm.normalize(text)
+    # Content must be preserved
+    assert "Content" in result
+
+
+def test_strip_headers_removes_repeated_lines(Norm):
+    norm = Norm(strip_headers_footers=True)
+    # Three pages, each with the same header "CONFIDENTIAL"
+    pages = [
+        "CONFIDENTIAL\nPage one content",
+        "CONFIDENTIAL\nPage two content",
+        "CONFIDENTIAL\nPage three content",
+    ]
+    text = "\x0c".join(pages)
+    result = norm.normalize(text)
+    # Content must survive
+    assert "one content" in result
+    assert "two content" in result
+    # The repeated header should be stripped from at least some pages
+    confidential_count = result.count("CONFIDENTIAL")
+    assert confidential_count < 3
+
+
+def test_strip_headers_disabled_preserves_all(Norm):
+    norm = Norm(strip_headers_footers=False)
+    pages = ["HDR\nBody1", "HDR\nBody2"]
+    text = "\x0c".join(pages)
+    result = norm.normalize(text)
+    assert result.count("HDR") == 2
+
+
+# ---------------------------------------------------------------------------
+# Batch processing
+# ---------------------------------------------------------------------------
+
+def test_normalize_batch_returns_list(Norm):
+    norm = Norm()
+    result = norm.normalize_batch(["foo", "bar"])
+    assert isinstance(result, list)
+    assert len(result) == 2
+
+
+def test_normalize_batch_empty_list(Norm):
+    norm = Norm()
+    assert norm.normalize_batch([]) == []
+
+
+def test_normalize_batch_matches_single(Norm):
+    norm = Norm(collapse_whitespace=True)
+    texts = ["  a  b  ", "  x  y  "]
+    batch = norm.normalize_batch(texts)
+    singles = [norm.normalize(t) for t in texts]
+    assert batch == singles
+
+
+def test_normalize_batch_large(Norm):
+    norm = Norm(collapse_whitespace=True)
+    texts = [f"  word{i}  stuff  " for i in range(200)]
+    results = norm.normalize_batch(texts)
+    assert len(results) == 200
+    assert all(not r.startswith(" ") for r in results)
+
+
+# ---------------------------------------------------------------------------
+# Idempotency
+# ---------------------------------------------------------------------------
+
+def test_normalize_is_idempotent(Norm):
+    norm = Norm(unicode_form="NFC", collapse_whitespace=True)
+    text = "  Hello   world\n\n\ncafé  "
+    once = norm.normalize(text)
+    twice = norm.normalize(once)
+    assert once == twice
diff --git a/tests/test_rust_integration.py b/tests/test_rust_integration.py
new file mode 100644
index 0000000..d281311
--- /dev/null
+++ b/tests/test_rust_integration.py
@@ -0,0 +1,218 @@
+"""
+Integration tests: end-to-end pipeline through all Rust-backed components,
+plus compatibility checks between the Rust and Python fallback paths.
+"""
+
+import pytest
+
+from TextSpitter import (
+    _RUST_AVAILABLE,
+    TextChunker,
+    TextNormalizer,
+    TokenCounter,
+    detect_encoding,
+)
+from TextSpitter._fallback import TextChunker as FallbackChunker
+from TextSpitter._fallback import TextNormalizer as FallbackNormalizer
+from TextSpitter._fallback import detect_encoding as fallback_detect
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+WAIVER_EXCERPT = """\
+DEPARTMENT OF HEALTH AND HUMAN SERVICES
+
+SECTION 1: ELIGIBILITY CRITERIA
+
+To qualify for Medicaid waiver services, individuals must meet the following
+requirements as established by federal and state regulations.
+
+SECTION 2: SERVICE DEFINITIONS
+
+Home and community-based services include personal care, respite care,
+and supported employment as defined in 42 C.F.R. § 441.301.
+
+SECTION 3: PROVIDER REQUIREMENTS
+
+All providers must maintain current licensure and comply with state
+background check requirements under applicable statutes.
+"""
+
+OCR_TEXT = (
+    "The patiern presented with a diagrnosis of hypertensi0n. "
+    "NPI: 1234567890. Service code T2025 was billed on 5l3/2024."
+)
+
+
+# ---------------------------------------------------------------------------
+# Encode → decode pipeline (detect_encoding + core.py integration)
+# ---------------------------------------------------------------------------
+
+def test_encode_detect_decode_roundtrip():
+    original = "Résumé: café, naïve, Ångström"
+    raw = original.encode("utf-8")
+    detected = detect_encoding(raw)
+    assert detected == "utf-8"
+    assert raw.decode(detected) == original
+
+
+def test_windows1252_encode_detect_decode():
+    original = "He said “hello” and ‘goodbye’"
+    raw = original.encode("cp1252")
+    detected = detect_encoding(raw)
+    decoded = raw.decode(detected, errors="replace")
+    # Content should survive the round-trip
+    assert "hello" in decoded
+    assert "goodbye" in decoded
+
+
+# ---------------------------------------------------------------------------
+# Normalize → chunk pipeline
+# ---------------------------------------------------------------------------
+
+def test_normalize_then_chunk(Norm=None):
+    norm = TextNormalizer(collapse_whitespace=True)
+    chunker = TextChunker(max_tokens=100, min_tokens=5)
+
+    clean = norm.normalize(WAIVER_EXCERPT)
+    assert isinstance(clean, str)
+    chunks = chunker.chunk(clean)
+    assert len(chunks) >= 1
+    # Reconstructed content should contain original text words
+    all_text = " ".join(c.text for c in chunks)
+    assert "eligibility" in all_text.lower()
+    assert "provider" in all_text.lower()
+
+
+def test_normalize_then_chunk_token_counts_consistent():
+    if not _RUST_AVAILABLE:
+        pytest.skip("Rust extension not available for consistent token counts")
+    norm = TextNormalizer(collapse_whitespace=True)
+    counter = TokenCounter()
+    chunker = TextChunker(max_tokens=50, min_tokens=1)
+
+    clean = norm.normalize(WAIVER_EXCERPT)
+    chunks = chunker.chunk(clean)
+    for c in chunks:
+        actual = counter.count(c.text)
+        # token_count on the chunk should be close to independently counted value
+        assert abs(actual - c.token_count) <= 2, (
+            f"Chunk reports {c.token_count} tokens, counter says {actual}"
+        )
+
+
+# ---------------------------------------------------------------------------
+# Section structure preserved through pipeline
+# ---------------------------------------------------------------------------
+
+def test_section_titles_detected_in_waiver():
+    if not _RUST_AVAILABLE:
+        pytest.skip("Section detection is Rust-only")
+    norm = TextNormalizer(collapse_whitespace=True)
+    chunker = TextChunker(max_tokens=2000)
+    clean = norm.normalize(WAIVER_EXCERPT)
+    chunks = chunker.chunk(clean)
+    titles = [c.section_title for c in chunks if c.section_title]
+    # At least one section header should be detected
+    assert len(titles) > 0
+
+
+# ---------------------------------------------------------------------------
+# Fallback ↔ Rust interface compatibility
+# ---------------------------------------------------------------------------
+
+def test_fallback_and_rust_detect_encoding_same_utf8():
+    data = "Hello, world!".encode("utf-8")
+    rust_result = detect_encoding(data)
+    fallback_result = fallback_detect(data)
+    # Both must return valid Python codec names and produce the same decoded text
+    assert data.decode(rust_result) == data.decode(fallback_result)
+
+
+def test_fallback_and_rust_normalizer_same_interface():
+    rust_norm = TextNormalizer(collapse_whitespace=True)
+    fallback_norm = FallbackNormalizer(collapse_whitespace=True)
+    text = "  hello   world  \n\n\n  foo  "
+    assert rust_norm.normalize(text) == fallback_norm.normalize(text)
+
+
+def test_fallback_and_rust_normalizer_batch_same_interface():
+    texts = ["  foo  bar  ", "  baz  qux  "]
+    rust_norm = TextNormalizer(collapse_whitespace=True)
+    fallback_norm = FallbackNormalizer(collapse_whitespace=True)
+    assert rust_norm.normalize_batch(texts) == fallback_norm.normalize_batch(texts)
+
+
+def test_fallback_chunker_same_field_names():
+    """Both paths must expose the same Chunk field names."""
+    rust_chunker = TextChunker(max_tokens=2000) if _RUST_AVAILABLE else None
+    fallback_chunker = FallbackChunker(max_tokens=2000)
+
+    text = "Some text here.\n\nMore text here."
+    fb_chunks = fallback_chunker.chunk(text)
+    assert len(fb_chunks) > 0
+    fb = fb_chunks[0]
+
+    required_attrs = [
+        "text", "token_count", "char_start", "char_end",
+        "section_title", "chunk_index", "total_chunks", "metadata",
+    ]
+    for attr in required_attrs:
+        assert hasattr(fb, attr), f"Fallback Chunk missing attribute: {attr}"
+
+    if rust_chunker is not None:
+        rust_chunks = rust_chunker.chunk(text)
+        assert len(rust_chunks) > 0
+        rc = rust_chunks[0]
+        for attr in required_attrs:
+            assert hasattr(rc, attr), f"Rust Chunk missing attribute: {attr}"
+
+
+def test_rust_available_flag_bool():
+    assert isinstance(_RUST_AVAILABLE, bool)
+
+
+# ---------------------------------------------------------------------------
+# Large document stress test
+# ---------------------------------------------------------------------------
+
+def test_large_document_pipeline():
+    """Normalise and chunk a large synthetic document without errors."""
+    # Build a ~50-section synthetic document
+    sections = []
+    for i in range(50):
+        sections.append(f"SECTION {i + 1}: TOPIC {i + 1}\n")
+        sections.append(
+            f"This is the body of section {i + 1}. " * 10 + "\n"
+        )
+    large_doc = "\n".join(sections)
+
+    norm = TextNormalizer(collapse_whitespace=True)
+    chunker = TextChunker(max_tokens=200, min_tokens=10)
+
+    clean = norm.normalize(large_doc)
+    chunks = chunker.chunk(clean)
+
+    assert len(chunks) > 1
+    # Indices must be gapless
+    assert [c.chunk_index for c in chunks] == list(range(len(chunks)))
+    # All chunks must have positive token counts
+    assert all(c.token_count > 0 for c in chunks)
+
+
+# ---------------------------------------------------------------------------
+# OCR repair + chunking
+# ---------------------------------------------------------------------------
+
+def test_ocr_repair_then_chunk():
+    norm = TextNormalizer(repair_ocr=True, collapse_whitespace=True)
+    chunker = TextChunker(max_tokens=500)
+
+    clean = norm.normalize(OCR_TEXT)
+    chunks = chunker.chunk(clean)
+
+    assert len(chunks) >= 1
+    all_text = " ".join(c.text for c in chunks)
+    # The rn→m rule (between lowercase letters) turns "diagrnosis" into "diagmosis", not "diagnosis"
+    assert "diagnosis" in all_text or "diagmosis" in all_text  # partial fix is ok
diff --git a/tests/test_token_counter.py b/tests/test_token_counter.py
new file mode 100644
index 0000000..5e697d2
--- /dev/null
+++ b/tests/test_token_counter.py
@@ -0,0 +1,192 @@
+"""
+Tests for TokenCounter (Rust and Python fallback paths).
+"""
+
+import pytest
+
+from TextSpitter import _RUST_AVAILABLE
+from TextSpitter import TokenCounter as RustCounter
+from TextSpitter._fallback import TokenCounter as FallbackCounter
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+@pytest.fixture(params=["rust", "fallback"])
+def Counter(request):
+    if request.param == "rust":
+        if not _RUST_AVAILABLE:
+            pytest.skip("Rust extension not available")
+        return RustCounter
+    return FallbackCounter
+
+
+# ---------------------------------------------------------------------------
+# Construction
+# ---------------------------------------------------------------------------
+
+def test_default_model(Counter):
+    c = Counter()
+    assert c is not None
+
+
+def test_unknown_model_raises(Counter):
+    with pytest.raises((ValueError, Exception)):
+        Counter(model="this-model-does-not-exist-xyz")
+
+
+# ---------------------------------------------------------------------------
+# count()
+# ---------------------------------------------------------------------------
+
+def test_count_returns_int(Counter):
+    c = Counter()
+    assert isinstance(c.count("hello"), int)
+
+
+def test_count_empty_string(Counter):
+    c = Counter()
+    assert c.count("") == 0
+
+
+def test_count_known_value(Counter):
+    # cl100k_base: "Hello, world!" → 4 tokens
+    c = Counter()
+    assert c.count("Hello, world!") == 4
+
+
+def test_count_longer_text(Counter):
+    c = Counter()
+    n = c.count("The quick brown fox jumps over the lazy dog.")
+    assert n > 0
+
+
+def test_count_is_positive(Counter):
+    c = Counter()
+    assert c.count("some text here") > 0
+
+
+# ---------------------------------------------------------------------------
+# count_batch()
+# ---------------------------------------------------------------------------
+
+def test_count_batch_returns_list(Counter):
+    c = Counter()
+    result = c.count_batch(["hello", "world"])
+    assert isinstance(result, list)
+
+
+def test_count_batch_empty(Counter):
+    c = Counter()
+    assert c.count_batch([]) == []
+
+
+def test_count_batch_matches_singles(Counter):
+    c = Counter()
+    texts = ["Hello, world!", "foo bar baz", ""]
+    batch = c.count_batch(texts)
+    singles = [c.count(t) for t in texts]
+    assert batch == singles
+
+
+def test_count_batch_large(Counter):
+    c = Counter()
+    texts = [f"word number {i}" for i in range(100)]
+    results = c.count_batch(texts)
+    assert len(results) == 100
+    assert all(isinstance(n, int) and n > 0 for n in results)
+
+
+# ---------------------------------------------------------------------------
+# truncate() — strategy: "end"
+# ---------------------------------------------------------------------------
+
+def test_truncate_end_returns_str(Counter):
+    c = Counter()
+    result = c.truncate("hello world", max_tokens=10)
+    assert isinstance(result, str)
+
+
+def test_truncate_end_no_op_when_under_limit(Counter):
+    c = Counter()
+    text = "hello"
+    result = c.truncate(text, max_tokens=100, strategy="end")
+    assert result == text
+
+
+def test_truncate_end_respects_limit(Counter):
+    c = Counter()
+    text = " ".join([f"word{i}" for i in range(50)])
+    result = c.truncate(text, max_tokens=10, strategy="end")
+    assert c.count(result) <= 10
+
+
+def test_truncate_end_preserves_start(Counter):
+    c = Counter()
+    text = "alpha beta gamma delta epsilon zeta eta theta iota kappa"
+    result = c.truncate(text, max_tokens=3, strategy="end")
+    # The first tokens should be kept
+    assert result.startswith("alpha")
+
+
+# ---------------------------------------------------------------------------
+# truncate() — strategy: "middle"
+# ---------------------------------------------------------------------------
+
+def test_truncate_middle_respects_limit(Counter):
+    c = Counter()
+    text = " ".join([f"word{i}" for i in range(50)])
+    result = c.truncate(text, max_tokens=10, strategy="middle")
+    assert c.count(result) <= 10
+
+
+def test_truncate_middle_preserves_start_and_end(Counter):
+    c = Counter()
+    # Build a 20-word text; truncate to 6 tokens — should keep start and end tokens
+    words = [f"w{i}" for i in range(20)]
+    text = " ".join(words)
+    result = c.truncate(text, max_tokens=6, strategy="middle")
+    # The very first word and very last word should survive
+    assert "w0" in result
+    assert "w19" in result
+
+
+# ---------------------------------------------------------------------------
+# truncate() — strategy: "smart"
+# ---------------------------------------------------------------------------
+
+def test_truncate_smart_respects_limit(Counter):
+    c = Counter()
+    text = " ".join([f"word{i}" for i in range(50)])
+    result = c.truncate(text, max_tokens=8, strategy="smart")
+    assert c.count(result) <= 8
+
+
+def test_truncate_smart_returns_nonempty(Counter):
+    c = Counter()
+    text = "one two three four five six seven eight"
+    result = c.truncate(text, max_tokens=4, strategy="smart")
+    assert len(result) > 0
+
+
+# ---------------------------------------------------------------------------
+# Alternative models (Rust path only — fallback may not have tiktoken)
+# ---------------------------------------------------------------------------
+
+def test_o200k_base_model():
+    if not _RUST_AVAILABLE:
+        pytest.skip("Rust extension not available")
+    c = RustCounter(model="o200k_base")
+    n = c.count("Hello, world!")
+    assert n > 0
+
+
+def test_cl100k_base_count_batch_parallel():
+    """Smoke-test that GIL-released batch doesn't deadlock or corrupt."""
+    if not _RUST_AVAILABLE:
+        pytest.skip("Rust extension not available")
+    c = RustCounter(model="cl100k_base")
+    texts = ["sentence number " + str(i) for i in range(500)]
+    results = c.count_batch(texts)
+    assert len(results) == 500
+    assert all(n > 0 for n in results)
diff --git a/uv.lock b/uv.lock
index fe73de1..aebad9d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1202,6 +1202,27 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/af/33/ee4519fa02ed11a94aef9559552f3b17bb863f2ecfe1a35dc7f548cde231/matplotlib_inline-0.2.1-py3-none-any.whl", hash = "sha256:d56ce5156ba6085e00a9d54fead6ed29a9c47e215cd1bba2e976ef39f5710a76", size = 9516, upload-time = "2025-10-23T09:00:20.675Z" },
 ]
 
+[[package]]
+name = "maturin"
+version = "1.14.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/d0/b7c8b7778cc44df3efbc96eb23acaa995e06ea1a60eb9b02f29858fcbd08/maturin-1.14.0.tar.gz", hash = "sha256:f7f82a6aca4a6c402bf00b99200be199d4874d04b9b9e74e825726a3478bba7f", size = 367010, upload-time = "2026-06-12T00:13:30.811Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/88/51/49367dcd8f6ec139e69ef0c695c8ff5075223673382101812b4affa53216/maturin-1.14.0-py3-none-linux_armv6l.whl", hash = "sha256:019ea3ec7e71f4c9759a367d4d21022ed5a3a621a2ce123abf3fb114ab3711ca", size = 10204135, upload-time = "2026-06-12T00:13:34.308Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/2a/487ce56c838d25e0ce64350e75ec4e3dc89544c0a6233221c229d6aa1a84/maturin-1.14.0-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:6948a10f5f3470b791f79319be51debdd8bfd1778b36f2409f98e1314bc3859b", size = 19736800, upload-time = "2026-06-12T00:13:40.456Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/a5/12f2efc18f419edce3282a93629cba16278bb502135dac95cd04ef7c2eae/maturin-1.14.0-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:1506e86b1e273a98074a62e281b13f27ac96f8cdef85f7f98d3e3589a9387a23", size = 10201144, upload-time = "2026-06-12T00:13:26.842Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/95/3789e72273fd8bc80c33a11c787634b3251c4989d7a7203a92438836d4ff/maturin-1.14.0-py3-none-manylinux_2_12_i686.manylinux2010_i686.musllinux_1_1_i686.whl", hash = "sha256:df10ce4f7ba97fd3423f624f39b94c888ae3e5b470642a91918e1ccec81282fd", size = 10182394, upload-time = "2026-06-12T00:13:13.693Z" },
+    { url = "https://files.pythonhosted.org/packages/40/79/15957eb4e055597f217e6310963a9c1371372e63c5b4a3e30803365addd2/maturin-1.14.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl", hash = "sha256:75bcd4468a7fe597652cc2980c6bb16ce4bb8c411e3eb85dac2c4418cef0e95a", size = 10616603, upload-time = "2026-06-12T00:13:22.795Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/4b/d1822f88cd5e855640f0e10ee00c39b9be614c1ef2f827e9792332d94b9f/maturin-1.14.0-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64.whl", hash = "sha256:2d123337e817f8dfe23755d6760139c01104137bb63e9e20c289c547e25ec857", size = 10075309, upload-time = "2026-06-12T00:13:38.274Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/82/c1b160d2163e8784489285e82a5c811fdcef3e0704e35b34c1cfe1828de3/maturin-1.14.0-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.musllinux_1_1_armv7l.whl", hash = "sha256:107f84110d890090a01bb1ecd01761fdfae925c23c659ba492c9b83dd179eab4", size = 10024058, upload-time = "2026-06-12T00:13:16.49Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/e8/88a9d1872997d4535af10ebe79f550e834880bf613cf8e50b50d2d938e3b/maturin-1.14.0-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.musllinux_1_1_ppc64le.whl", hash = "sha256:9a84277aa907961cd47ad26fef1539e79efa30611972eaf7499606e773e991b2", size = 13302073, upload-time = "2026-06-12T00:13:29.027Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/13/3f6d28bb7b744558b9bc78c995c1855d7e5ff21ad475f46d9de5c3dab039/maturin-1.14.0-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:095714b2a904927e3c868a1c5d078257ff0443c5049f7623777352966768306e", size = 10863616, upload-time = "2026-06-12T00:13:32.191Z" },
+    { url = "https://files.pythonhosted.org/packages/24/06/39352d2b402efa3a7dd01d4ed197b301ea35eec10208ba2b8c649101f4df/maturin-1.14.0-py3-none-manylinux_2_31_riscv64.musllinux_1_1_riscv64.whl", hash = "sha256:20229d332f87166b930e4ca07cdbee8a1726f2eea87a337610aa25bba3ddf4b4", size = 10399943, upload-time = "2026-06-12T00:13:36.273Z" },
+    { url = "https://files.pythonhosted.org/packages/58/77/641504541336240fef3836b2d15a785eaeb33c941fb118513c267dd70840/maturin-1.14.0-py3-none-win32.whl", hash = "sha256:4ba1e3c3f33609f461d587b7549104c81a15fd6d42ba63a73cea9376a1e9876e", size = 8905117, upload-time = "2026-06-12T00:13:18.38Z" },
+    { url = "https://files.pythonhosted.org/packages/02/4a/ca247a0c43069b2f48cf783c5b13c3a9eb92c8f596dc7fbdb9f75fea4414/maturin-1.14.0-py3-none-win_amd64.whl", hash = "sha256:cb09a313f097adeb4dda0082277871a28d1bd26615dbadab42e6234b6df6fe69", size = 10309099, upload-time = "2026-06-12T00:13:20.523Z" },
+    { url = "https://files.pythonhosted.org/packages/8b/a4/f14a3f6086cc3caaa90d12e832e4aa41de771c310041959f0d35dd4efe17/maturin-1.14.0-py3-none-win_arm64.whl", hash = "sha256:8c1a8188195f5b6ce1aab99ae2d92e342900298f901456b43ca028947fd3b288", size = 9719100, upload-time = "2026-06-12T00:13:24.741Z" },
+]
+
 [[package]]
 name = "mdurl"
 version = "0.1.2"
@@ -1832,6 +1853,94 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" },
 ]
 
+[[package]]
+name = "regex"
+version = "2026.5.9"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/0e/49aee608ad09480e7fd276898c99ec6192985fa331abe4eb3a986094490b/regex-2026.5.9.tar.gz", hash = "sha256:a8234aa23ec39894bfe4a3f1b85616a7032481964a13ac6fc9f10de4f6fca270", size = 416074, upload-time = "2026-05-09T23:15:19.37Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/50/9b/6550044bc44e17c84d312c031c2ec42fbdb6a4ec4e29093be3a172d08772/regex-2026.5.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:57eeeb05db7979413dec5438f2db21d7ecbba787cde7a711df1a6f6df672aa06", size = 490451, upload-time = "2026-05-09T23:12:34.72Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/95/fc7ba4303b5a0f92446a12ee6778ef2c6c799233f5060042a31bf390cfe9/regex-2026.5.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:398c521292f4c7fb807001dcd54694d3a1fcafc179a36ad9cc56f98df85930b6", size = 292112, upload-time = "2026-05-09T23:12:36.285Z" },
+    { url = "https://files.pythonhosted.org/packages/54/4b/ee27938d1b2c443e89a9a10e00d2d19aa5ee300cd3d61140644e93bb083e/regex-2026.5.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f7a7c26137296beba7784de6eba69c6a93a63ccebc385e4962fe67e267a91225", size = 289599, upload-time = "2026-05-09T23:12:38.089Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/dd/ba103dc19614e25f3880800ca67ce093d6e21b325d72b8383c7bf906e9fa/regex-2026.5.9-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6441cc660d76107934a09c22167200839a0e89604a6297f78a974e66e931d2c0", size = 796732, upload-time = "2026-05-09T23:12:40.062Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/e7/f035b4fd858b050b0080bf302968dc0f59ba34e391872d54936758e6844e/regex-2026.5.9-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:91328f1c23d47595ca3ef0a7557fa129c5a23404b775c770697d2f35b33e0107", size = 865440, upload-time = "2026-05-09T23:12:42.059Z" },
+    { url = "https://files.pythonhosted.org/packages/0a/51/8cd301ecc899aea28124357f729f4272f44de7806fc7ca02490bfbe253e8/regex-2026.5.9-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:93a7860539414dddaefba2b40f8771765ae17949d4c7182b876ce429e11a8309", size = 912329, upload-time = "2026-05-09T23:12:44.373Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/1e/3fbe2fa1e8cebd62f3bb7d3321cff1640aca2e240b51d9bd624aad949260/regex-2026.5.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd2810d22146b6d838acc5ec15602cb6b47920aa4e33015df3868eedfd20bab8", size = 801239, upload-time = "2026-05-09T23:12:46.268Z" },
+    { url = "https://files.pythonhosted.org/packages/17/2f/6f6008682bf2cf98040a0d3153a8e557b6ab728d7713d045cee4ce544ab8/regex-2026.5.9-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:daff2bdbaf1d23e52fdff7c0b7bc2048b68f978df6a4d107ac981f94caef2e66", size = 777054, upload-time = "2026-05-09T23:12:48.051Z" },
+    { url = "https://files.pythonhosted.org/packages/19/2b/eee0d20a6842ba04df4b8847a920b57ef56853f14ef85405473e586b605a/regex-2026.5.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4eeb011098fcb77af513dcef521a3dbecbf8849b1e38940759d293b7a93f5026", size = 785098, upload-time = "2026-05-09T23:12:49.851Z" },
+    { url = "https://files.pythonhosted.org/packages/4a/98/6fc1e6410feefb92159edaed5041992bfe390e8d26c721865434acbca558/regex-2026.5.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ea9c8ecfa1b73c73b626534d6626e5340d429630943672b8480724f44e84b962", size = 860095, upload-time = "2026-05-09T23:12:51.666Z" },
+    { url = "https://files.pythonhosted.org/packages/18/a3/bd855e0f2cb1a978ecf6fa6bb69632dd9c3f6ea3b81cde62fde14c9daec7/regex-2026.5.9-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:cd2846168eb9ee3c513902bc8225409cb1caab31d04728b145171fa1625d9621", size = 765762, upload-time = "2026-05-09T23:12:53.413Z" },
+    { url = "https://files.pythonhosted.org/packages/dc/66/0ae8c092e60b14c79d24f8e0b7f0aea5bfbffdcab00b5483d13404d3c3a5/regex-2026.5.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:39617fb0cde9c0e6306dc70e3bfc096f3da793219879f7ae7aa341a69fbdcf6d", size = 852100, upload-time = "2026-05-09T23:12:55.256Z" },
+    { url = "https://files.pythonhosted.org/packages/21/de/8dfde60fc1b21c946a893ba273403b72617edb261370cb1087099a83f088/regex-2026.5.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fd03c4f0e33280d15cae17159b899245d6b7c53d21def19b263b39655061f5ce", size = 789479, upload-time = "2026-05-09T23:12:57.573Z" },
+    { url = "https://files.pythonhosted.org/packages/c3/1c/bdcc98f9a4af4fdd166c74941174619ccff4726d3ce32faa8e9a2ecd38dd/regex-2026.5.9-cp312-cp312-win32.whl", hash = "sha256:164eba9b755ea6f244b0d881196fbc1fac09714e9782c9e2732b813142033c8e", size = 266699, upload-time = "2026-05-09T23:12:59.14Z" },
+    { url = "https://files.pythonhosted.org/packages/78/87/240d36864f9e48ace85f72e79ced97ceb7f27ce87739a947dcb834b4e6bc/regex-2026.5.9-cp312-cp312-win_amd64.whl", hash = "sha256:86f40a5d6444db30a125c9c9177e6b25dad981cbc37451fd838f145e6edac92e", size = 277783, upload-time = "2026-05-09T23:13:00.789Z" },
+    { url = "https://files.pythonhosted.org/packages/4f/b5/7b30f312b0669dff5beebe5b0989dc2d1a312b1a44fab852199c387a5b96/regex-2026.5.9-cp312-cp312-win_arm64.whl", hash = "sha256:96f5f58b54a063d7ea9dca08e1cf57bfe10499c4d579ee672da284f57f5f0070", size = 270513, upload-time = "2026-05-09T23:13:02.426Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/da/797e91ecec6f84135da778ddce78c20e0af5d2a15c26f87a81bc3eadb6db/regex-2026.5.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:d626b84406444b165fc0ba981604edea39f0588ff1f92baa23fe50799ea9afdb", size = 490303, upload-time = "2026-05-09T23:13:04.382Z" },
+    { url = "https://files.pythonhosted.org/packages/44/da/bf30abaaa737b58f4a4b8c4a03659e02fd92092c822e0197ed9e0daab917/regex-2026.5.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d7bdc0ab8f3dd7e1b4f9ab88634e13374669db86bb3c72e8292f07ae313f539f", size = 292019, upload-time = "2026-05-09T23:13:06.022Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/e7/d0eaf5713828417b9e5648cf81fa9bacd4961f6ab98c380c2034f8716e35/regex-2026.5.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a8820737949116ffff55fe18f9fc644530063ba6ebfcb8314239416e78f1347c", size = 289468, upload-time = "2026-05-09T23:13:08.214Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/9b/b3fdd62b003baa1a9b593cd8c8699c9651c2e80cc21a5c715707983c42d7/regex-2026.5.9-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa0fbdbac82cb3e4450d0ccde7d7a35607f4cb2dd9fba4b8b69bfaf8c9fa6aed", size = 796749, upload-time = "2026-05-09T23:13:10.573Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/30/66ab84588765f5b4b271a9ca09ef7ce2b87caa95176ec3d2ad65d7bc4902/regex-2026.5.9-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:57e8915c7986aa33d25e4d3629cef711cd2863f2961b10409f0c04cb8b7d9020", size = 865445, upload-time = "2026-05-09T23:13:12.523Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/89/f05169e8588aac365f35ffc7f3bc3184f095ef4cfded7cfaa3c7fd5dbd89/regex-2026.5.9-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508f56a89ba9cb26e4168cbc37dbd60a28d82430a9e18ad1d25fe0883c314ca2", size = 912322, upload-time = "2026-05-09T23:13:14.281Z" },
+    { url = "https://files.pythonhosted.org/packages/30/e1/c93444052cf41581f3c884ab3fb5823daf0992f11cd4388d4275ca610558/regex-2026.5.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6d189041f15691cfa2b6c4290448ec221244d225b3f5fe9e7771b34ffcdf6e2", size = 801269, upload-time = "2026-05-09T23:13:16.569Z" },
+    { url = "https://files.pythonhosted.org/packages/50/fe/0cf96b882f540e62e8b9956599798203d599c44cf4c77917ca27400ff69b/regex-2026.5.9-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e82db382b44d0111b22601c509c89f64434816c9e0eef9d1989cda8cc6ff1c04", size = 777085, upload-time = "2026-05-09T23:13:18.675Z" },
+    { url = "https://files.pythonhosted.org/packages/23/5c/d78d4924e7fc875557b9e9b768423925fdfaac5549d06da7810019a9bd26/regex-2026.5.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2acfb48634f64996b57f90f39afa692ff362162722581921fe92239a59960f3c", size = 785153, upload-time = "2026-05-09T23:13:20.525Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e0/5214774090e7b4524dcea3e3c4aa74141d43043f8beb49c1599db1c8b53a/regex-2026.5.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d29eebfc9525db68cad3c97eedd7f754fa265aa5cd0cf4f863b2421e1b48fc9f", size = 860164, upload-time = "2026-05-09T23:13:22.263Z" },
+    { url = "https://files.pythonhosted.org/packages/6e/e1/4a57a83350319b1271f0d7a249b8672513ed928b237a741631270de6caea/regex-2026.5.9-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:debb893095e944091c16e641a6e33c1b0f4cb61ab945ec5afbf53ce7068834d8", size = 765731, upload-time = "2026-05-09T23:13:24.277Z" },
+    { url = "https://files.pythonhosted.org/packages/12/f4/499e74a20c156fc75836ee04a72a38d1a063978f600937f9760467beb1b0/regex-2026.5.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d659eee77986549c9ea45b861c7567e44d6287c3dc9a4565478853f7b9fe2ff6", size = 852062, upload-time = "2026-05-09T23:13:26.125Z" },
+    { url = "https://files.pythonhosted.org/packages/5b/92/7eebc0d0a01e78629695f342ba17e0deaff8fb45e79cc0d7b98287da6e3e/regex-2026.5.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:2efa205e6d98b24d1f3ab395c11aa15cdf10935bca283d0285e0499c284fba21", size = 789577, upload-time = "2026-05-09T23:13:27.814Z" },
+    { url = "https://files.pythonhosted.org/packages/05/a4/018e71f7d2ad48c1ebe6d3ae0026f9b7cb4802fd15c7cc02fdf724355102/regex-2026.5.9-cp313-cp313-win32.whl", hash = "sha256:f3844f134e834076677dd369976e9f5068679fcb8e50102fdf6b7ac96a3ec127", size = 266691, upload-time = "2026-05-09T23:13:29.549Z" },
+    { url = "https://files.pythonhosted.org/packages/e6/1d/861a93719fb9ee7dbfc3761b3797b7a3e112a5d42c6129459d2d741be9b5/regex-2026.5.9-cp313-cp313-win_amd64.whl", hash = "sha256:3527bb4942d2c14552155406cdedd906567456821848aed1cb4933a391bf5eca", size = 277747, upload-time = "2026-05-09T23:13:31.859Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/c6/0a2436ae4da1ba76e51cb98943c6838a9a721faa40ebe2dce07694ae34e3/regex-2026.5.9-cp313-cp313-win_arm64.whl", hash = "sha256:56a33f191f17d8c417f99945ebdc1e691d3af9605d86ec68c7e54a57e3e17af6", size = 270500, upload-time = "2026-05-09T23:13:33.525Z" },
+    { url = "https://files.pythonhosted.org/packages/e8/e9/d21346f7b60ed58789371358ed66b09d00f832e1bd7c06e55d9da5679882/regex-2026.5.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:01f28d868834624c934b8d2e0aa1c8341337e37831f4a012f18a5afcba4cbaf3", size = 494172, upload-time = "2026-05-09T23:13:35.935Z" },
+    { url = "https://files.pythonhosted.org/packages/c4/43/fd1177a2032037c681baecdb3422ee4e1424aec4e4f470ef47793d325274/regex-2026.5.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:48036f6374aaa79eb3b754ec29c61d1c6b1606749d705a13f8854fa2539671f6", size = 293952, upload-time = "2026-05-09T23:13:38.307Z" },
+    { url = "https://files.pythonhosted.org/packages/f2/7d/9fbf919768368d3f8a4f6c692cf2aa61e482b2b81ec6a298ace4cbf02480/regex-2026.5.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b96350aa424e79d4fd6b567b344dcbe2b2d6bfc48dfe7717587e1fa6d43da6ff", size = 292314, upload-time = "2026-05-09T23:13:40.353Z" },
+    { url = "https://files.pythonhosted.org/packages/e2/6c/e41bfeecb589716843e7c4df09ba46ff2a42961457afece19059d85caeef/regex-2026.5.9-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8f3af7a4903c5c04a11a196a5aa75cdd7dd3f8508132f9fb3259d9f5908e3b88", size = 811681, upload-time = "2026-05-09T23:13:42.543Z" },
+    { url = "https://files.pythonhosted.org/packages/87/83/a5c1c525fba0aa656e88ad0face0b1829788ef4c2fb6b26df58aa1151b84/regex-2026.5.9-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7e87577720152d2caae19fe2baaf1f8d5ca12091e9e229f03915c37d1e4b9178", size = 871135, upload-time = "2026-05-09T23:13:44.326Z" },
+    { url = "https://files.pythonhosted.org/packages/18/d4/80882e799e440dd878b0979cbebf8fa4d54624a332c83037c7a701649e3f/regex-2026.5.9-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c8b9b9d294cfea3cd19c718ade7cc93492b2c4991abd9a68d0b3477ae6d8e100", size = 917265, upload-time = "2026-05-09T23:13:47.295Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/ff/8db60211e2286e396aad7dc7725356c502bff0901ea05bd6cdc2e1a042b9/regex-2026.5.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:728d8bfd28a8845c8b6bc5dc7ce010453d206396786c0765c2740cb65f37791e", size = 816311, upload-time = "2026-05-09T23:13:49.885Z" },
+    { url = "https://files.pythonhosted.org/packages/4c/47/742ef579c61730f8d268e5cf1f9ce0e37e2ea041ad0f5644724f2378e463/regex-2026.5.9-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:7e30b874d341fac767d7df5a0870540541c2c054b80cfaac116e8d367a8a7ff2", size = 785498, upload-time = "2026-05-09T23:13:52.25Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/ab/cb0999802dcb0fb95b1ab005e8d4163d8afdd67efc2cb6b6630ac13f8cb1/regex-2026.5.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fd190e88a895a8901325fad284a3f74ea52b1da8525b76cc811fa9b1edf0ce2b", size = 801348, upload-time = "2026-05-09T23:13:54.127Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/62/8ca59a24c55bc34d166eefaf3717bd77772f329fdbf984d86581e0a3571c/regex-2026.5.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:8e76e8161ad00694cfce6767d5dea860c6391ac5b83e5c3a39661e696f11fc7e", size = 866493, upload-time = "2026-05-09T23:13:56.067Z" },
+    { url = "https://files.pythonhosted.org/packages/8d/3d/30f2ae62cef3278bb5bb821f467277a55fb73f01032cf85997e15e8289a8/regex-2026.5.9-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:ddda5340e6c01a293027dd46232fa79eaff1b48058ce7a98f572b6445b088041", size = 772811, upload-time = "2026-05-09T23:13:57.867Z" },
+    { url = "https://files.pythonhosted.org/packages/d8/ae/7d2089bcd78ad0c0161bc684339df50032acb438a7bd3305e7ddb1193cec/regex-2026.5.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:205109e96b3cf5adf8f4cd62bedde9487feb282b9497a3535451e5a24cd706a0", size = 856584, upload-time = "2026-05-09T23:13:59.679Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/29/92ff47f75990131ea4f24ba17819e5a9d141e10819807e09addd73409af6/regex-2026.5.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dfbe4579b9f08036aa7d101d1835437a20783574ac66327e6b29b4018a138081", size = 803453, upload-time = "2026-05-09T23:14:01.978Z" },
+    { url = "https://files.pythonhosted.org/packages/04/99/eff29f1037dcab36702c9ee5d6858cf1ce2336ea8ea2987f64245b99ea5e/regex-2026.5.9-cp313-cp313t-win32.whl", hash = "sha256:ed2c9e8068b614c574d8d30e543d617cf5379b0535d46f97ef00e904745a08b5", size = 269951, upload-time = "2026-05-09T23:14:03.661Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/9d/8870b8981d27b22cda77bb26a5ac7ebfa9c7d9e0dea195a834a82380e748/regex-2026.5.9-cp313-cp313t-win_amd64.whl", hash = "sha256:b46b0f094dc1d3b90356c85a0bd2c9bafc4a6a190b9d6f8ddd5a033b6e088ed4", size = 281240, upload-time = "2026-05-09T23:14:05.56Z" },
+    { url = "https://files.pythonhosted.org/packages/72/b1/3379415e8f135c13ac551353397cc4fe97b4978f3cac73c5fcbcded548b8/regex-2026.5.9-cp313-cp313t-win_arm64.whl", hash = "sha256:872acc074bd29ffc9913ecdfedf6ea77502312ca44a4aa0d3779089c6069d8de", size = 272383, upload-time = "2026-05-09T23:14:07.843Z" },
+    { url = "https://files.pythonhosted.org/packages/13/3e/9c3cd292d8808b3645a2ce517e200179b6d0e903f176300bd8b542e14de5/regex-2026.5.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:1bd7587a2948b4085195d5a3374eaf4a425dc3e55784c038175355ecf3bbbf8a", size = 490376, upload-time = "2026-05-09T23:14:09.64Z" },
+    { url = "https://files.pythonhosted.org/packages/60/70/d43ee8a2ca0a8b68d167f21658b85520ac0574617c7f320367c5047f7556/regex-2026.5.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:dea2e88e1cce4522496cce630e11e67b98b7076620bc4336c3f674bc21a375f4", size = 291964, upload-time = "2026-05-09T23:14:11.424Z" },
+    { url = "https://files.pythonhosted.org/packages/21/91/9d50b433828d8e74196904e168a43abf1e6e88b2a15d47ed742456720c37/regex-2026.5.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2099f7e7ff7b6aa3192312650a56e91cc091e49d50b04e4f6f8b6e28b3b27f1c", size = 289682, upload-time = "2026-05-09T23:14:13.123Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/d2/b835e3cafbb9d977736912436259ff551d60919f7d7b3d37d46659c63564/regex-2026.5.9-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecd353045824e4477562a2ac718c25799cdaaa41f7aa925a806a8a3e6848a5b9", size = 796996, upload-time = "2026-05-09T23:14:14.923Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/a6/9f992d00019166b9de01c546dd4549bc679f2a68df11b877740b0760b7c2/regex-2026.5.9-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:65c8c8c37377794bd5b2f3ebe51919042bf17aec802e23c833d89782ed0c78af", size = 866089, upload-time = "2026-05-09T23:14:17.757Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/08/4d32af657e049b19cb62b02e46e38fe1518797bfb2203ee93a510b21b0dc/regex-2026.5.9-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5b73ab8afcf66c622db143d1c6fda4e58e4d537ee4f125229ad47b1ab80f34c0", size = 911530, upload-time = "2026-05-09T23:14:20.353Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/27/2af43dd1dc201d1fecefda64a45f4ad0995855b92724f795a777b402ee69/regex-2026.5.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0de5cf193997384ed2ca6f1cd4f78055b255d93d82d5a8cd6ba0d11c10b167e4", size = 800643, upload-time = "2026-05-09T23:14:22.265Z" },
+    { url = "https://files.pythonhosted.org/packages/a4/dd/23a249047013b5321d4a60c4d2437462086f601b061776a525e5fba2a59f/regex-2026.5.9-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d641a8c9a61618047796d572a39a79b26167b0411d2c3031937b2fe2d081e2cf", size = 777223, upload-time = "2026-05-09T23:14:24.179Z" },
+    { url = "https://files.pythonhosted.org/packages/94/6a/e85ed9538cd19586d0465076a4578a12e093ce776d15f3f8ce92733a8dd6/regex-2026.5.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:24b2355ef5cc9aa5b8f07d17704face1c166fdcc2290fa7bd6e6c925655a8346", size = 785760, upload-time = "2026-05-09T23:14:26.065Z" },
+    { url = "https://files.pythonhosted.org/packages/2a/c4/f25473209438638e947c55f9156fd8f236f74169229028cc99116380868e/regex-2026.5.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a24852d3c29ad9e47593593d8a247c44ccc3d0548ef12c822d6ed0810affe676", size = 860891, upload-time = "2026-05-09T23:14:28.17Z" },
+    { url = "https://files.pythonhosted.org/packages/f9/f7/f4f86e3c74419c37370e91f150ae0c2ef7d34b2e0e4cdd5da046a02e4022/regex-2026.5.9-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:916714069da19329ef7de197dcbc77bb3104145c7c2c864dbfbe318f46b88b14", size = 765891, upload-time = "2026-05-09T23:14:30.06Z" },
+    { url = "https://files.pythonhosted.org/packages/26/70/704d8e13765939146b1cd0ef4e2feb71d7929727d2290f026eed10095955/regex-2026.5.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:fa411799ca8da32a8d38d020a88faa5b6f91657d284761352940ecf9f7c3bbdd", size = 851380, upload-time = "2026-05-09T23:14:32.123Z" },
+    { url = "https://files.pythonhosted.org/packages/26/29/1a13582a8460038edc38e49f64ceb0dd7c60f5caba77571f4bf6601965d9/regex-2026.5.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1e6da47d679b7010ef27556b6e0f99771b744936db1792a10ceac6547ae1503e", size = 789350, upload-time = "2026-05-09T23:14:34.799Z" },
+    { url = "https://files.pythonhosted.org/packages/73/56/3dcafe34fc72e271d62ad9a291801e88a1457bb251c132f15fcc2e5aad1a/regex-2026.5.9-cp314-cp314-win32.whl", hash = "sha256:98bd73080e8756255137e1bd3f3f00295bbc5aa383c0e0f973920e9134d7c4ad", size = 272130, upload-time = "2026-05-09T23:14:36.729Z" },
+    { url = "https://files.pythonhosted.org/packages/d0/9c/02eebf0be95efe416c664db7fb8b6b05b7a0b06a7544f2884f2558b0526f/regex-2026.5.9-cp314-cp314-win_amd64.whl", hash = "sha256:ff8d372ac2acdc048d1c19916f27ee61bc5722728458ba6ca5052f2c72d51763", size = 280999, upload-time = "2026-05-09T23:14:39.126Z" },
+    { url = "https://files.pythonhosted.org/packages/70/5a/1dd1abee76cb7a846a0bcf42fdc87e5720c3c33c24f3e37814310a513d9f/regex-2026.5.9-cp314-cp314-win_arm64.whl", hash = "sha256:e1d93bf647916292e8edcec150c07ddf3dc50179ccaf770c04a7f9e452155372", size = 273500, upload-time = "2026-05-09T23:14:41.059Z" },
+    { url = "https://files.pythonhosted.org/packages/86/c1/c5f619b0057a7965cb78ec559c1d7a45ce8c99a35bea95483d64959a93d9/regex-2026.5.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:83d0ee4a57d1c87cb549e195ec300b8f0ec3a82eba66d835e4e2ed8634fe4499", size = 494269, upload-time = "2026-05-09T23:14:42.869Z" },
+    { url = "https://files.pythonhosted.org/packages/05/2c/5d01f1aee33de4bbe60c8452945bfc8477ca7c5ae4450f6bfe711036cb36/regex-2026.5.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:d3d7eb5c9a7f6df82ed3cfac9beb93882a5cbcb5b8b157b56cb2b3b276574ac1", size = 293954, upload-time = "2026-05-09T23:14:44.822Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/fe/e8988b2ae2108c6ef71bd4aa8d87fbe257976dd0810e826cd75f701c68b6/regex-2026.5.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:075160bf16658e16d35233300b8453aac25de4cbea808d22348b6979668e924d", size = 292405, upload-time = "2026-05-09T23:14:47.211Z" },
+    { url = "https://files.pythonhosted.org/packages/79/34/d2b0937faa7859263f7f0a3c6b103a1296306be6952dc173d0154e9a2f49/regex-2026.5.9-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:45375819235558a4ff1c4971dc32881f022613abdb180128f5cb4768c1765a1c", size = 811855, upload-time = "2026-05-09T23:14:49.21Z" },
+    { url = "https://files.pythonhosted.org/packages/80/fe/daf53a47457a8486db66c66c01ceb9c2303eecee3f87197f1e77eb1a736d/regex-2026.5.9-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ead4b163ac30a29574510cd4b3e2e985ac5290c05fc7095557d6a5f403fc31b5", size = 871189, upload-time = "2026-05-09T23:14:51.555Z" },
+    { url = "https://files.pythonhosted.org/packages/1c/75/058fc4470cbfbf57d800aff1a0022b929a3f9fa553ee10a0cdf2070eb31f/regex-2026.5.9-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8c6e4218fbdfbcd4f6c19efca40930d24a621bf4b48cb76bc6640543bd28ef20", size = 917485, upload-time = "2026-05-09T23:14:53.633Z" },
+    { url = "https://files.pythonhosted.org/packages/88/e7/179cfda3a28bc843b5c6cfe7f79f23489c791ed95f151083803660878432/regex-2026.5.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6351571c8a42b505eb555c0dc47d740d0fb66977dc142919eea6f4325b7c56a0", size = 816369, upload-time = "2026-05-09T23:14:56.198Z" },
+    { url = "https://files.pythonhosted.org/packages/41/90/6f0cc422071688266d344fca8462d787cba0a2c144acb25721f9a61ec265/regex-2026.5.9-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:002205cafd2a9e78c6290c7d1df277bf3277b3b7a30e0b4bb0dac2e2e3f7cb2d", size = 785869, upload-time = "2026-05-09T23:14:58.602Z" },
+    { url = "https://files.pythonhosted.org/packages/02/67/a31f1760f09c27b251ef39e9beb541f462cf977381d067faa764c2c0e393/regex-2026.5.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8abd33fef90b2a9efac5557d6033ca82d1195ed3a15fea5af15ba7b463c6a63b", size = 801427, upload-time = "2026-05-09T23:15:00.642Z" },
+    { url = "https://files.pythonhosted.org/packages/e3/c4/1a80654597b6bc1e1ea0494824c31200e8a956abe290afae9b19a166a148/regex-2026.5.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:31037c82eccb44b7ea2e9e221d7c01429430e989a1f4b91ea5a855f6017b509a", size = 866482, upload-time = "2026-05-09T23:15:03.384Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/11/960724e06482c08466ff5611e242e86f80062949cdf6b4b9cc317b9dd93d/regex-2026.5.9-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:5604dfd046dc37eca90250fc3be938b076c8059fa772ac0ed6f499b0f0fb0415", size = 773022, upload-time = "2026-05-09T23:15:05.625Z" },
+    { url = "https://files.pythonhosted.org/packages/50/a8/a9979c3e7918280e93159ebcab5ef1a65116dd4f3bd6091be0eae4a126e8/regex-2026.5.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:0e1b1b4e496afbb24f4a62aba855ee4f88f25578927697b340702e48c9ee6bc2", size = 856642, upload-time = "2026-05-09T23:15:07.966Z" },
+    { url = "https://files.pythonhosted.org/packages/fe/d4/a9b732f2f0072c0ab12227483abb24fffcb9f73f8a2b203df0a6d0434735/regex-2026.5.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:be3372b9df6ddecff6486d37e19095a7b4973137caf5512407a89f4455361f41", size = 803552, upload-time = "2026-05-09T23:15:10.215Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/fe/1b3113817447a1d4155e4ac76d2e072f42c0bcba2f43fa8a0e756ea2cd91/regex-2026.5.9-cp314-cp314t-win32.whl", hash = "sha256:3ddd90103f9e5c471c49c7852ecc1fe27c7e45eb99e977aefe7caa4e779f4f58", size = 275746, upload-time = "2026-05-09T23:15:12.609Z" },
+    { url = "https://files.pythonhosted.org/packages/92/73/93d42045302636c91f2e5ef588b65b84b01428f28ec77de256b1dfdfbe5c/regex-2026.5.9-cp314-cp314t-win_amd64.whl", hash = "sha256:ca518ed29c46eecba6010b15f1b9a479314d2de409536e71b6a13aa04e3b8a77", size = 285685, upload-time = "2026-05-09T23:15:15.086Z" },
+    { url = "https://files.pythonhosted.org/packages/da/80/35b4c33c804a165a7f55289afda3ea9e3eb6d15800341a2d66455c0f1f30/regex-2026.5.9-cp314-cp314t-win_arm64.whl", hash = "sha256:5e41809d2683fcde7d5a8c87a6567ba1fb1ce0de9f31bff578de00a4b2d76daa", size = 275713, upload-time = "2026-05-09T23:15:16.98Z" },
+]
+
 [[package]]
 name = "requests"
 version = "2.32.5"
@@ -2120,6 +2229,7 @@ dev = [
     { name = "jupyterlab" },
     { name = "jupyterlab-code-formatter" },
     { name = "loguru" },
+    { name = "maturin" },
     { name = "pdoc" },
     { name = "prek" },
     { name = "pytest" },
@@ -2127,6 +2237,7 @@ dev = [
     { name = "pytest-lazy-fixtures" },
     { name = "pytest-mock" },
     { name = "ruff" },
+    { name = "tiktoken" },
     { name = "twine" },
     { name = "ty" },
 ]
@@ -2148,6 +2259,7 @@ dev = [
     { name = "jupyterlab" },
     { name = "jupyterlab-code-formatter" },
     { name = "loguru" },
+    { name = "maturin", specifier = ">=1.14.0" },
     { name = "pdoc" },
     { name = "prek" },
     { name = "pytest" },
@@ -2155,10 +2267,58 @@ dev = [
     { name = "pytest-lazy-fixtures" },
     { name = "pytest-mock" },
     { name = "ruff" },
+    { name = "tiktoken" },
     { name = "twine" },
     { name = "ty" },
 ]
 
+[[package]]
+name = "tiktoken"
+version = "0.13.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "regex" },
+    { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e4/e5/5f3cb2159769d0f4324c0e9e87f9de3c4b1cd45848a96b2eb3566ad5ca77/tiktoken-0.13.0.tar.gz", hash = "sha256:c9435714c3a84c2319499de9a300c0e604449dd0799ff246458b3bb6a7f433c1", size = 38986, upload-time = "2026-05-15T04:51:27.153Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/85/8e/144bde4e01df66b34bb865557c7cd754ed08b036217ebd79c9db5e9048a9/tiktoken-0.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:32ac870a806cfb260a02d0cb70426aef02e038297f8ad50df5040bb5af360791", size = 1034888, upload-time = "2026-05-15T04:50:31.579Z" },
+    { url = "https://files.pythonhosted.org/packages/36/18/d4ac9d20956cdebca04841316660ed584c2fecdc2b81722a28bc7ad3b1e4/tiktoken-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4d9980f11429ed2d737c463bb1fb78cf330caa026adf002f714aced7849a687b", size = 982970, upload-time = "2026-05-15T04:50:32.961Z" },
+    { url = "https://files.pythonhosted.org/packages/74/ed/6bb8d05b9f731f749fee5c6f5ca63e981143c826a5985877330507bd13b7/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:3f277ebea5edd7b8bf03c6f9431e1d67d517530115572b2dc1d465326e8f88c7", size = 1115741, upload-time = "2026-05-15T04:50:34.475Z" },
+    { url = "https://files.pythonhosted.org/packages/34/de/2ca96b07a82d972b74fe4b46de055b79c904e45c7eab699354a0bfa697dc/tiktoken-0.13.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:a116178fa7e1b4065bff05214360373a65cac22f965be7b3f73d00a0dbfe7649", size = 1136523, upload-time = "2026-05-15T04:50:35.782Z" },
+    { url = "https://files.pythonhosted.org/packages/ee/dc/9dafec002c2d4424378563cf4cf5c7fb93631d2a55013c8b87554ee4012c/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2c397ddda233208345b01bd30f2fca79ff730e55731d0108a603f9bc57f6af3b", size = 1181954, upload-time = "2026-05-15T04:50:36.99Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/d0/1f8578c45b2f24759b46f0b50d31878c63c73e6bf0f2227e10ec5c5408dc/tiktoken-0.13.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:95097e4f89b06403976e498abf61a0ee73a7497e73fb599cb211d8197a054d91", size = 1240069, upload-time = "2026-05-15T04:50:38.221Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/90/28d7f154888610aa9237e541986beb62b479df29d193a5a0617dbb1514d0/tiktoken-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:8f2d16e7a7c783ad81f36e457d046d1f1c8af70b22aec8a13238efe531977c41", size = 874748, upload-time = "2026-05-15T04:50:39.587Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/83/b096c859c2a47c11731bf2f5885f4028b809dfe2396582883eed9cae372f/tiktoken-0.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5df5d1507bd245f1ccad4a074698240021239e455eb0bb4ced4e3d7181872154", size = 1034228, upload-time = "2026-05-15T04:50:40.988Z" },
+    { url = "https://files.pythonhosted.org/packages/53/61/c68e123b6d753e3fc2751e9b18e732c9d8bf1e1926762e736eee935d931c/tiktoken-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8fe806a50664e83a6ffd56cbd1e4f5dcc6cd32a3e7538f70dc38b1a271384545", size = 982978, upload-time = "2026-05-15T04:50:42.195Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/8b/96cc178cc584e65d363134500f297790b06cd48cdeb1e8fcf7bbe60f4715/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:125bc05005e747f993a83dc67934249932d6e4209854452cd4c0b1d53fba3ba2", size = 1116355, upload-time = "2026-05-15T04:50:43.564Z" },
+    { url = "https://files.pythonhosted.org/packages/86/f5/bab735d2c72ea55404b295d02d092644eb5f7cc6205e34d35eb9abfb9ab2/tiktoken-0.13.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:5e6358911cab4adee6712da27d65573496a4f68cf8a2b5fca6a4ad10fc5748cf", size = 1135772, upload-time = "2026-05-15T04:50:44.782Z" },
+    { url = "https://files.pythonhosted.org/packages/4e/b9/6de04ebdf904edfaad87788011b3735087a0c9ea671b9027e1e4e965e8c8/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:975cbd78d085d75d26b59660e262736dcaed1e35f8f142cd6291025c01d25486", size = 1182415, upload-time = "2026-05-15T04:50:46.422Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/9c/470a05f3b1caf038f44880e334d47ab674e0c80d514c66b375d14d5afa10/tiktoken-0.13.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:75ab9bc99fa020a4c283424590ecd7f3afd70c1c281cb3fa3192a6c3af9f9615", size = 1239879, upload-time = "2026-05-15T04:50:48.052Z" },
+    { url = "https://files.pythonhosted.org/packages/42/a6/c1936d16055436cb32e6c6128d68629622e00f4768562f55653752d34768/tiktoken-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:6b1615f0ff71953d19729ceb18865429c185b0a23c5353f1bbca34a394bf60f7", size = 874829, upload-time = "2026-05-15T04:50:49.202Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/07/acb5992c3772b5a36284f742cfb7a5895aa4471d1848ac31464ad50d7fdf/tiktoken-0.13.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6eb4a5bfbc6426938026b1a334e898ac53541360d62d8c689870160cc80abd67", size = 1033600, upload-time = "2026-05-15T04:50:50.4Z" },
+    { url = "https://files.pythonhosted.org/packages/14/e9/742e9aec30f59b9f161f7ff7cd072e02ea836c9e1c0854a8076dfcd40d5c/tiktoken-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:43cee3e5400573b2046fbf092cc7a5bc30164f9e4c95ce20714da929df48737a", size = 982516, upload-time = "2026-05-15T04:50:52.03Z" },
+    { url = "https://files.pythonhosted.org/packages/72/74/ca1541b053e7648254d2e4b42a253e1bb4359f2c91a0a8d49228c794e1a0/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:7de52e3f566d19b3b11bd37eea552c6c305ad74081f736882bd44d148ed4c48d", size = 1115518, upload-time = "2026-05-15T04:50:53.543Z" },
+    { url = "https://files.pythonhosted.org/packages/46/e3/93825eaf5a4a504795b787e5d5dea07fbeb3dabf97aa7b450be8bde59c89/tiktoken-0.13.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:51384448aa508e4df84c0f7c1dc3211c7f7b8096325660ee5fc82f3e11b381ce", size = 1136867, upload-time = "2026-05-15T04:50:55.191Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/46/002b68de6827091d5ae90b048f326e8aad8d953520950e5ce1508879414f/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:e28157350f7ebf35008dd8e9e0fdb621f976e4230c881099c85e8cf07eaa50e2", size = 1181826, upload-time = "2026-05-15T04:50:56.296Z" },
+    { url = "https://files.pythonhosted.org/packages/db/c6/d393e3185a276505182f7abd93fe714f3c444a2be9180798fa052347504e/tiktoken-0.13.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:165cf1820ea4a354985c2490a5205d4cc74661c934aca79dd0368232fff94e0f", size = 1239489, upload-time = "2026-05-15T04:50:57.918Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/4d/bc07d1f1635d4897a202acc0ae11c2886eaa7325c359ba4741b47bf8e225/tiktoken-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6c43a675ca14f6f2749ba7f12075d37456015a24b859f2517b9beb4ef30807ec", size = 873820, upload-time = "2026-05-15T04:50:59.528Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/93/0dd6adca026a616c3a92974566b43381eea4b475ce1f36c062b8271a9ac5/tiktoken-0.13.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaaaef47c2406277181d2086484c317bf7fc433e2d5d03ff94f56b0dcec87471", size = 1034977, upload-time = "2026-05-15T04:51:00.957Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/77/5ec6e6bc5b30bed6d93f7f2162d8f6b32437b3ba27cb527cfe004f6109c9/tiktoken-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ca8b310bd93b3772cb1b7922d915446864860f562bdfe4825c63a0aed3fb28cd", size = 983635, upload-time = "2026-05-15T04:51:02.629Z" },
+    { url = "https://files.pythonhosted.org/packages/94/b0/c8ae9aff00d625c50659b4513e707a0462c4bf5d4d6cc1b802103225c02e/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:32e0c12305105002c047b3bb1070b0dd9a73b0cb3b2856a8972b810e7a4f5881", size = 1116036, upload-time = "2026-05-15T04:51:04.082Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/ac/6a5dddd1d0a6018ecb389bd0353e6b4a515eb4d2286611bd0ace1937b9e1/tiktoken-0.13.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:5ba5fd62507a932d1241346179e3b39bc7bf7408f03c272652d93b3bedf5db24", size = 1135544, upload-time = "2026-05-15T04:51:05.229Z" },
+    { url = "https://files.pythonhosted.org/packages/f4/b8/585032b4384b2f7dcdaddcb52865c83a701a420d09e3c2b4a2be1c450c57/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:d108bc2d470fc53c8ecd24f2c0fd2b5f98c33e87cdb6aa2e9b8c5dced703d273", size = 1182217, upload-time = "2026-05-15T04:51:06.517Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/b6/993ff1ded3958215fd341a847b8e5ffeb5de473f435296870d314fc91ac4/tiktoken-0.13.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:cb99cb5127449f58d0a2d5f5ccfb390d8dbdfd919c221246caaee29d8725ed51", size = 1239404, upload-time = "2026-05-15T04:51:07.843Z" },
+    { url = "https://files.pythonhosted.org/packages/dd/3d/fef7e06e3b33e7538db0ced734cf9fe23b6832d2ac4990c119c377aec55e/tiktoken-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:115c4f26ffa11caac8b54eea35c2ad38c612c20a48d35dd15d70a02ac6f51f58", size = 918686, upload-time = "2026-05-15T04:51:08.925Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/82/a7fc44582bc32ab00de988a2299bf77c077f59068b233109e34b7d6ca7e6/tiktoken-0.13.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:472527e9132952f2fbf77cd290658bacf003d4d5a3fabc18e5fbd407cbae4d9b", size = 1034454, upload-time = "2026-05-15T04:51:10.035Z" },
+    { url = "https://files.pythonhosted.org/packages/37/d0/24d8a890c14f432a05cea669c17bebeaa99f96a7c79523b590f564246411/tiktoken-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4e2f67d27c9626cdd25fe33d9313c5cdb3d8d82da646b68d6eb8e7e9c20e6448", size = 982976, upload-time = "2026-05-15T04:51:11.23Z" },
+    { url = "https://files.pythonhosted.org/packages/49/b7/2ab43f62788a9266187a9bfc1d3af99ad83e5eaa25fbef168a69cd5ad14f/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2b920b35805cd64585a37c3dc7ce65fba4d2d36016be01e1d7942482ca29093a", size = 1115526, upload-time = "2026-05-15T04:51:12.608Z" },
+    { url = "https://files.pythonhosted.org/packages/64/39/1494321ed323ce7a14d88e3cd6cb9058625977df1c6961ddc492bd10a9f3/tiktoken-0.13.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:493af3aa28a4aaf2e3d2600a2ee717252c9bf5ab38fff94eb5a02db5ab77e5ad", size = 1136466, upload-time = "2026-05-15T04:51:13.926Z" },
+    { url = "https://files.pythonhosted.org/packages/96/d9/dfd086aa2d918c563a140720e0ce296cada1634efd2783d5cf51e05f984e/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:6644c9c2b5cf3916f5a3641d7d12fdb3f006a7b3d9ff6acdaec44e29ab1ff91e", size = 1181863, upload-time = "2026-05-15T04:51:15.025Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/68/a18b4f307086954fdae32714cb4f85562e34f9d34ab206e61f1816aa6018/tiktoken-0.13.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5cb65b60b9408563676d874a3a4ee573370066f0dc4e29d84e82e989c6517424", size = 1239218, upload-time = "2026-05-15T04:51:16.103Z" },
+    { url = "https://files.pythonhosted.org/packages/16/5b/f2aa703a4fc5d2dff73460a7d46cc2f3f44aa0f3dd8eeb20d2a0ecf68862/tiktoken-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:85b78cc3a2c3d48723ca751fa981f1fedccd54194ca0471b957364353a898b07", size = 918110, upload-time = "2026-05-15T04:51:17.237Z" },
+]
+
 [[package]]
 name = "tinycss2"
 version = "1.4.0"