securelayer7
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 39 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 109 additions & 0 deletions b/‎.github/workflows/release.yml‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 66 additions & 0 deletions b/‎.gitignore‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 49 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 49 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,39 @@
+# Continuous integration: install, typecheck, test and build on every push to
+# main and on every pull request.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+# Least privilege: nothing in this workflow writes to the repository, so the
+# GITHUB_TOKEN is restricted to read-only repository contents instead of
+# inheriting the repository/organization default scopes.
+permissions:
+  contents: read
+
+jobs:
+  # Runs the full verification pipeline once per Node.js version in the matrix.
+  build-test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        node: [20, 22]
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: ${{ matrix.node }}
+          # Caches the npm download cache between runs.
+          cache: npm
+      # `npm ci` installs exactly what the lockfile pins.
+      - run: npm ci
+      - run: npm run typecheck
+      - run: npm test
+      - run: npm run build
+
+  # Public benchmark job. Starts only after every build-test matrix leg has
+  # succeeded, then rebuilds on Node 22, checks the model files against their
+  # recorded checksums and runs the benchmark script.
+  bench:
+    needs: [build-test]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22'
+          cache: npm
+      - run: npm ci
+      - run: npm run build
+      # Fails the job if any file listed in SHA256SUMS does not match.
+      - name: Verify model checksum
+        working-directory: models/l5e
+        run: sha256sum -c SHA256SUMS
+      - name: Run public benchmark
+        run: node scripts/bench.mjs
@@ -0,0 +1,109 @@
+# Release pipeline: build and test the package, package + sign the model
+# artifact, attest build provenance, generate an SBOM, publish a GitHub
+# Release, publish to npm with provenance, and mirror the model to the
+# Hugging Face Hub.
+name: Release
+
+# Triggers: pushing a tag matching v*, or a manual run that names the tag to
+# release through the required `tag` input.
+on:
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+    inputs:
+      tag:
+        description: 'Release tag (e.g. v0.0.1)'
+        required: true
+
+permissions:
+  contents: write      # GitHub Release upload
+  id-token: write      # cosign keyless + npm provenance + SLSA attestation
+  attestations: write  # SLSA build provenance
+
+env:
+  # Single source of truth for the tag being released. On a manual run the
+  # ref is a branch, so the `tag` input must win; on a tag push the input is
+  # empty and the pushed tag name is used. Every step inherits this variable.
+  RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }}
+
+jobs:
+  release:
+    runs-on: ubuntu-latest
+    steps:
+      # Build from the tagged commit, not from whatever branch a manual run
+      # was started on. The tag must already exist in the repository.
+      - uses: actions/checkout@v4
+        with:
+          ref: refs/tags/${{ env.RELEASE_TAG }}
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          registry-url: 'https://registry.npmjs.org'
+          cache: npm
+
+      - run: npm ci
+      - run: npm run typecheck
+      - run: npm test
+      - run: npm run build
+
+      # ---------- Model artifact tarball ----------
+      # Verify before packaging so a model file that does not match its
+      # recorded checksum is never tarred, signed or attested.
+      - name: Verify model SHA256SUMS
+        run: cd models/l5e && sha256sum -c SHA256SUMS
+
+      - name: Build model tarball
+        run: |
+          tar -czf promptpurify-model.tar.gz \
+            models/l5e/model.int8.onnx \
+            models/l5e/vocab.txt \
+            models/l5e/l5e.json \
+            models/l5e/SHA256SUMS
+          sha256sum promptpurify-model.tar.gz > promptpurify-model.tar.gz.sha256
+
+      # ---------- cosign keyless signature ----------
+      - uses: sigstore/cosign-installer@v3
+
+      - name: cosign-sign model tarball
+        run: |
+          cosign sign-blob --yes \
+            --bundle promptpurify-model.tar.gz.cosign.bundle \
+            promptpurify-model.tar.gz
+
+      # ---------- SLSA build provenance ----------
+      - uses: actions/attest-build-provenance@v1
+        with:
+          subject-path: |
+            promptpurify-model.tar.gz
+            dist/**/*
+
+      # ---------- SBOM ----------
+      - name: Generate CycloneDX SBOM
+        run: npx --yes @cyclonedx/cyclonedx-npm --output-file SBOM.cdx.json --output-format JSON
+
+      # ---------- GitHub Release ----------
+      # tag_name is passed explicitly: on a manual run the workflow ref is a
+      # branch, and the action refuses to create a release without a tag.
+      - name: Upload release artifacts
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ env.RELEASE_TAG }}
+          files: |
+            promptpurify-model.tar.gz
+            promptpurify-model.tar.gz.sha256
+            promptpurify-model.tar.gz.cosign.bundle
+            SBOM.cdx.json
+          generate_release_notes: true
+
+      # ---------- npm publish with provenance ----------
+      - name: Publish to npm with provenance
+        run: npm publish --provenance --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      # ---------- Hugging Face Hub mirror ----------
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Push to Securelayer7/promptpurify on HF Hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          pip install --quiet huggingface_hub
+          python - <<'PY'
+          import os, shutil
+          from huggingface_hub import HfApi
+          repo = "Securelayer7/promptpurify"
+          # RELEASE_TAG comes from the workflow-level env; it is read from the
+          # environment (never interpolated into the script) and falls back to
+          # the ref name so the commit message is never empty.
+          tag = os.environ.get("RELEASE_TAG") or os.environ.get("GITHUB_REF_NAME", "manual")
+          api = HfApi(token=os.environ["HF_TOKEN"])
+          api.create_repo(repo, repo_type="model", exist_ok=True)
+          # MODEL_CARD.md becomes the HF README (has the YAML frontmatter HF needs)
+          shutil.copy("MODEL_CARD.md", "models/l5e/README.md")
+          api.upload_folder(
+              repo_id=repo,
+              folder_path="models/l5e",
+              path_in_repo=".",
+              commit_message=f"release {tag}",
+              allow_patterns=["model.int8.onnx", "vocab.txt", "l5e.json", "SHA256SUMS", "README.md"],
+          )
+          PY
@@ -0,0 +1,66 @@
+# Installed dependencies and build output (presumably produced by
+# `npm ci` / `npm run build` — confirm against package.json).
+node_modules/
+dist/
+# Log files and macOS Finder metadata.
+*.log
+.DS_Store
+# Coverage reports, packed tarballs (presumably `npm pack` output) and what
+# looks like the Vitest cache directory — confirm.
+coverage/
+*.tgz
+.vitest/
+# Local environment files. NOTE: `.env.*` also matches files such as
+# `.env.example`, so a committed template would need an explicit `!` rule.
+.env
+.env.*
+# EVAL-ONLY adversarial benchmark: github.com/elder-plinius (Pliny/BASI) raw
+# jailbreak + system-prompt-leak payloads. AGPL-3.0 / unlicensed — NOT
+# permissively licensed, so NEVER trained, NEVER shipped, NEVER redistributed.
+# Pulled locally only for measuring detector recall (training/eval_plinius.mjs).
+# Reproducible from training/fetch_plinius_eval.py. Only the survey table,
+# license inventory, harness + PLINIUS_BENCH.md are committed — never the raw.
+training/.eval_cache/
+# OOD eval raw datasets — third-party, not redistributed (size/licensing)
+training/.ood_cache/
+# V3 real-data retrain raw pool + held-out benchmark — third-party, not
+# redistributed (size/licensing). Splits derived deterministically downstream.
+training/.real_cache/
+# Versioned per-experiment train.jsonl dirs (V32/V33/V34/V35 derived data)
+training/.real_cache_v*/
+training/.real_cache_th/
+# Versioned model artifact export dirs (l5e_v33, l5e_v35 — large ONNX)
+models/l5e_v*/
+# ONNX export temp blobs / shape-inference scratch
+*.data
+sym_shape_infer_temp.onnx
+# fp32 distill intermediate (export_onnx.py strips it; never committed)
+models/l5b/_student_fp32/
+# STAGE-5 strictly-from-scratch L5c artifact: opt-in, npm-excluded
+# (files:["dist"]), large ONNX — gitignored from the shipped path exactly
+# like models/l5b. Reproducible from training/train_scratch.py (seed 1337).
+models/l5c/
+# STAGE-7 "intelligent" L5d artifact (fine-tuned Apache-2.0 distil-mBERT):
+# opt-in, npm-excluded (files:["dist"]), large INT8 ONNX — gitignored from
+# the shipped path exactly like models/l5b, l5c. Reproducible from
+# training/train_intelligent.py + export_intelligent.py (seed 1337).
+models/l5d/
+# STAGE-8 OUR-OWN pretrained backbone: sampled open pretrain corpus
+# (permissive third-party, not redistributed — size/licensing) + the
+# resulting L5e artifact (opt-in, npm-excluded, large INT8 ONNX). Both
+# gitignored from the shipped path exactly like .real_cache / models/l5d.
+# Reproducible from training/pretrain.py + train_intelligent.py
+# + export_intelligent.py (seed 1337).
+training/.pretrain_cache/
+# models/l5e/ — only the files shipped in the public release are tracked:
+# model.int8.onnx, vocab.txt, l5e.json and SHA256SUMS. Everything else under
+# it is ignored (training intermediates: _corpus/, _pretrained/,
+# _hf_fp32_*, *.bak, etc).
+models/l5e/*
+!models/l5e/model.int8.onnx
+!models/l5e/vocab.txt
+!models/l5e/l5e.json
+!models/l5e/SHA256SUMS
+# isolated offline training venv (stable pinned CPU stack; never shipped)
+training/.venv/
+# python bytecode cache (training scripts; never shipped)
+training/__pycache__/
+**/__pycache__/
+
+# Session-local — claude state, screenshots, big intermediates.
+# (training/.real_cache_th/ is not repeated here: it is already ignored
+# earlier in this file, next to the other training/.real_cache* entries.)
+.claude/
+.tmp/
+examples/sample-app/public/hero.png
@@ -0,0 +1,49 @@
+# Changelog
+
+All notable changes to promptpurify will be documented here.
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
+and this project adheres to [Semantic Versioning](https://semver.org).
+
+## [Unreleased]
+
+## [0.0.1] - 2026-05-29
+
+First public release.
+
+### Added
+
+- **promptpurify model** (13.9 MB INT8 ONNX, 4-layer transformer
+  encoder, ~13.7M params, trained from scratch by SecureLayer7).
+- **Structural firewall** — Unicode normalization, role-fenced messages,
+  sink-aware policy, tripwire regex. Deterministic, zero-dep, idempotent.
+- **`buildMessages()`** for role-separated, nonce-fenced LLM calls.
+- **`purifyOutput()`** for markdown-image / tracking-link exfil stripping.
+- **`createL5eRunner()`** ONNX runner with sliding-window inference
+  (500 chars wide, 250 stride) and `[UNTRUSTED]` marker invariance.
+- **Browser bundle** — 7 KB IIFE, structural-only, no ONNX bytes.
+- **`scripts/bench.mjs`** — public benchmark script with `--threshold`,
+  `--behavior`, `--by-behavior` flags.
+- **Documentation** — README, QUICKSTART, HOW-IT-WORKS, BENCHMARKS,
+  SAMPLE-DATA, REPRODUCE, HONEST-LIMITS, MODEL_CARD, SECURITY.
+- **Sample frozen eval** — 922-row scored JSONL (`training/FROZEN_EVAL_SCORED.jsonl`).
+- **Supply-chain hygiene** — `models/l5e/SHA256SUMS`, cosign-signed
+  release tarball, `npm publish --provenance`, CycloneDX SBOM.
+- **Customer-support sample app** — `examples/customer-support/`,
+  drop-in fintech chatbot with promptpurify guard.
+
+### Benchmarks (threshold 0.95)
+
+- Pliny 20% held-out: **95.66%** recall
+- Buried-tail injection: **85.54%** recall
+- Wikipedia long-paste: **4.67%** false-positive rate
+- p50 inference latency: ~5 ms (M4 Max CPU)
+
+For the full per-cell comparison against ProtectAI v2, deepset, fmops and
+Meta Prompt-Guard-2, see [docs/BENCHMARKS.md](docs/BENCHMARKS.md).
+
+### Known limitations
+
+See [docs/HONEST-LIMITS.md](docs/HONEST-LIMITS.md).
+
+[Unreleased]: https://github.com/securelayer7/PROMPTPurify/compare/v0.0.1...HEAD
+[0.0.1]: https://github.com/securelayer7/PROMPTPurify/releases/tag/v0.0.1
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 promptpurify contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.