securelayer7
diff --git a/‎.github/workflows/ci.yml‎
Lines changed: 39 additions & 0 deletions b/‎.github/workflows/ci.yml‎
Lines changed: 39 additions & 0 deletions
diff --git a/‎.github/workflows/release.yml‎
Lines changed: 109 additions & 0 deletions b/‎.github/workflows/release.yml‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 66 additions & 0 deletions b/‎.gitignore‎
Lines changed: 66 additions & 0 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 20 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎MODEL_CARD.md‎
Lines changed: 107 additions & 0 deletions b/‎MODEL_CARD.md‎
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,39 @@
+# CI: runs on every push to main and on every pull request.
+name: CI
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+
+# Least privilege: the jobs in this workflow only read the repository
+# (checkout, npm scripts, checksum verification, benchmark), so the
+# GITHUB_TOKEN is limited to read-only contents access.
+permissions:
+  contents: read
+
+jobs:
+  # Type-check, test and build the package once per Node.js version in the matrix.
+  build-test:
+    strategy:
+      matrix:
+        node:
+          - 20
+          - 22
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      # Install the Node.js version selected by the matrix, with npm caching on.
+      - uses: actions/setup-node@v4
+        with:
+          cache: npm
+          node-version: ${{ matrix.node }}
+      # Each check is its own step so a failure is attributed to exactly one
+      # of install, typecheck, test, or build.
+      - run: npm ci
+      - run: npm run typecheck
+      - run: npm test
+      - run: npm run build
+
+  # Runs only after build-test succeeds: checks the model files against
+  # their recorded hashes, then runs the benchmark script.
+  bench:
+    needs: build-test
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-node@v4
+        with:
+          cache: npm
+          node-version: 22
+      - run: npm ci
+      - run: npm run build
+      # sha256sum -c exits non-zero on any mismatch, which fails the job here.
+      - name: Verify model checksum
+        run: cd models/l5e && sha256sum -c SHA256SUMS
+      - name: Run public benchmark
+        run: node scripts/bench.mjs
@@ -0,0 +1,109 @@
+# Release pipeline: starts when a v* tag is pushed, or manually with a
+# required `tag` input.
+name: Release
+
+on:
+  workflow_dispatch:
+    inputs:
+      tag:
+        required: true
+        description: "Release tag (e.g. v0.0.1)"
+  push:
+    tags: ["v*"]
+
+# Token scopes requested for the whole workflow.
+permissions:
+  # SLSA build provenance
+  attestations: write
+  # GitHub Release upload
+  contents: write
+  # cosign keyless + npm provenance + SLSA attestation
+  id-token: write
+
+jobs:
+  # Builds, signs, attests and publishes one release: GitHub Release assets,
+  # the npm package, and the Hugging Face Hub mirror.
+  release:
+    runs-on: ubuntu-latest
+    env:
+      # Single source of truth for the tag being released. A manual
+      # workflow_dispatch run supplies it through the required `tag` input;
+      # a tag push has no inputs, so the expression falls back to the name
+      # of the pushed tag.
+      RELEASE_TAG: ${{ inputs.tag || github.ref_name }}
+    steps:
+      # Check out the release tag explicitly. Without `ref`, a manual run
+      # would build whatever ref it was dispatched from, not the tag named
+      # in the input.
+      - uses: actions/checkout@v4
+        with:
+          ref: refs/tags/${{ env.RELEASE_TAG }}
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 22
+          registry-url: 'https://registry.npmjs.org'
+          cache: npm
+
+      - run: npm ci
+      - run: npm run typecheck
+      - run: npm test
+      - run: npm run build
+
+      # ---------- Model artifact tarball ----------
+      # Verify first, so files that do not match their recorded hashes are
+      # never packaged.
+      - name: Verify model SHA256SUMS
+        run: cd models/l5e && sha256sum -c SHA256SUMS
+
+      - name: Build model tarball
+        run: |
+          tar -czf promptpurify-model.tar.gz \
+            models/l5e/model.int8.onnx \
+            models/l5e/vocab.txt \
+            models/l5e/l5e.json \
+            models/l5e/SHA256SUMS
+          sha256sum promptpurify-model.tar.gz > promptpurify-model.tar.gz.sha256
+
+      # ---------- cosign keyless signature ----------
+      - uses: sigstore/cosign-installer@v3
+
+      - name: cosign-sign model tarball
+        run: |
+          cosign sign-blob --yes \
+            --bundle promptpurify-model.tar.gz.cosign.bundle \
+            promptpurify-model.tar.gz
+
+      # ---------- SLSA build provenance ----------
+      - uses: actions/attest-build-provenance@v1
+        with:
+          subject-path: |
+            promptpurify-model.tar.gz
+            dist/**/*
+
+      # ---------- SBOM ----------
+      - name: Generate CycloneDX SBOM
+        run: npx --yes @cyclonedx/cyclonedx-npm --output-file SBOM.cdx.json --output-format JSON
+
+      # ---------- GitHub Release ----------
+      - name: Upload release artifacts
+        uses: softprops/action-gh-release@v2
+        with:
+          # Explicit tag: on a manual run the triggering ref may be a
+          # branch, which the release action cannot publish against.
+          tag_name: ${{ env.RELEASE_TAG }}
+          files: |
+            promptpurify-model.tar.gz
+            promptpurify-model.tar.gz.sha256
+            promptpurify-model.tar.gz.cosign.bundle
+            SBOM.cdx.json
+          generate_release_notes: true
+
+      # ---------- npm publish with provenance ----------
+      - name: Publish to npm with provenance
+        run: npm publish --provenance --access public
+        env:
+          NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
+
+      # ---------- Hugging Face Hub mirror ----------
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+      - name: Push to Securelayer7/promptpurify on HF Hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          pip install --quiet huggingface_hub
+          python - <<'PY'
+          import os, shutil
+          from huggingface_hub import HfApi
+          repo = "Securelayer7/promptpurify"
+          api = HfApi(token=os.environ["HF_TOKEN"])
+          api.create_repo(repo, repo_type="model", exist_ok=True)
+          # MODEL_CARD.md becomes the HF README (has the YAML frontmatter HF needs)
+          shutil.copy("MODEL_CARD.md", "models/l5e/README.md")
+          api.upload_folder(
+              repo_id=repo,
+              folder_path="models/l5e",
+              path_in_repo=".",
+              # RELEASE_TAG comes from the job-level env, so manual runs are
+              # labelled with the release tag rather than the dispatch ref.
+              commit_message=f"release {os.environ.get('RELEASE_TAG', 'manual')}",
+              allow_patterns=["model.int8.onnx", "vocab.txt", "l5e.json", "SHA256SUMS", "README.md"],
+          )
+          PY
@@ -0,0 +1,66 @@
+# Generic local artifacts: installed dependencies, build output, logs,
+# macOS folder metadata, coverage reports, .tgz archives, the .vitest/
+# directory, and .env files (including any .env.* variant).
+node_modules/
+dist/
+*.log
+.DS_Store
+coverage/
+*.tgz
+.vitest/
+.env
+.env.*
+# EVAL-ONLY adversarial benchmark: github.com/elder-plinius (Pliny/BASI) raw
+# jailbreak + system-prompt-leak payloads. AGPL-3.0 / unlicensed — NOT
+# permissively licensed, so NEVER trained, NEVER shipped, NEVER redistributed.
+# Pulled locally only for measuring detector recall (training/eval_plinius.mjs).
+# Reproducible from training/fetch_plinius_eval.py. Only the survey table,
+# license inventory, harness + PLINIUS_BENCH.md are committed — never the raw.
+training/.eval_cache/
+# OOD eval raw datasets — third-party, not redistributed (size/licensing)
+training/.ood_cache/
+# V3 real-data retrain raw pool + held-out benchmark — third-party, not
+# redistributed (size/licensing). Splits derived deterministically downstream.
+training/.real_cache/
+# Versioned per-experiment train.jsonl dirs (V32/V33/V34/V35 derived data)
+training/.real_cache_v*/
+training/.real_cache_th/
+# Versioned model artifact export dirs (l5e_v33, l5e_v35 — large ONNX)
+models/l5e_v*/
+# ONNX export temp blobs / shape-inference scratch
+*.data
+sym_shape_infer_temp.onnx
+# fp32 distill intermediate (export_onnx.py strips it; never committed)
+models/l5b/_student_fp32/
+# STAGE-5 strictly-from-scratch L5c artifact: opt-in, npm-excluded
+# (files:["dist"]), large ONNX — gitignored from the shipped path exactly
+# like models/l5b. Reproducible from training/train_scratch.py (seed 1337).
+models/l5c/
+# STAGE-7 "intelligent" L5d artifact (fine-tuned Apache-2.0 distil-mBERT):
+# opt-in, npm-excluded (files:["dist"]), large INT8 ONNX — gitignored from
+# the shipped path exactly like models/l5b, l5c. Reproducible from
+# training/train_intelligent.py + export_intelligent.py (seed 1337).
+models/l5d/
+# STAGE-8 OUR-OWN pretrained backbone: sampled open pretrain corpus
+# (permissive third-party, not redistributed — size/licensing), gitignored
+# exactly like .real_cache. The resulting L5e artifact (opt-in,
+# npm-excluded, large INT8 ONNX) is NOT ignored wholesale: its release
+# files are allow-listed by the models/l5e/ rules that follow.
+# Reproducible from training/pretrain.py + train_intelligent.py
+# + export_intelligent.py (seed 1337).
+training/.pretrain_cache/
+# models/l5e/ — public release shipped: model.int8.onnx, vocab.txt,
+# l5e.json, SHA256SUMS. Ignore everything else under it (training
+# intermediates: _corpus/, _pretrained/, _hf_fp32_*, *.bak, etc).
+models/l5e/*
+!models/l5e/model.int8.onnx
+!models/l5e/vocab.txt
+!models/l5e/l5e.json
+!models/l5e/SHA256SUMS
+# isolated offline training venv (stable pinned CPU stack; never shipped)
+training/.venv/
+# python bytecode cache at any depth, which already covers
+# training/__pycache__/ (training scripts; never shipped)
+**/__pycache__/
+
+# Session-local — claude state, screenshots, big intermediates
+# (training/.real_cache_th/ is already ignored next to training/.real_cache_v*/)
+.claude/
+.tmp/
+examples/sample-app/public/hero.png
@@ -0,0 +1,20 @@
+# Changelog
+
+This project follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/)
+and [SemVer](https://semver.org).
+
+## [0.0.1]
+
+First public release.
+
+- promptpurify model (~14 MB INT8 ONNX, CPU inference, built from
+  scratch by SecureLayer7).
+- SDK on npm — structural firewall, ONNX runner, browser IIFE.
+- Public eval slice + bench script.
+- Documentation: README + docs/ (QUICKSTART, HOW-IT-WORKS, BENCHMARKS,
+  SAMPLE-DATA, REPRODUCE, HONEST-LIMITS), MODEL_CARD, SECURITY.
+- CI + release workflows: cosign keyless signing, SLSA build
+  provenance, CycloneDX SBOM, npm publish --provenance, Hugging Face
+  mirror.
+
+[0.0.1]: https://github.com/securelayer7/PROMPTPurify/releases/tag/v0.0.1
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 SecureLayer7
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,107 @@
+---
+license: mit
+language:
+  - en
+library_name: onnx
+pipeline_tag: text-classification
+tags:
+  - prompt-injection
+  - jailbreak
+  - llm-security
+  - guardrail
+  - onnx
+metrics:
+  - recall
+  - false_positive_rate
+---
+
+# promptpurify model card
+
+**Tiny prompt-injection detector. ~14 MB. CPU. Built from scratch by
+[SecureLayer7](https://securelayer7.net).**
+
+## Intended use
+
+Single-turn classification of untrusted text into `benign` vs
+`prompt-injection`. Sits between user input (or a retrieved RAG chunk,
+or a tool output) and your LLM call. Outputs a probability score; you
+decide the threshold and the policy.
+
+```ts
+import { createL5eRunner } from "promptpurify/l5";
+const guard = await createL5eRunner();
+const score = await guard.score(userMessage);
+if (score >= 0.95) return refusal();
+```
+
+Full integration patterns: [docs/QUICKSTART.md](docs/QUICKSTART.md).
+
+## At a glance
+
+| | |
+|---|---|
+| Type | ONNX transformer classifier |
+| Size on disk | **~14 MB (INT8)** |
+| Inference | CPU, single-digit ms |
+| Runtime | `onnxruntime-node` (optional peer) |
+| Network | **None.** In-process. |
+
+## Training
+
+Built from scratch by SecureLayer7 on curated internal corpora.
+
+## Evaluation
+
+Benchmarked against public datasets and OSS baselines. Comparison and
+methodology: [docs/BENCHMARKS.md](docs/BENCHMARKS.md). Reproducibility:
+[docs/REPRODUCE.md](docs/REPRODUCE.md). Bench script
+`scripts/bench.mjs` re-scores the shipped public eval slice with this
+exact model artifact.
+
+## Out of scope
+
+- Single-turn scoring only — pair with conversation-level monitoring.
+- Content moderation (toxicity, hate, CSAM, self-harm) — pair with a
+  content classifier.
+- Authentication and tool-scope enforcement are application
+  responsibilities, not the model's.
+
+See [docs/HONEST-LIMITS.md](docs/HONEST-LIMITS.md).
+
+## Bias
+
+The model performs best on English text. Operators serving multilingual
+traffic should calibrate the threshold per language. The model has no
+access to user identity, account state, or conversation history.
+
+## License
+
+MIT for both the SDK and the model weights.
+
+Public datasets we evaluate against (and the OSS baseline models we
+compare to) carry their own upstream licenses — see
+[`training/CORPUS_LICENSES.json`](training/CORPUS_LICENSES.json).
+
+## Integrity verification
+
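+Every model artifact is checksummed. Verify the release tarball before
+extracting it, then verify the extracted model files from inside their
+directory (the same way the CI and release workflows run the check):
+
+```bash
+sha256sum -c promptpurify-model.tar.gz.sha256
+tar -xzf promptpurify-model.tar.gz
+(cd models/l5e && sha256sum -c SHA256SUMS)
+```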
+
+The release tarball is additionally cosign-signed with keyless
+Sigstore.
+
+## Distribution mirrors
+
+| Mirror | URL |
+|---|---|
+| GitHub Releases | `https://github.com/securelayer7/PROMPTPurify/releases` |
+| Hugging Face Hub | [`Securelayer7/promptpurify`](https://huggingface.co/Securelayer7/promptpurify) |
+
+## Contact
+
+- Security disclosures: [`SECURITY.md`](SECURITY.md) →
+  `security@securelayer7.net`
+- General: [GitHub Issues](https://github.com/securelayer7/PROMPTPurify/issues)