From 7db0b89393c207d0a6fdeb7c072f9d5f8f06dbb7 Mon Sep 17 00:00:00 2001 From: hanhui Date: Wed, 3 Jun 2026 03:23:04 +0800 Subject: [PATCH] fix: keep outbound redaction on regex path --- package-lock.json | 4 +-- src/engines/gliner.ts | 3 +-- src/engines/regex.ts | 10 ++++++++ src/message-sending-handler.ts | 13 +++++----- src/scanner.ts | 7 ++++++ tests/message-sending-handler.test.ts | 36 +++++++++++++++++++++++++++ tests/regex.test.ts | 22 ++++++++++++++++ 7 files changed, 84 insertions(+), 11 deletions(-) diff --git a/package-lock.json b/package-lock.json index a58f329..24b2b72 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@datafog/fogclaw", - "version": "0.2.0", + "version": "0.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@datafog/fogclaw", - "version": "0.2.0", + "version": "0.3.0", "license": "MIT", "dependencies": { "gliner": "^0.0.19", diff --git a/src/engines/gliner.ts b/src/engines/gliner.ts index 1c1f7ec..06d03a3 100644 --- a/src/engines/gliner.ts +++ b/src/engines/gliner.ts @@ -1,6 +1,5 @@ import fs from "node:fs/promises"; import path from "node:path"; -import { env } from "@xenova/transformers"; import type { Entity } from "../types.js"; import { canonicalType } from "../types.js"; @@ -53,7 +52,7 @@ function toAbsolutePath(value: string): string { } function getModelCacheDir(): string { - return env.localModelPath ?? path.join(process.cwd(), ".cache"); + return process.env.TRANSFORMERS_CACHE ?? path.join(process.cwd(), ".cache"); } function sanitizeModelReference(modelPath: string): string { diff --git a/src/engines/regex.ts b/src/engines/regex.ts index 1560195..a2dee5c 100644 --- a/src/engines/regex.ts +++ b/src/engines/regex.ts @@ -26,6 +26,16 @@ const PATTERNS: PatternDef[] = [ pattern: /\b(?:4\d{12}(?:\d{3})?|5[1-5]\d{14}|3[47]\d{13}|(?:(?:4\d{3}|5[1-5]\d{2}|3[47]\d{2})[-\s]?\d{4}[-\s]?\d{4}[-\s]?\d{4})|(?:3[47]\d{2}[-\s]?\d{6}[-\s]?\d{5}))\b/g, }, + { + label: "SECRET", + pattern: + /(? { const threshold = this.getThresholdForLabel(entity.label); diff --git a/tests/message-sending-handler.test.ts b/tests/message-sending-handler.test.ts index 66486ac..f7234d5 100644 --- a/tests/message-sending-handler.test.ts +++ b/tests/message-sending-handler.test.ts @@ -79,6 +79,42 @@ describe("createMessageSendingHandler", () => { expect(result!.content).not.toContain("john@example.com"); }); + it("uses regex-only scanning to keep outbound delivery fast", async () => { + const config = makeConfig(); + const scanner = new Scanner(config); + const fullScan = vi.spyOn(scanner, "scan").mockRejectedValue(new Error("GLiNER path should not run")); + const handler = createMessageSendingHandler(config, scanner); + + const result = await handler( + { to: "user", content: "Send to john@example.com" }, + makeCtx(), + ); + + expect(result).toBeDefined(); + expect(result!.content).toContain("[EMAIL_1]"); + expect(fullScan).not.toHaveBeenCalled(); + }); + + it("redacts secrets and tokens in outbound message", async () => { + const config = makeConfig(); + const scanner = new Scanner(config); + const handler = createMessageSendingHandler(config, scanner); + + const result = await handler( + { + to: "user", + content: "secret=abcDEF123456 token: Bearer tok_1234567890", + }, + makeCtx(), + ); + + expect(result).toBeDefined(); + expect(result!.content).toContain("[SECRET_1]"); + expect(result!.content).toContain("[TOKEN_1]"); + expect(result!.content).not.toContain("abcDEF123456"); + expect(result!.content).not.toContain("tok_1234567890"); + }); + it("returns void when no PII found", async () => { const config = makeConfig(); const scanner = new Scanner(config); diff --git a/tests/regex.test.ts b/tests/regex.test.ts index 7a26271..5d7dcde 100644 --- a/tests/regex.test.ts +++ b/tests/regex.test.ts @@ -168,6 +168,28 @@ describe("CREDIT_CARD", () => { }); }); +// --------------------------------------------------------------------------- +// SECRET / TOKEN +// --------------------------------------------------------------------------- +describe("SECRET", () => { + it("detects assigned secrets", () => { + const entities = assertSpans("client_secret=abcDEF123456 and password: hunter2222"); + const secrets = entities.filter((e) => e.label === "SECRET"); + expect(secrets.map((e) => e.text)).toEqual(["client_secret=abcDEF123456", "password: hunter2222"]); + }); +}); + +describe("TOKEN", () => { + it("detects assigned tokens and bearer values", () => { + const entities = assertSpans("token: Bearer tok_1234567890 api-key=key_1234567890"); + const tokens = entities.filter((e) => e.label === "TOKEN"); + expect(tokens.map((e) => e.text)).toEqual([ + "token: Bearer tok_1234567890", + "api-key=key_1234567890", + ]); + }); +}); + // --------------------------------------------------------------------------- // IP_ADDRESS // ---------------------------------------------------------------------------