Skip to content

Commit b318ca5

Browse files
tofikwestclaudecursoragentMarfuen
authored
fix(tasks): prevent framework-specific content leaks in split header paragraphs (#2381)
* fix(tasks): hide framework-specific info irrelevant to organization Resolves SALE-3 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(tasks): handle split framework header paragraphs * fix(tasks): support composite framework header labels --------- Signed-off-by: Tofik Hasanov <72318342+tofikwest@users.noreply.github.com> Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: Mariano Fuentes <marfuen98@gmail.com>
1 parent c41e5c5 commit b318ca5

3 files changed

Lines changed: 405 additions & 42 deletions

File tree

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
import { filterDescriptionByFrameworks } from './description-framework-filter';
2+
3+
/**
 * Behavioural tests for `filterDescriptionByFrameworks`.
 *
 * Covers inline sections ("For X: text"), split sections ("For X:" followed
 * by a separate content paragraph) and composite headers ("For X and Y:"),
 * on both the removal path (framework inactive) and the keep path
 * (framework active or label unknown).
 */
describe('filterDescriptionByFrameworks', () => {
  it('returns the description unchanged when no active frameworks are provided', () => {
    const desc =
      'General task.\n\nFor ISO 27001: Store NDA evidence.\n\nFor PCI: Document checks.';
    expect(filterDescriptionByFrameworks(desc, [])).toBe(desc);
  });

  it('returns empty string for empty description', () => {
    expect(filterDescriptionByFrameworks('', ['SOC 2'])).toBe('');
  });

  it('keeps paragraphs that match an active framework', () => {
    const desc =
      'Maintain a list.\n\nFor ISO 27001: Store NDA evidence.\n\nFor PCI: Document checks.';
    const result = filterDescriptionByFrameworks(desc, ['ISO 27001']);
    expect(result).toContain('Maintain a list.');
    expect(result).toContain('For ISO 27001: Store NDA evidence.');
    expect(result).not.toContain('For PCI');
  });

  it('removes paragraphs for inactive frameworks', () => {
    const desc =
      'General description.\n\nFor HIPAA: Know which devices hold patient data.\n\nFor GDPR: Document lawful basis.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);
    expect(result).toBe('General description.');
  });

  it('keeps all framework paragraphs when all are active', () => {
    const desc =
      'Base task.\n\nFor ISO 27001: ISO requirement.\n\nFor HIPAA: HIPAA requirement.';
    const result = filterDescriptionByFrameworks(desc, [
      'ISO 27001',
      'HIPAA',
    ]);
    expect(result).toContain('For ISO 27001');
    expect(result).toContain('For HIPAA');
    expect(result).toContain('Base task.');
  });

  it('handles alias matching (e.g. "PCI" matches "PCI DSS")', () => {
    const desc =
      'Base.\n\nFor PCI: PCI-specific info.\n\nFor ISO 27001: ISO info.';
    const result = filterDescriptionByFrameworks(desc, ['PCI DSS']);
    expect(result).toContain('For PCI');
    expect(result).not.toContain('For ISO 27001');
  });

  it('handles case-insensitive matching', () => {
    const desc = 'Base.\n\nFor HIPAA: Some requirement.';
    const result = filterDescriptionByFrameworks(desc, ['hipaa']);
    expect(result).toContain('For HIPAA');
  });

  it('keeps paragraphs without framework prefixes', () => {
    const desc =
      'Upload a screenshot.\n\nProvide documentation.\n\nFor GDPR: Document lawful basis.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);
    expect(result).toContain('Upload a screenshot.');
    expect(result).toContain('Provide documentation.');
    expect(result).not.toContain('GDPR');
  });

  it('keeps unknown framework labels as a safe default', () => {
    const desc = 'Base.\n\nFor CustomFramework: Custom requirement.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);
    expect(result).toContain('For CustomFramework');
  });

  it('handles the real-world Employee Verification example', () => {
    const desc =
      'Maintain a list of reference checks you made for every new hire. Verify the identity of every new hire.\n\nFor ISO 27001: Ensure you are also storing the NDA, candidate evaluation form and access creation request with its approval evidence\n\nFor PCI: For employees with potential access to the CDE, document background verification checks (e.g., reference check, prior employment) before granting access to CHD systems.';

    // Org only has SOC 2 active
    const soc2Only = filterDescriptionByFrameworks(desc, ['SOC 2']);
    expect(soc2Only).toBe(
      'Maintain a list of reference checks you made for every new hire. Verify the identity of every new hire.',
    );

    // Org has ISO 27001 active
    const iso = filterDescriptionByFrameworks(desc, ['ISO 27001']);
    expect(iso).toContain('For ISO 27001');
    expect(iso).not.toContain('For PCI');
  });

  it('handles the real-world Asset Inventory with HIPAA example', () => {
    const desc =
      'Keep and maintain a list of your devices (laptops/servers). If you install the Comp AI agent on your devices, these will be automatically tracked in-app and you can mark this task as not-relevant.\n\nFor HIPAA: Know which devices hold your patient data is going and create a maintain a system to track it\n\nComp AI device agent is located at: portal.trycomp.ai';

    const soc2Only = filterDescriptionByFrameworks(desc, ['SOC 2']);
    expect(soc2Only).toContain('Keep and maintain a list');
    expect(soc2Only).not.toContain('HIPAA');
    expect(soc2Only).toContain('Comp AI device agent');
  });

  it('handles SOC 2 v.1 seed name variant', () => {
    const desc = 'Base.\n\nFor HIPAA: HIPAA info.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2 v.1']);
    expect(result).not.toContain('HIPAA');
  });

  it('removes a framework section when header and content are split across paragraphs', () => {
    const desc =
      'General guidance.\n\nFor GDPR:\n\nMaintain a documented data breach response plan.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);

    expect(result).toBe('General guidance.');
  });

  it('keeps a split framework section when its framework is active', () => {
    const desc =
      'General guidance.\n\nFor GDPR:\n\nMaintain a documented data breach response plan.';
    const result = filterDescriptionByFrameworks(desc, ['GDPR']);

    // Header and content paragraphs must both survive, in order.
    expect(result).toBe(desc);
  });

  it('keeps a composite framework section when any listed framework is active', () => {
    const desc =
      'Architecture Diagram: Draw a single-page diagram.\n\nFor ISO 27001 and HIPAA:\n\nData Flow Diagram: Show exactly how user and sensitive data travels.';
    const result = filterDescriptionByFrameworks(desc, ['HIPAA']);

    expect(result).toBe(desc);
  });

  it('keeps a composite framework section that contains an unknown label', () => {
    const desc =
      'Base.\n\nFor HIPAA and CustomFramework:\n\nCustom requirement.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);

    // Unknown parts trigger the "keep visible" safe default.
    expect(result).toBe(desc);
  });

  it('removes leaked framework-specific content for Public Policies seed format', () => {
    const desc =
      'Add a comment with links to your privacy policy.\n\nFor GDPR:\n\nMaintain clear, transparent, and GDPR-compliant privacy notices.\n\nFor ISO 42001:\n\nEnsure policies identify stakeholder rights and obligations.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);

    expect(result).toBe('Add a comment with links to your privacy policy.');
  });

  it('removes leaked framework-specific content for Incident Response seed format', () => {
    const desc =
      'Keep a record of all security incidents and how they were resolved.\n\nFor GDPR:\n\nMaintain a documented data breach response plan.\n\nFor PCI:\n\nMaintain and annually test the incident response plan for cardholder data incidents.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);

    expect(result).toBe(
      'Keep a record of all security incidents and how they were resolved.',
    );
  });

  it('removes leaked framework-specific content for Board Meetings & Independence seed format', () => {
    const desc =
      'Submit board meeting evidence covering security topics.\n\nFor ISO 42001:\n\nEnsure board reviews discuss internal and external issues relevant to the AI MS.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);

    expect(result).toBe(
      'Submit board meeting evidence covering security topics.',
    );
  });

  it('removes leaked framework-specific content for Diagramming seed format', () => {
    const desc =
      'Architecture Diagram: Draw a single-page diagram.\n\nFor ISO 27001 and HIPAA:\n\nData Flow Diagram: Show exactly how user and sensitive data travels.\n\nFor ISO 42001:\n\nDocument how internal and external issues are reflected in diagrams.\n\nFor GDPR:\n\nMaintain an up-to-date data inventory and data flow map.\n\nFor PCI:\n\nMaintain current CDE network diagrams.';
    const result = filterDescriptionByFrameworks(desc, ['SOC 2']);

    expect(result).toBe('Architecture Diagram: Draw a single-page diagram.');
  });
});
Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
/**
 * Alias table for the frameworks that task descriptions refer to.
 *
 * Key   = canonical label as written in "For <label>:" paragraphs.
 * Value = every spelling under which the same framework may be stored in
 *         FrameworkEditorFramework.name (the canonical label included).
 *
 * All entries are lower-case; callers compare case-insensitively.
 */
const FRAMEWORK_ALIASES: Record<string, string[]> = {
  'soc 2': [
    'soc 2',
    'soc2',
    'soc 2 v.1',
  ],
  'iso 27001': [
    'iso 27001',
    'iso27001',
  ],
  'iso 42001': [
    'iso 42001',
    'iso42001',
  ],
  'iso 9001': [
    'iso 9001',
    'iso9001',
  ],
  'hipaa': [
    'hipaa',
  ],
  'gdpr': [
    'gdpr',
  ],
  'pci dss': [
    'pci dss',
    'pci',
    'pci v0',
    'example pci',
  ],
  'nen 7510': [
    'nen 7510',
    'nen7510',
  ],
  'nist csf': [
    'nist csf',
  ],
  'nist 800-53': [
    'nist 800-53',
  ],
  'nis 2': [
    'nis 2',
    'nis2',
  ],
};
21+
22+
/**
 * Regex that matches a framework-specific prefix such as "For ISO 27001:",
 * "For HIPAA:" or a composite like "For ISO 27001, HIPAA & GDPR:".
 *
 * Capture group 1 = framework label (e.g. "ISO 27001", "PCI",
 * "ISO 27001 and HIPAA").
 *
 * Notes:
 * - The label character class includes "," and "&" so that every separator
 *   accepted by COMPOSITE_LABEL_SEPARATOR_RE can actually appear inside a
 *   captured label.
 * - The `i` flag makes "For" case-insensitive.
 * - The `m` flag lets `^` match at the start of ANY line of the tested
 *   string, not only at its first character; only spaces/tabs may precede
 *   "For" on that line.
 * - The label is matched lazily, so it ends at the first ":" on the line.
 */
const FOR_FRAMEWORK_LINE_RE =
  /^[ \t]*For\s+([A-Za-z0-9][A-Za-z0-9 .,&\-/]*?)\s*:/im;

/**
 * Separators between the individual framework names of a composite label:
 * ",", "/", "&" or the whole word "and" (case-insensitive), each with
 * optional surrounding whitespace.
 */
const COMPOSITE_LABEL_SEPARATOR_RE = /\s*(?:,|\/|&|\band\b)\s*/i;
34+
35+
/**
 * Normalise a framework name for comparison.
 *
 * Trims the ends, collapses every run of whitespace (including tabs and
 * non-breaking spaces) to a single space and lower-cases the result, so
 * that "ISO  27001" and " iso 27001 " compare equal.
 *
 * @param name - Raw framework name or label.
 * @returns The canonical comparison form of `name`.
 */
function normalise(name: string): string {
  return name.trim().replace(/\s+/g, ' ').toLowerCase();
}
41+
42+
/**
 * Expand the organisation's framework names into the full set of labels
 * that should count as "active".
 *
 * Every given name is added in normalised form. In addition, for each
 * FRAMEWORK_ALIASES entry whose alias list contains that name, the entry's
 * canonical key and all of its aliases are added as well.
 *
 * @param activeFrameworkNames - Framework names as stored for the org.
 * @returns Set of normalised labels considered active.
 */
function buildActiveLabels(activeFrameworkNames: string[]): Set<string> {
  const aliasTable = Object.entries(FRAMEWORK_ALIASES);
  const collected = new Set<string>();

  activeFrameworkNames.forEach((rawName) => {
    const wanted = normalise(rawName);
    collected.add(wanted);

    // Pull in the whole alias family for every entry listing this name.
    aliasTable
      .filter(([, aliases]) =>
        aliases.some((alias) => normalise(alias) === wanted),
      )
      .forEach(([canonical, aliases]) => {
        [canonical, ...aliases].forEach((member) => {
          collected.add(normalise(member));
        });
      });
  });

  return collected;
}
67+
68+
/**
 * Decide whether the label taken from a "For <label>:" header refers to an
 * active framework.
 *
 * Resolution order:
 * 1. The normalised label is itself in `activeLabels` -> active.
 * 2. The label belongs to a FRAMEWORK_ALIASES family -> active iff any
 *    member of that family is in `activeLabels`.
 * 3. The label is a composite ("ISO 27001 and HIPAA") whose parts are ALL
 *    recognised -> active iff at least one part is active.
 * 4. Anything else (unknown label, or a composite with an unknown part) is
 *    treated as active so the paragraph stays visible.
 *
 * @param label - Raw label captured from the header.
 * @param activeLabels - Normalised labels of the org's active frameworks.
 * @returns `true` when the paragraph should be shown.
 */
function isLabelActive(label: string, activeLabels: Set<string>): boolean {
  const wanted = normalise(label);

  // 1. Direct hit.
  if (activeLabels.has(wanted)) return true;

  // Normalised name list (canonical + aliases) per alias-table entry.
  const families = Object.entries(FRAMEWORK_ALIASES).map(
    ([canonical, aliases]) => [canonical, ...aliases].map(normalise),
  );
  const familyOf = (name: string): string[] | undefined =>
    families.find((members) => members.includes(name));
  const familyIsActive = (members: string[]): boolean =>
    members.some((member) => activeLabels.has(member));

  // 2. Single known framework, possibly under another spelling.
  const ownFamily = familyOf(wanted);
  if (ownFamily !== undefined) return familyIsActive(ownFamily);

  // 3. Composite headers like "For ISO 27001 and HIPAA:".
  const pieces = wanted
    .split(COMPOSITE_LABEL_SEPARATOR_RE)
    .map((piece) => piece.trim())
    .filter((piece) => piece.length > 0);

  if (pieces.length > 1) {
    let sawUnknownPiece = false;
    let sawActivePiece = false;

    for (const piece of pieces) {
      if (activeLabels.has(piece)) {
        sawActivePiece = true;
        continue;
      }

      const pieceFamily = familyOf(piece);
      if (pieceFamily === undefined) {
        sawUnknownPiece = true;
      } else if (familyIsActive(pieceFamily)) {
        sawActivePiece = true;
      }
    }

    // Only trust the verdict when every piece was recognised.
    if (!sawUnknownPiece) return sawActivePiece;
  }

  // 4. Unknown label - keep it visible (safe default).
  return true;
}
132+
133+
/**
 * Filter framework-specific paragraphs from a task description.
 *
 * The description is split into paragraphs on blank lines (LF or CRLF).
 * A paragraph containing a "For <FrameworkName>:" header is removed when
 * `isLabelActive` reports the label as inactive. When a paragraph consists
 * of nothing but the header, the single paragraph that follows is treated
 * as that section's content and is kept or removed together with it.
 *
 * @param description - Raw task description. Returned as-is when empty.
 * @param activeFrameworkNames - Names of the organisation's active
 *   frameworks. An empty list disables filtering and returns `description`
 *   unchanged.
 * @returns The surviving paragraphs joined with a blank line ("\n\n") and
 *   trimmed.
 */
export function filterDescriptionByFrameworks(
  description: string,
  activeFrameworkNames: string[],
): string {
  if (!description) return description;
  if (activeFrameworkNames.length === 0) return description;

  const activeLabels = buildActiveLabels(activeFrameworkNames);

  // Split on blank lines. "\r?" makes CRLF text split too; without it a
  // CRLF description stays one single paragraph and is dropped wholesale as
  // soon as any of its lines names an inactive framework.
  const paragraphs = description.split(/(?:\r?\n){2,}/);

  const kept: string[] = [];
  // Verdict of a header-only paragraph still waiting for its content
  // paragraph; null when no header is pending.
  let pendingHeaderIsActive: boolean | null = null;

  for (const paragraph of paragraphs) {
    const match = paragraph.match(FOR_FRAMEWORK_LINE_RE);

    // Seed data format where header and section content are split:
    // "For GDPR:\n\n<framework-specific paragraph>"
    const isHeaderOnly =
      match !== null &&
      paragraph.replace(FOR_FRAMEWORK_LINE_RE, '').trim() === '';

    // Content paragraph of a pending header: it shares the header's fate.
    // A header-only paragraph is never consumed as content; it opens its
    // own section, so back-to-back headers cannot desynchronise the filter.
    if (pendingHeaderIsActive !== null && !isHeaderOnly) {
      if (pendingHeaderIsActive) kept.push(paragraph);
      pendingHeaderIsActive = null;
      continue;
    }

    // Not a framework-specific paragraph.
    if (match === null) {
      kept.push(paragraph);
      continue;
    }

    const isActive = isLabelActive(match[1], activeLabels);
    pendingHeaderIsActive = isHeaderOnly ? isActive : null;
    if (isActive) kept.push(paragraph);
  }

  return kept.join('\n\n').trim();
}

0 commit comments

Comments
 (0)