Skip to content

Commit 7ca1d16

Browse files
author
Růžička, David
committed
Merge branch 'dr-fix-tool-validator' into 'main'
feat: add html_description_policy to upstream MCP provider config See merge request ai-adoption/mcp/mcp4openapi!8
2 parents 8c5b424 + fd80f60 commit 7ca1d16

6 files changed

Lines changed: 165 additions & 11 deletions

File tree

profile-schema.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1847,6 +1847,11 @@
18471847
"type": "number",
18481848
"description": "Optional request timeout for upstream MCP calls."
18491849
},
1850+
"html_description_policy": {
1851+
"type": "string",
1852+
"enum": ["allow", "strip", "drop"],
1853+
"description": "Controls how HTML tags in upstream tool descriptions and inputSchema are handled.\n- drop (default): tools containing HTML chars (<, >, backtick) are dropped\n- strip: HTML tags are stripped from descriptions/schema string values; tool is kept\n- allow: HTML checks are skipped entirely; tool passes through as-is"
1854+
},
18501855
"validation_endpoint": {
18511856
"type": "string",
18521857
"description": "Optional endpoint to validate upstream credentials at session init (fail-fast)."

src/generated-schemas.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,7 @@ export const upstreamMcpServerConfigSchema = z.object({
359359
tool_prefix: z.string().optional(),
360360
tools: upstreamMcpToolPolicySchema.optional(),
361361
timeout_ms: z.number().optional(),
362+
html_description_policy: z.enum(['allow', 'strip', 'drop']).optional(),
362363
validation_endpoint: z.string().optional(),
363364
validation_method: z.union([z.literal("HEAD"), z.literal("GET")]).optional(),
364365
validation_timeout_ms: z.number().optional()

src/mcp/mcp-server.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1906,7 +1906,7 @@ export class MCPServer {
19061906
);
19071907
}
19081908
const rawTools = result.tools;
1909-
const sanitized = sanitizeToolList(rawTools, this.logger);
1909+
const sanitized = sanitizeToolList(rawTools, this.logger, provider.html_description_policy ?? 'drop');
19101910
const policyFiltered = applyProviderToolPolicy(sanitized.tools, provider.tools);
19111911
// Cache sanitized+policy-filtered tool names for tools/call gate enforcement.
19121912
// Tools dropped here (bad description/inputSchema) must not be callable via tools/call.

src/types/profile.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,14 @@ export interface UpstreamMcpServerConfig {
9696
/** Optional request timeout for upstream MCP calls. */
9797
timeout_ms?: number;
9898

99+
/**
100+
* Controls how HTML tags in upstream tool descriptions and inputSchema are handled.
101+
* - drop (default): tools containing HTML chars (<, >, backtick) are dropped
102+
* - strip: HTML tags are stripped from descriptions/schema values; tool is kept
103+
* - allow: HTML checks are skipped entirely; tool passes through as-is
104+
*/
105+
html_description_policy?: 'allow' | 'strip' | 'drop';
106+
99107
/** Optional endpoint to validate upstream credentials at session init (fail-fast). */
100108
validation_endpoint?: string;
101109
/** HTTP method for validation probe. Default: 'HEAD'. */

src/upstream/upstream-tool-sanitizer.test.ts

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,92 @@ describe('sanitizeToolList', () => {
417417
});
418418
});
419419

420+
describe('sanitizeToolList — html_description_policy', () => {
421+
let logger: Logger;
422+
423+
beforeEach(() => {
424+
logger = { debug: vi.fn(), info: vi.fn(), warn: vi.fn(), error: vi.fn() };
425+
});
426+
427+
describe('drop (default)', () => {
428+
it('drops tool with HTML in description', () => {
429+
const tool = makeTool('t', 'Creates <b>bold</b> text');
430+
const result = sanitizeToolList([tool], logger);
431+
expect(result.tools).toHaveLength(0);
432+
expect(result.dropped[0].reason).toBe('forbidden characters in description');
433+
});
434+
435+
it('drops tool with HTML in inputSchema string value', () => {
436+
const tool: Tool = { name: 't', inputSchema: { type: 'object', properties: { x: { type: 'string', description: '<b>bad</b>' } } } };
437+
const result = sanitizeToolList([tool], logger);
438+
expect(result.tools).toHaveLength(0);
439+
expect(result.dropped[0].reason).toBe('forbidden characters in input schema');
440+
});
441+
});
442+
443+
describe('strip', () => {
444+
it('strips HTML tags from description and keeps tool', () => {
445+
const tool = makeTool('t', 'Creates <b>bold</b> issue <br/> in project');
446+
const result = sanitizeToolList([tool], logger, 'strip');
447+
expect(result.tools).toHaveLength(1);
448+
expect(result.tools[0].description).toBe('Creates bold issue in project');
449+
expect(result.dropped).toHaveLength(0);
450+
expect(logger.warn).not.toHaveBeenCalled();
451+
});
452+
453+
it('strips HTML tags from inputSchema string values', () => {
454+
const tool: Tool = {
455+
name: 't',
456+
description: 'plain',
457+
inputSchema: { type: 'object', properties: { x: { type: 'string', description: 'Use <code>format</code> param' } } },
458+
};
459+
const result = sanitizeToolList([tool], logger, 'strip');
460+
expect(result.tools).toHaveLength(1);
461+
const xDesc = (result.tools[0].inputSchema as any).properties.x.description;
462+
expect(xDesc).toBe('Use format param');
463+
});
464+
465+
it('does not mutate original tool object', () => {
466+
const tool = makeTool('t', 'Has <b>HTML</b>');
467+
sanitizeToolList([tool], logger, 'strip');
468+
expect(tool.description).toBe('Has <b>HTML</b>');
469+
});
470+
471+
it('still drops tool with invalid name regardless of strip policy', () => {
472+
const tool = makeTool('bad name!', 'Has <b>HTML</b>');
473+
const result = sanitizeToolList([tool], logger, 'strip');
474+
expect(result.tools).toHaveLength(0);
475+
expect(result.dropped[0].reason).toBe('invalid characters in tool name');
476+
});
477+
});
478+
479+
describe('allow', () => {
480+
it('passes tool with HTML description through unchanged', () => {
481+
const tool = makeTool('t', 'Creates <b>bold</b> text');
482+
const result = sanitizeToolList([tool], logger, 'allow');
483+
expect(result.tools).toHaveLength(1);
484+
expect(result.tools[0].description).toBe('Creates <b>bold</b> text');
485+
expect(result.dropped).toHaveLength(0);
486+
expect(logger.warn).not.toHaveBeenCalled();
487+
});
488+
489+
it('passes tool with HTML in inputSchema through unchanged', () => {
490+
const schema = { type: 'object', properties: { x: { type: 'string', description: '<b>bad</b>' } } };
491+
const tool: Tool = { name: 't', inputSchema: schema };
492+
const result = sanitizeToolList([tool], logger, 'allow');
493+
expect(result.tools).toHaveLength(1);
494+
expect((result.tools[0].inputSchema as any).properties.x.description).toBe('<b>bad</b>');
495+
});
496+
497+
it('still drops tool with invalid name regardless of allow policy', () => {
498+
const tool = makeTool('bad name!', 'Has <b>HTML</b>');
499+
const result = sanitizeToolList([tool], logger, 'allow');
500+
expect(result.tools).toHaveLength(0);
501+
expect(result.dropped[0].reason).toBe('invalid characters in tool name');
502+
});
503+
});
504+
});
505+
420506
describe('applyProviderToolPolicy', () => {
421507
const tools = [makeTool('alpha'), makeTool('beta'), makeTool('gamma')];
422508

src/upstream/upstream-tool-sanitizer.ts

Lines changed: 64 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ import { sanitizeLogMessage } from '../core/logger.js';
1818
import type { Logger } from '../core/logger.js';
1919
import type { UpstreamMcpToolPolicy } from '../types/profile.js';
2020

21+
export type HtmlDescriptionPolicy = 'allow' | 'strip' | 'drop';
22+
2123
export interface SanitizationResult {
2224
tools: Tool[];
2325
dropped: { name: string; reason: string }[];
@@ -26,6 +28,38 @@ export interface SanitizationResult {
2628
// Data-driven constraints
2729
const TOOL_NAME_PATTERN = /^[a-zA-Z0-9_-]+$/;
2830
const DESCRIPTION_FORBIDDEN_CHARS = /[<>`]/;
31+
const HTML_TAG_PATTERN = /<[^>]*>/g;
32+
33+
const MAX_EXCERPT_CONTEXT = 40;
34+
35+
function firstForbiddenExcerpt(text: string): string {
36+
const idx = text.search(DESCRIPTION_FORBIDDEN_CHARS);
37+
if (idx === -1) return '';
38+
const start = Math.max(0, idx - MAX_EXCERPT_CONTEXT);
39+
const end = Math.min(text.length, idx + MAX_EXCERPT_CONTEXT + 1);
40+
const prefix = start > 0 ? '…' : '';
41+
const suffix = end < text.length ? '…' : '';
42+
return prefix + text.slice(start, end) + suffix;
43+
}
44+
45+
function stripHtmlTags(text: string): string {
46+
return text.replace(HTML_TAG_PATTERN, '');
47+
}
48+
49+
function stripHtmlFromSchema(value: unknown, depth = 0): unknown {
50+
if (depth > 10) return value;
51+
if (typeof value === 'string') return stripHtmlTags(value);
52+
if (Array.isArray(value)) return value.map(v => stripHtmlFromSchema(v, depth + 1));
53+
if (typeof value === 'object' && value !== null) {
54+
const obj = value as Record<string, unknown>;
55+
const result: Record<string, unknown> = {};
56+
for (const [k, v] of Object.entries(obj)) {
57+
result[k] = stripHtmlFromSchema(v, depth + 1);
58+
}
59+
return result;
60+
}
61+
return value;
62+
}
2963

3064
/**
3165
* Recursively scan a JSON Schema object for forbidden characters in both keys and string values.
@@ -64,17 +98,18 @@ const truncateName = (name: string): string =>
6498
* 1. Name length <= 255
6599
* 2. Name matches [a-zA-Z0-9_-]
66100
* 3. Description length <= 2048 (if present)
67-
* 4. Description contains no <, >, or backtick (if present)
68-
* 5. inputSchema contains no forbidden characters in any key or string value
69-
* (recursive scan to depth 10; schemas exceeding the depth limit are dropped)
101+
* 4. HTML policy (html_description_policy):
102+
* - drop (default): tools with <, >, or backtick in description/inputSchema are dropped
103+
* - strip: HTML tags stripped from description and inputSchema string values; tool kept
104+
* - allow: HTML checks skipped entirely; tool passes through as-is
70105
*
71106
* Offending tools are dropped and logged. Safe tools pass through unchanged.
72107
*/
73-
export function sanitizeToolList(tools: Tool[], logger?: Logger): SanitizationResult {
108+
export function sanitizeToolList(tools: Tool[], logger?: Logger, htmlPolicy: HtmlDescriptionPolicy = 'drop'): SanitizationResult {
74109
const safe: Tool[] = [];
75110
const dropped: { name: string; reason: string }[] = [];
76111

77-
for (const tool of tools) {
112+
for (let tool of tools) {
78113
// Guard: upstream may return null or non-object entries (e.g. null items in tools array)
79114
if (tool === null || typeof tool !== 'object') {
80115
const safeName = sanitizeLogMessage(truncateName(String(tool)));
@@ -85,6 +120,7 @@ export function sanitizeToolList(tools: Tool[], logger?: Logger): SanitizationRe
85120
}
86121

87122
let reason: string | undefined;
123+
let excerpt: string | undefined;
88124

89125
// Runtime type guards: upstream may return non-string fields despite SDK types
90126
if (typeof tool.name !== 'string') {
@@ -97,20 +133,38 @@ export function sanitizeToolList(tools: Tool[], logger?: Logger): SanitizationRe
97133
reason = 'malformed tool definition: description is not a string';
98134
} else if (tool.description && tool.description.length > MAX_DESCRIPTION_LENGTH) {
99135
reason = 'tool description too long';
100-
} else if (tool.description && DESCRIPTION_FORBIDDEN_CHARS.test(tool.description)) {
101-
reason = 'forbidden characters in description';
102136
} else if (tool.inputSchema !== undefined && (typeof tool.inputSchema !== 'object' || tool.inputSchema === null || Array.isArray(tool.inputSchema))) {
103137
reason = 'malformed tool definition: inputSchema is not an object';
104-
} else if (tool.inputSchema && schemaContainsForbiddenChars(tool.inputSchema)) {
105-
reason = 'forbidden characters in input schema';
138+
} else if (htmlPolicy === 'drop') {
139+
if (tool.description && DESCRIPTION_FORBIDDEN_CHARS.test(tool.description)) {
140+
reason = 'forbidden characters in description';
141+
excerpt = firstForbiddenExcerpt(tool.description);
142+
} else if (tool.inputSchema && schemaContainsForbiddenChars(tool.inputSchema)) {
143+
reason = 'forbidden characters in input schema';
144+
const schemaStr = JSON.stringify(tool.inputSchema);
145+
excerpt = firstForbiddenExcerpt(schemaStr);
146+
}
147+
} else if (htmlPolicy === 'strip') {
148+
if (tool.description) {
149+
tool = { ...tool, description: stripHtmlTags(tool.description) };
150+
}
151+
if (tool.inputSchema) {
152+
tool = { ...tool, inputSchema: stripHtmlFromSchema(tool.inputSchema) as Tool['inputSchema'] };
153+
}
106154
}
155+
// htmlPolicy === 'allow': skip all HTML checks, pass tool through unchanged
107156

108157
if (reason !== undefined) {
109158
// Coerce non-string names to string for safe logging
110159
const nameStr = typeof tool.name === 'string' ? tool.name : String(tool.name);
111160
const safeName = sanitizeLogMessage(truncateName(nameStr));
161+
const safeExcerpt = excerpt ? sanitizeLogMessage(excerpt) : undefined;
112162
dropped.push({ name: safeName, reason });
113-
logger?.warn('Dropped upstream tool due to sanitization failure', { name: safeName, reason });
163+
logger?.warn('Dropped upstream tool due to sanitization failure', {
164+
name: safeName,
165+
reason,
166+
...(safeExcerpt !== undefined && { excerpt: safeExcerpt }),
167+
});
114168
} else {
115169
safe.push(tool);
116170
}

0 commit comments

Comments
 (0)