@@ -18,6 +18,8 @@ import { sanitizeLogMessage } from '../core/logger.js';
1818import type { Logger } from '../core/logger.js' ;
1919import type { UpstreamMcpToolPolicy } from '../types/profile.js' ;
2020
/**
 * Policy for HTML-like content found in an upstream tool's description and inputSchema:
 * - 'allow': HTML checks are skipped and the tool passes through as-is.
 * - 'strip': HTML tags are removed from the description and inputSchema string values; the tool is kept.
 * - 'drop':  tools containing <, >, or a backtick in the description or inputSchema are dropped.
 */
export type HtmlDescriptionPolicy = 'allow' | 'strip' | 'drop';
22+
2123export interface SanitizationResult {
2224 tools : Tool [ ] ;
2325 dropped : { name : string ; reason : string } [ ] ;
@@ -26,6 +28,38 @@ export interface SanitizationResult {
// Data-driven constraints

/** Tool names may contain only ASCII letters, digits, underscore, and hyphen (at least one character). */
const TOOL_NAME_PATTERN = /^[a-zA-Z0-9_-]+$/;
/** Characters treated as forbidden: <, >, and backtick. Non-global, so it is safe to reuse with test()/search(). */
const DESCRIPTION_FORBIDDEN_CHARS = /[<>`]/;
/** One tag-like span: '<', any run of characters other than '>', then '>'. Global so replace() removes every occurrence. */
const HTML_TAG_PATTERN = /<[^>]*>/g;

/** Number of UTF-16 code units of context kept on each side of a forbidden character in a log excerpt. */
const MAX_EXCERPT_CONTEXT = 40;
34+
/**
 * Build a short excerpt of `text` around its first forbidden character, for use in log output.
 *
 * @param text - String searched with DESCRIPTION_FORBIDDEN_CHARS.
 * @returns The first match with up to MAX_EXCERPT_CONTEXT UTF-16 code units of context on each
 *          side, with '…' added on any side where text was cut off; '' when nothing matches.
 */
function firstForbiddenExcerpt(text: string): string {
  const idx = text.search(DESCRIPTION_FORBIDDEN_CHARS);
  if (idx === -1) return '';

  const isHighSurrogate = (unit: number): boolean => unit >= 0xd800 && unit <= 0xdbff;
  const isLowSurrogate = (unit: number): boolean => unit >= 0xdc00 && unit <= 0xdfff;

  let start = Math.max(0, idx - MAX_EXCERPT_CONTEXT);
  let end = Math.min(text.length, idx + MAX_EXCERPT_CONTEXT + 1);

  // The window edges are raw UTF-16 offsets and can land in the middle of a surrogate pair.
  // Shrink the window by one code unit on that side so the excerpt never contains a lone
  // surrogate. The matched character is ASCII, so it always stays inside the window.
  if (start > 0 && isLowSurrogate(text.charCodeAt(start)) && isHighSurrogate(text.charCodeAt(start - 1))) {
    start += 1;
  }
  if (end < text.length && isHighSurrogate(text.charCodeAt(end - 1)) && isLowSurrogate(text.charCodeAt(end))) {
    end -= 1;
  }

  const prefix = start > 0 ? '…' : '';
  const suffix = end < text.length ? '…' : '';
  return prefix + text.slice(start, end) + suffix;
}
44+
/**
 * Remove every span matched by HTML_TAG_PATTERN from `text`.
 *
 * Only spans that run from a '<' to a following '>' are removed; the text between tags is kept,
 * and a '<' with no later '>' is left in place.
 *
 * @param text - Input string.
 * @returns `text` with all matched spans deleted.
 */
function stripHtmlTags(text: string): string {
  const withoutTags = text.replace(HTML_TAG_PATTERN, '');
  return withoutTags;
}
48+
/**
 * Return a copy of `value` with HTML tags removed from every string value it contains.
 *
 * Strings go through stripHtmlTags; arrays and plain objects are rebuilt recursively; every other
 * value is returned as-is. Object keys are copied verbatim — only values are stripped.
 *
 * @param value - Value to process (called with a tool's inputSchema).
 * @param depth - Current nesting level; external callers normally omit it.
 * @returns The stripped copy. Anything nested more than 10 levels deep is returned as-is, and is
 *          therefore NOT stripped.
 */
function stripHtmlFromSchema(value: unknown, depth = 0): unknown {
  if (depth > 10) return value;
  if (typeof value === 'string') return stripHtmlTags(value);
  if (Array.isArray(value)) return value.map((item) => stripHtmlFromSchema(item, depth + 1));
  if (typeof value === 'object' && value !== null) {
    // Build the copy with Object.fromEntries rather than `result[key] = …`: plain assignment of an
    // own "__proto__" key (which JSON.parse can produce) would invoke the Object.prototype setter,
    // dropping the key and replacing the copy's prototype with upstream-controlled data.
    return Object.fromEntries(
      Object.entries(value).map(([key, child]): [string, unknown] => [
        key,
        stripHtmlFromSchema(child, depth + 1),
      ]),
    );
  }
  return value;
}
2963
3064/**
3165 * Recursively scan a JSON Schema object for forbidden characters in both keys and string values.
@@ -64,17 +98,18 @@ const truncateName = (name: string): string =>
6498 * 1. Name length <= 255
6599 * 2. Name matches [a-zA-Z0-9_-]
66100 * 3. Description length <= 2048 (if present)
67- * 4. Description contains no <, >, or backtick (if present)
68- * 5. inputSchema contains no forbidden characters in any key or string value
69- * (recursive scan to depth 10; schemas exceeding the depth limit are dropped)
101+ * 4. HTML policy (html_description_policy):
102+ * - drop (default): tools with <, >, or backtick in description/inputSchema are dropped
103+ * - strip: HTML tags stripped from description and inputSchema string values; tool kept
104+ * - allow: HTML checks skipped entirely; tool passes through as-is
70105 *
71106 * Offending tools are dropped and logged. Tools that pass are kept; under the strip policy they are returned with HTML tags removed, otherwise unchanged.
72107 */
73- export function sanitizeToolList ( tools : Tool [ ] , logger ?: Logger ) : SanitizationResult {
108+ export function sanitizeToolList ( tools : Tool [ ] , logger ?: Logger , htmlPolicy : HtmlDescriptionPolicy = 'drop' ) : SanitizationResult {
74109 const safe : Tool [ ] = [ ] ;
75110 const dropped : { name : string ; reason : string } [ ] = [ ] ;
76111
77- for ( const tool of tools ) {
112+ for ( let tool of tools ) {
78113 // Guard: upstream may return null or non-object entries (e.g. null items in tools array)
79114 if ( tool === null || typeof tool !== 'object' ) {
80115 const safeName = sanitizeLogMessage ( truncateName ( String ( tool ) ) ) ;
@@ -85,6 +120,7 @@ export function sanitizeToolList(tools: Tool[], logger?: Logger): SanitizationRe
85120 }
86121
87122 let reason : string | undefined ;
123+ let excerpt : string | undefined ;
88124
89125 // Runtime type guards: upstream may return non-string fields despite SDK types
90126 if ( typeof tool . name !== 'string' ) {
@@ -97,20 +133,38 @@ export function sanitizeToolList(tools: Tool[], logger?: Logger): SanitizationRe
97133 reason = 'malformed tool definition: description is not a string' ;
98134 } else if ( tool . description && tool . description . length > MAX_DESCRIPTION_LENGTH ) {
99135 reason = 'tool description too long' ;
100- } else if ( tool . description && DESCRIPTION_FORBIDDEN_CHARS . test ( tool . description ) ) {
101- reason = 'forbidden characters in description' ;
102136 } else if ( tool . inputSchema !== undefined && ( typeof tool . inputSchema !== 'object' || tool . inputSchema === null || Array . isArray ( tool . inputSchema ) ) ) {
103137 reason = 'malformed tool definition: inputSchema is not an object' ;
104- } else if ( tool . inputSchema && schemaContainsForbiddenChars ( tool . inputSchema ) ) {
105- reason = 'forbidden characters in input schema' ;
138+ } else if ( htmlPolicy === 'drop' ) {
139+ if ( tool . description && DESCRIPTION_FORBIDDEN_CHARS . test ( tool . description ) ) {
140+ reason = 'forbidden characters in description' ;
141+ excerpt = firstForbiddenExcerpt ( tool . description ) ;
142+ } else if ( tool . inputSchema && schemaContainsForbiddenChars ( tool . inputSchema ) ) {
143+ reason = 'forbidden characters in input schema' ;
144+ const schemaStr = JSON . stringify ( tool . inputSchema ) ;
145+ excerpt = firstForbiddenExcerpt ( schemaStr ) ;
146+ }
147+ } else if ( htmlPolicy === 'strip' ) {
148+ if ( tool . description ) {
149+ tool = { ...tool , description : stripHtmlTags ( tool . description ) } ;
150+ }
151+ if ( tool . inputSchema ) {
152+ tool = { ...tool , inputSchema : stripHtmlFromSchema ( tool . inputSchema ) as Tool [ 'inputSchema' ] } ;
153+ }
106154 }
155+ // htmlPolicy === 'allow': skip all HTML checks, pass tool through unchanged
107156
108157 if ( reason !== undefined ) {
109158 // Coerce non-string names to string for safe logging
110159 const nameStr = typeof tool . name === 'string' ? tool . name : String ( tool . name ) ;
111160 const safeName = sanitizeLogMessage ( truncateName ( nameStr ) ) ;
161+ const safeExcerpt = excerpt ? sanitizeLogMessage ( excerpt ) : undefined ;
112162 dropped . push ( { name : safeName , reason } ) ;
113- logger ?. warn ( 'Dropped upstream tool due to sanitization failure' , { name : safeName , reason } ) ;
163+ logger ?. warn ( 'Dropped upstream tool due to sanitization failure' , {
164+ name : safeName ,
165+ reason,
166+ ...( safeExcerpt !== undefined && { excerpt : safeExcerpt } ) ,
167+ } ) ;
114168 } else {
115169 safe . push ( tool ) ;
116170 }
0 commit comments