Skip to content

Commit f18943d

Browse files
fix(mcp): cap search_metadata response size to prevent LLM context overflow (#28383)
* fix(mcp): cap search_metadata response size and truncate columnNames
* fix(mcp): remove column truncation, guide LLM to use smaller page sizes
1 parent bc7449b commit f18943d

3 files changed

Lines changed: 115 additions & 3 deletions

File tree

openmetadata-mcp/src/main/java/org/openmetadata/mcp/tools/SearchMetadataTool.java

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ public class SearchMetadataTool implements McpTool {
3131
private static final int MAX_ALLOWED_AGGREGATION_BUCKETS = 50;
3232
private static final int DESCRIPTION_MAX_LENGTH = 500;
3333
private static final int DESCRIPTION_TRUNCATE_LENGTH = 450;
34+
private static final int MAX_RESPONSE_CHARS = 100_000;
3435

3536
private static final List<String> ESSENTIAL_FIELDS_ONLY =
3637
List.of(
@@ -332,11 +333,48 @@ static Map<String, Object> buildEnhancedSearchResponse(
332333
result.put(
333334
"message",
334335
String.format(
335-
"Found %d total results, showing first %d. Use pagination or refine your search for more specific results, you can call these 3 times by yourself with pagination , and then only if the user ask for more paginate.",
336+
"Found %d total results, showing first %d. "
337+
+ "There are many matching assets. Are you looking for something specific? "
338+
+ "Try narrowing with a service name, schema name, or more specific search term.",
336339
totalResults, cleanedResults.size()));
337340
result.put("hasMore", true);
338341
}
339342

343+
try {
344+
String serialized = JsonUtils.pojoToJson(result);
345+
LOG.debug(
346+
"[MCP] search_metadata response size: {} chars for query '{}'",
347+
serialized.length(),
348+
query);
349+
if (serialized.length() > MAX_RESPONSE_CHARS) {
350+
int targetCount =
351+
Math.min(
352+
Math.max(
353+
1,
354+
(int)
355+
(cleanedResults.size() * (MAX_RESPONSE_CHARS * 0.8) / serialized.length())),
356+
cleanedResults.size());
357+
List<Map<String, Object>> trimmed = new ArrayList<>(cleanedResults.subList(0, targetCount));
358+
LOG.warn(
359+
"[MCP] search_metadata response trimmed: {} chars -> {} results (was {})",
360+
serialized.length(),
361+
trimmed.size(),
362+
cleanedResults.size());
363+
result.put("results", trimmed);
364+
result.put("returnedCount", trimmed.size());
365+
result.put("hasMore", true);
366+
result.put(
367+
"message",
368+
String.format(
369+
"Response exceeded %d characters and was trimmed to %d of %d results. "
370+
+ "There are many matching assets. Are you looking for something specific? "
371+
+ "Try narrowing with a service name, schema, or specific name.",
372+
MAX_RESPONSE_CHARS, trimmed.size(), totalResults));
373+
}
374+
} catch (RuntimeException e) {
375+
LOG.warn("Failed to check response size for query '{}': {}", query, e.getMessage());
376+
}
377+
340378
return result;
341379
}
342380

openmetadata-mcp/src/main/resources/json/data/mcp/tools.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
},
2727
"size": {
2828
"type": "integer",
29-
"description": "Number of results to return. Default is 10, we can iteratively page through results by increasing 'from'. Maximum allowed is 50.",
29+
"description": "Number of results to return. Default is 10. For broad or generic queries (e.g. 'show all tables'), keep size small (5-10), as results are summaries; use get_entity_details for full entity info. Only increase size when the query is specific enough that results will be narrow and focused. Maximum allowed is 50.",
3030
"default": 10
3131
},
3232
"includeDeleted": {
@@ -73,7 +73,7 @@
7373
{
7474
"description": "Simple search with pagination",
7575
"query": "tables",
76-
"size": 20,
76+
"size": 10,
7777
"from": 0
7878
},
7979
{

openmetadata-mcp/src/test/java/org/openmetadata/mcp/tools/SearchMetadataAggregationTest.java

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,4 +242,78 @@ private Map<String, Object> createEmptyHits() {
242242
hits.put("total", total);
243243
return hits;
244244
}
245+
246+
@Test
247+
void testResponseTrimmedWhenExceedingCharLimit() {
248+
// 50 results each with 200 columns (~50 chars each) => ~500k chars, well over the 100k cap
249+
Map<String, Object> searchResponse = createSearchResponseWithLargeResults(50, 200);
250+
251+
Map<String, Object> result =
252+
SearchMetadataTool.buildEnhancedSearchResponse(
253+
searchResponse, "*", 50, Collections.emptyList(), false, 10);
254+
255+
@SuppressWarnings("unchecked")
256+
List<Map<String, Object>> results = (List<Map<String, Object>>) result.get("results");
257+
assertTrue(results.size() < 50, "Results should be trimmed below requested 50");
258+
assertEquals(true, result.get("hasMore"), "hasMore must be true when trimmed by size cap");
259+
assertEquals(results.size(), result.get("returnedCount"));
260+
String message = (String) result.get("message");
261+
assertTrue(message.contains("trimmed"), "Message should mention trimming");
262+
}
263+
264+
@Test
265+
void testResponseNotTrimmedWhenUnderCharLimit() {
266+
// 5 results with 2 columns each — well under 100k
267+
Map<String, Object> searchResponse = createSearchResponseWithLargeResults(5, 2);
268+
269+
Map<String, Object> result =
270+
SearchMetadataTool.buildEnhancedSearchResponse(
271+
searchResponse, "*", 50, Collections.emptyList(), false, 10);
272+
273+
@SuppressWarnings("unchecked")
274+
List<Map<String, Object>> results = (List<Map<String, Object>>) result.get("results");
275+
assertEquals(5, results.size(), "Results should not be trimmed when under char limit");
276+
}
277+
278+
@Test
279+
void testResponseTrimmedToAtLeastOneResult() {
280+
// Single result with enough columns to exceed 100k on its own
281+
Map<String, Object> searchResponse = createSearchResponseWithLargeResults(1, 3000);
282+
283+
Map<String, Object> result =
284+
SearchMetadataTool.buildEnhancedSearchResponse(
285+
searchResponse, "*", 50, Collections.emptyList(), false, 10);
286+
287+
@SuppressWarnings("unchecked")
288+
List<Map<String, Object>> results = (List<Map<String, Object>>) result.get("results");
289+
assertEquals(1, results.size(), "At least one result must always be returned");
290+
}
291+
292+
private Map<String, Object> createSearchResponseWithLargeResults(int count, int columnCount) {
293+
Map<String, Object> response = new HashMap<>();
294+
295+
List<Map<String, Object>> hits = new ArrayList<>();
296+
for (int i = 0; i < count; i++) {
297+
Map<String, Object> source = new HashMap<>();
298+
source.put("name", "table_" + i);
299+
source.put("fullyQualifiedName", "service.db.schema.table_" + i);
300+
source.put("entityType", "table");
301+
List<String> columnNames = new ArrayList<>();
302+
for (int c = 0; c < columnCount; c++) {
303+
columnNames.add("column_with_long_name_to_increase_payload_size_" + c);
304+
}
305+
source.put("columnNames", columnNames);
306+
Map<String, Object> hit = new HashMap<>();
307+
hit.put("_source", source);
308+
hits.add(hit);
309+
}
310+
311+
Map<String, Object> total = new HashMap<>();
312+
total.put("value", count);
313+
Map<String, Object> hitsObj = new HashMap<>();
314+
hitsObj.put("hits", hits);
315+
hitsObj.put("total", total);
316+
response.put("hits", hitsObj);
317+
return response;
318+
}
245319
}

0 commit comments

Comments
 (0)