Skip to content

Commit 196ee21

Browse files
SonAIengine and claude committed
feat: v0.19.0 — compress_tool_result() tool result 지능형 압축 (실제 XGEN 76K→116 tok, 97.6% 절감)
- compressor 모듈 신규: JSON list/dict, HTML, Error, Text 타입별 압축 - HTTP 응답 자동 감지 — headers 제거, status+body만 보존 - JSON list: 첫 샘플 full 구조 + 나머지 identity keys only - 4개 integration point: ToolGraph.execute, create_gateway_tools, MCPProxy, MCP server - 실제 XGEN API 12개 fixture 기준 301K→7.2K chars (97.6%) - zero-dependency 유지 (stdlib html.parser + re만 사용) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 449bf03 commit 196ee21

13 files changed

Lines changed: 1022 additions & 19 deletions

File tree

graph_tool_call/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66

77
__all__ = [
88
"CategorySummary",
9+
"CompressConfig",
910
"DuplicatePair",
1011
"GraphAnalysisReport",
1112
"GraphToolkit",
13+
"compress_tool_result",
1214
"create_gateway_tools",
1315
"MCPAnnotations",
1416
"MergeStrategy",
@@ -42,6 +44,8 @@
4244
"create_gateway_tools": ("graph_tool_call.langchain.gateway", "create_gateway_tools"),
4345
"filter_tools": ("graph_tool_call.toolkit", "filter_tools"),
4446
"GraphToolkit": ("graph_tool_call.toolkit", "GraphToolkit"),
47+
"compress_tool_result": ("graph_tool_call.compressor", "compress_tool_result"),
48+
"CompressConfig": ("graph_tool_call.compressor", "CompressConfig"),
4549
}
4650

4751

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Tool result compressor: intelligently compress large tool outputs for LLM context.
2+
3+
Usage::
4+
5+
from graph_tool_call.compressor import compress_tool_result, CompressConfig
6+
7+
# Simple — auto-detect type, default 4000 chars
8+
compressed = compress_tool_result(huge_json)
9+
10+
# Custom config
11+
cfg = CompressConfig(max_chars=2000, max_list_items=5)
12+
compressed = compress_tool_result(data, config=cfg)
13+
"""
14+
15+
from graph_tool_call.compressor.base import CompressConfig
16+
from graph_tool_call.compressor.detector import compress_tool_result
17+
18+
__all__ = ["CompressConfig", "compress_tool_result"]

graph_tool_call/compressor/base.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""Base types for the compressor module."""
2+
3+
from __future__ import annotations
4+
5+
from dataclasses import dataclass, field
6+
7+
8+
@dataclass
class CompressConfig:
    """Tunable limits shared by the tool-result compressors.

    Attributes:
        max_chars: Upper bound on the compressed output length, in
            characters (roughly four characters per token).
        max_list_items: How many sample items to retain from a JSON array.
        max_value_len: Character cap for a single JSON value.
        max_depth: Nesting depth beyond which nested structures are
            summarised instead of expanded.
        preserve_keys: JSON keys whose values are always kept in full.
    """

    # Overall output budget.
    max_chars: int = 4000
    # Sampling limit for JSON arrays.
    max_list_items: int = 3
    # Per-value length limit.
    max_value_len: int = 80
    # Depth limit for nested structures.
    max_depth: int = 2
    # A factory is used so instances never share one mutable list.
    preserve_keys: list[str] = field(default_factory=list)
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
"""Auto-detect content type and route to the appropriate compressor."""
2+
3+
from __future__ import annotations
4+
5+
import json
6+
from typing import Any
7+
8+
from graph_tool_call.compressor.base import CompressConfig
9+
from graph_tool_call.compressor.error_comp import (
10+
compress_error_dict,
11+
compress_error_text,
12+
is_error_dict,
13+
is_error_text,
14+
)
15+
from graph_tool_call.compressor.html_comp import compress_html, is_html
16+
from graph_tool_call.compressor.json_comp import compress_json_dict, compress_json_list
17+
from graph_tool_call.compressor.text_comp import compress_text
18+
19+
20+
def _detect_and_compress(content: Any, config: CompressConfig) -> str:
    """Classify *content* and hand it to the matching compressor.

    Routing order:

    1. ``list`` / ``dict`` input goes straight to the JSON (or error-dict)
       compressors.
    2. Anything else is converted with ``str()``; a string no longer than
       ``config.max_chars`` is returned unchanged.
    3. A longer string is parsed as JSON; a resulting list or dict is
       routed as in step 1.
    4. Otherwise the string is classified as HTML, then error text, and
       finally falls back to the plain-text compressor.
    """

    def _compress_structured(value: Any) -> str:
        # *value* is always a list or a dict when this is called.
        if isinstance(value, list):
            return compress_json_list(value, config)
        if is_error_dict(value):
            return compress_error_dict(value, config)
        return compress_json_dict(value, config)

    if isinstance(content, (list, dict)):
        return _compress_structured(content)

    text = content if isinstance(content, str) else str(content)
    if len(text) <= config.max_chars:
        # Already within budget: nothing to compress.
        return text

    try:
        parsed = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = None

    # JSON scalars (numbers, strings, null, ...) are treated as plain text below.
    if isinstance(parsed, (list, dict)):
        return _compress_structured(parsed)

    if is_html(text):
        return compress_html(text, config)
    if is_error_text(text):
        return compress_error_text(text, config)
    return compress_text(text, config)
63+
64+
65+
def compress_tool_result(
    content: str | dict | list | Any,
    *,
    config: CompressConfig | None = None,
    max_chars: int = 4000,
    content_type: str | None = None,
) -> str:
    """Compress a tool result so it fits into an LLM context window.

    Parameters:
        content: The tool result; a str, dict, list, or any other object
            (non-string objects are stringified by the downstream routing).
        config: Full compression settings. When omitted,
            ``CompressConfig(max_chars=max_chars)`` is built.
        max_chars: Convenience shortcut for the output budget. It is only
            consulted when *config* is not supplied.
        content_type: Name of a compressor to force (``"json"``,
            ``"html"``, ``"error"`` or ``"text"``). When *None* the content
            type is auto-detected.

    Returns:
        The string produced by the selected compressor.
    """
    effective = CompressConfig(max_chars=max_chars) if config is None else config

    if content_type is None:
        return _detect_and_compress(content, effective)

    # An explicit type bypasses auto-detection entirely.
    return _compress_by_type(content, content_type, effective)
95+
96+
97+
def _compress_by_type(content: Any, content_type: str, config: CompressConfig) -> str:
    """Dispatch *content* to the compressor named by *content_type*.

    Non-string content is first serialised with ``json.dumps`` so that the
    text-based compressors always receive a string. Names other than
    ``"json"``, ``"html"`` and ``"error"`` use the plain-text compressor.
    """
    given_as_str = isinstance(content, str)
    text = content if given_as_str else json.dumps(content, ensure_ascii=False, default=str)

    if content_type == "html":
        return compress_html(text, config)

    if content_type == "error":
        # A dict keeps its structure; everything else is handled as text.
        if isinstance(content, dict):
            return compress_error_dict(content, config)
        return compress_error_text(text, config)

    if content_type == "json":
        parsed = content
        if given_as_str:
            try:
                parsed = json.loads(text)
            except ValueError:  # json.JSONDecodeError is a ValueError subclass
                return compress_text(text, config)
        if isinstance(parsed, list):
            return compress_json_list(parsed, config)
        if isinstance(parsed, dict):
            return compress_json_dict(parsed, config)
        # Scalars and other non-container values fall through to plain text.

    # "text", an unknown name, or JSON that is neither a list nor a dict.
    return compress_text(text, config)
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""Error response compressor: extract status + message only."""
2+
3+
from __future__ import annotations
4+
5+
import re
6+
from typing import Any
7+
8+
from graph_tool_call.compressor.base import CompressConfig
9+
10+
# Keys that usually hold the human-readable error message, in priority order.
_MESSAGE_KEYS = ("message", "detail", "error", "reason", "error_description", "msg")

# Keys whose dict values may wrap further error details.
_DETAIL_CONTAINER_KEYS = ("body", "response", "data", "error")


def _extract_message(data: dict[str, Any]) -> str | None:
    """Return the first non-empty error message string found in *data*.

    The message keys of *data* itself are checked first, in priority order.
    If none holds a non-empty string, every dict stored under a container
    key is searched the same way, recursively. Returns *None* when nothing
    is found.
    """
    direct = next(
        (value for value in map(data.get, _MESSAGE_KEYS) if isinstance(value, str) and value),
        None,
    )
    if direct is not None:
        return direct

    # Descend into nested containers (depth is not limited).
    for child in map(data.get, _DETAIL_CONTAINER_KEYS):
        if not isinstance(child, dict):
            continue
        found = _extract_message(child)
        if found:
            return found
    return None
32+
33+
34+
def is_error_dict(data: dict[str, Any]) -> bool:
    """Heuristic: does *data* look like an error response?

    *data* is treated as an error when either

    * its first usable numeric status (``status``, ``status_code`` or
      ``statusCode``) lies in the HTTP 4xx/5xx range, or
    * one of ``error``, ``traceback``, ``stack_trace`` or ``exception``
      holds a truthy value.

    Marker keys that are present but empty (``None``, ``False``, ``""``,
    ``{}``, ...) do not count: success envelopes such as
    ``{"error": null, "data": [...]}`` must not be collapsed into an error
    summary.
    """
    for key in ("status", "status_code", "statusCode"):
        code = data.get(key)
        # Skip values that are not a usable numeric status (missing, 0,
        # strings such as "failed", booleans) so they cannot hide a numeric
        # code stored under one of the other spellings.
        if isinstance(code, bool) or not isinstance(code, int) or code == 0:
            continue
        if 400 <= code < 600:
            return True
        # The first numeric status wins, matching the lookup priority.
        break

    return any(data.get(key) for key in ("error", "traceback", "stack_trace", "exception"))
42+
43+
44+
def compress_error_dict(data: dict[str, Any], config: CompressConfig) -> str:
    """Compress an error-shaped dict to ``HTTP {status}: {message}``.

    The status is the first truthy value among ``status``, ``status_code``
    and ``statusCode`` (``"?"`` when none is set). The message is looked up
    in this order:

    1. inside dicts stored under the detail-container keys (most specific),
    2. in *data* itself,
    3. in the ``exception`` / ``traceback`` / ``stack_trace`` fields, so
       payloads that only carry those fields still yield a useful summary,
    4. the literal ``"Unknown error"``.

    The result is truncated to ``config.max_chars`` characters.
    """
    status = data.get("status") or data.get("status_code") or data.get("statusCode") or "?"

    # Prefer the most specific nested message over a generic top-level "error".
    message: str | None = None
    for key in _DETAIL_CONTAINER_KEYS:
        nested = data.get(key)
        if isinstance(nested, dict):
            message = _extract_message(nested)
            if message:
                break
    if not message:
        message = _extract_message(data)

    if not message:
        # Last resort: summarise exception / traceback fields.
        for key in ("exception", "traceback", "stack_trace"):
            raw = data.get(key)
            if isinstance(raw, dict):
                message = _extract_message(raw)
            elif isinstance(raw, str) and raw.strip():
                message = compress_error_text(raw, config)
            if message:
                break

    result = f"HTTP {status}: {message or 'Unknown error'}"
    return result[: config.max_chars]
63+
64+
65+
def compress_error_text(text: str, config: CompressConfig) -> str:
    """Compress an error-like text (e.g. a traceback) to one line.

    Scanning from the end, the first non-blank line that does not start
    with ``"File "`` or ``"at "`` (stack-frame lines) is returned; for a
    Python traceback this is the final exception line. If every non-blank
    line is a frame line, the last one is returned. The result is always
    stripped and truncated to ``config.max_chars`` characters.

    Blank or whitespace-only input is returned truncated to the same limit.
    """
    limit = config.max_chars

    stripped_lines = [line.strip() for line in text.splitlines()]
    candidates = [line for line in stripped_lines if line]
    if not candidates:
        # Nothing but whitespace: still honour the size budget.
        return text[:limit]

    for line in reversed(candidates):
        if not line.startswith(("File ", "at ")):
            return line[:limit]

    # Only frame lines were found; fall back to the last one.
    return candidates[-1][:limit]
78+
79+
80+
def is_error_text(text: str) -> bool:
    """Heuristic: report whether *text* resembles an error or traceback.

    True when *text* contains a Python traceback header, or when any line
    begins with a capitalised identifier ending in ``Error`` or
    ``Exception`` followed by a colon.
    """
    signatures = (
        (r"Traceback \(most recent call", 0),
        (r"^[A-Z]\w*(Error|Exception):", re.MULTILINE),
    )
    return any(re.search(pattern, text, flags) for pattern, flags in signatures)

0 commit comments

Comments
 (0)