Skip to content

Commit d9e9cd7

Browse files
authored
Merge pull request #184 from rostilos/1.5.7-rc
feat: Enhance error sanitization and add tests for provider error mes…
2 parents 582f8d6 + 40ec362 commit d9e9cd7

5 files changed

Lines changed: 139 additions & 9 deletions

File tree

java-ecosystem/services/pipeline-agent/src/main/java/org/rostilos/codecrow/pipelineagent/generic/processor/WebhookAsyncProcessor.java

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
import org.springframework.stereotype.Service;
2323
import org.springframework.transaction.annotation.Transactional;
2424

25+
import java.util.Map;
26+
2527
/**
2628
* Service for processing webhooks asynchronously.
2729
*
@@ -141,11 +143,9 @@ public void processWebhookInTransaction(
141143

142144
// Create event consumer that logs to job
143145
log.info("Calling handler.handle for job {}", job.getExternalId());
144-
WebhookHandler.WebhookResult result = handler.handle(payload, project, event -> {
145-
String message = (String) event.getOrDefault("message", "Processing...");
146-
String state = (String) event.getOrDefault("state", "processing");
147-
jobService.info(job, state, message);
148-
});
146+
WebhookHandler.WebhookResult result = handler.handle(payload, project, event ->
147+
logHandlerEvent(job, event)
148+
);
149149
log.info("handler.handle completed for job {}, result status={}", job.getExternalId(), result.status());
150150

151151
// Check if the webhook was ignored (e.g., branch not matching pattern, analysis disabled)
@@ -273,6 +273,19 @@ public void processWebhookInTransaction(
273273
}
274274
}
275275
}
276+
277+
private void logHandlerEvent(Job job, Map<String, Object> event) {
278+
String message = String.valueOf(event.getOrDefault("message", "Processing..."));
279+
String state = String.valueOf(event.getOrDefault("state", "processing"));
280+
String type = String.valueOf(event.getOrDefault("type", "info"));
281+
282+
switch (type) {
283+
case "error", "failed" -> jobService.error(job, state, message);
284+
case "warning", "warn" -> jobService.warn(job, state, message);
285+
case "debug" -> jobService.debug(job, state, message);
286+
default -> jobService.info(job, state, message);
287+
}
288+
}
276289

277290
/**
278291
* Initialize lazy associations that will be needed during webhook processing.

python-ecosystem/inference-orchestrator/src/llm/llm_factory.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,9 @@ def _normalize_cloudflare_chat_payload(payload: dict[str, Any]) -> dict[str, Any
163163
arrays in `messages[*].content`. It also expects tool-calling assistant messages
164164
to use `content: null`, matching OpenAI's own tool-call transcript shape.
165165
"""
166+
payload = dict(payload)
167+
payload.pop("parallel_tool_calls", None)
168+
166169
messages = payload.get("messages")
167170
if not isinstance(messages, list):
168171
return payload

python-ecosystem/inference-orchestrator/src/utils/error_sanitizer.py

Lines changed: 97 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,94 @@
33
Removes sensitive technical details like API keys, quotas, and internal stack traces.
44
"""
55

6+
import ast
67
import re
78
import logging
89

910
logger = logging.getLogger(__name__)
1011

1112

13+
def _redact_sensitive(text: str) -> str:
    """Remove common secret-bearing fragments from an error message.

    Three substitutions are applied in order:

    * ``sk-`` followed by 20+ key characters becomes ``[API_KEY_REDACTED]``.
      Hyphens and underscores count as key characters so segmented keys such
      as ``sk-proj-...`` are redacted in full.
    * ``api_key`` / ``api-key`` / ``apikey`` followed by a separator and a
      value becomes ``[API_KEY_REDACTED]`` (case-insensitive).
    * ``authorization ... bearer <token>`` becomes
      ``[AUTHORIZATION_REDACTED]`` (case-insensitive).

    Args:
        text: Raw error text that may contain credentials.

    Returns:
        The text with every matched fragment replaced by its placeholder.
    """
    # Deliberately no word boundary before "sk-": over-redacting a long
    # hyphenated word is preferable to leaking a key.
    text = re.sub(r'sk-[a-zA-Z0-9_-]{20,}', '[API_KEY_REDACTED]', text)
    text = re.sub(
        r'api[_-]?key["\s:=]+["\']?[a-zA-Z0-9_-]+["\']?',
        '[API_KEY_REDACTED]',
        text,
        flags=re.IGNORECASE,
    )
    # Token class also covers "~", "+", "/" and trailing "=" padding so
    # base64-style bearer tokens are consumed whole rather than leaving a tail.
    text = re.sub(
        r'authorization["\s:=]+["\']?bearer\s+[a-zA-Z0-9._~+/-]+=*["\']?',
        '[AUTHORIZATION_REDACTED]',
        text,
        flags=re.IGNORECASE,
    )
    return text
29+
30+
31+
def _message_from_payload(payload) -> str | None:
32+
"""Extract a provider-facing message from nested error payloads."""
33+
if isinstance(payload, dict):
34+
for key in ("message", "detail"):
35+
value = payload.get(key)
36+
if isinstance(value, str) and value.strip():
37+
return value.strip()
38+
39+
error = payload.get("error")
40+
if isinstance(error, str) and error.strip():
41+
return error.strip()
42+
if isinstance(error, dict):
43+
message = _message_from_payload(error)
44+
if message:
45+
return message
46+
47+
errors = payload.get("errors")
48+
if isinstance(errors, list):
49+
for item in errors:
50+
message = _message_from_payload(item)
51+
if message:
52+
return message
53+
54+
if isinstance(payload, list):
55+
for item in payload:
56+
message = _message_from_payload(item)
57+
if message:
58+
return message
59+
60+
return None
61+
62+
63+
def _extract_provider_error_message(error_message: str) -> str | None:
64+
"""Parse common OpenAI-compatible provider exception bodies."""
65+
error_lower = error_message.lower()
66+
if not any(marker in error_lower for marker in (
67+
"error code:",
68+
"badrequesterror",
69+
"apiresponsevalidationerror",
70+
"status code",
71+
)):
72+
return None
73+
74+
start = error_message.find("{")
75+
end = error_message.rfind("}")
76+
if start == -1 or end <= start:
77+
return None
78+
79+
try:
80+
payload = ast.literal_eval(error_message[start:end + 1])
81+
except (SyntaxError, ValueError):
82+
return None
83+
84+
provider_message = _message_from_payload(payload)
85+
if not provider_message:
86+
return None
87+
88+
provider_message = _redact_sensitive(provider_message)
89+
if len(provider_message) > 180:
90+
provider_message = provider_message[:177].rstrip() + "..."
91+
return provider_message
92+
93+
1294
def sanitize_error_for_display(error_message: str) -> str:
1395
"""
1496
Sanitize error messages for user display.
@@ -24,6 +106,20 @@ def sanitize_error_for_display(error_message: str) -> str:
24106
return "An unexpected error occurred during processing."
25107

26108
error_lower = error_message.lower()
109+
110+
provider_message = _extract_provider_error_message(error_message)
111+
if provider_message:
112+
if any(term in provider_message.lower() for term in (
113+
"tool",
114+
"function",
115+
"parallel_tool_calls",
116+
"tools",
117+
)):
118+
return (
119+
"The AI provider rejected CodeCrow's tool-calling request: "
120+
f"{provider_message}"
121+
)
122+
return f"The AI provider rejected the request: {provider_message}"
27123

28124
# AI provider quota/rate limit errors
29125
if any(term in error_lower for term in ["quota", "rate limit", "rate_limit", "429", "exceeded", "too many requests"]):
@@ -135,10 +231,7 @@ def sanitize_error_for_display(error_message: str) -> str:
135231

136232
# If it looks safe, return a cleaned version
137233
# Remove any potential API keys or tokens
138-
sanitized = re.sub(r'sk-[a-zA-Z0-9]{20,}', '[API_KEY_REDACTED]', error_message)
139-
sanitized = re.sub(r'api[_-]?key["\s:=]+["\']?[a-zA-Z0-9-_]+["\']?', '[API_KEY_REDACTED]', sanitized, flags=re.IGNORECASE)
140-
141-
return sanitized
234+
return _redact_sensitive(error_message)
142235

143236

144237
def create_user_friendly_error(error: Exception) -> str:

python-ecosystem/inference-orchestrator/tests/test_error_sanitizer.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,25 @@ def test_json_structure(self):
102102
result = sanitize_error_for_display('{"error": "something"}')
103103
assert "error occurred" in result.lower()
104104

105+
def test_provider_error_message_extracted(self):
    """A tool-calling rejection is labelled as such and keeps the provider's field name."""
    provider_error = (
        "Error code: 400 - {'errors': [{'message': "
        "'Request body has an unknown field: parallel_tool_calls'}], "
        "'success': False}"
    )
    shown = sanitize_error_for_display(provider_error)
    assert "tool-calling" in shown.lower()
    assert "parallel_tool_calls" in shown
114+
115+
def test_provider_error_message_redacts_keys(self):
    """An API key embedded in the provider's message is replaced by a placeholder."""
    provider_error = (
        "Error code: 401 - {'error': {'message': "
        "'Invalid API key sk-abcdefghijklmnopqrstuvwxyz1234'}}"
    )
    shown = sanitize_error_for_display(provider_error)
    assert "sk-" not in shown
    assert "REDACTED" in shown
123+
105124
# Very long message
106125
def test_long_message_truncated(self):
107126
result = sanitize_error_for_display("A" * 300)

python-ecosystem/inference-orchestrator/tests/test_llm_factory.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,6 +242,7 @@ def test_coerce_content_blocks_to_text(self):
242242

243243
def test_normalize_cloudflare_payload_content_blocks_and_tool_calls(self):
244244
payload = {
245+
"parallel_tool_calls": False,
245246
"messages": [
246247
{"role": "system", "content": [{"type": "text", "text": "sys"}]},
247248
{"role": "user", "content": [{"type": "text", "text": "question"}]},
@@ -260,6 +261,7 @@ def test_normalize_cloudflare_payload_content_blocks_and_tool_calls(self):
260261
assert normalized["messages"][1]["content"] == "question"
261262
assert normalized["messages"][2]["content"] is None
262263
assert normalized["messages"][3]["content"] == "result"
264+
assert "parallel_tool_calls" not in normalized
263265

264266

265267
# ── Constants ────────────────────────────────────────────────────

0 commit comments

Comments
 (0)