Skip to content

Commit 14c660c

Browse files
Address PR #1535 review comments
- Improve sanitize_reason to strip code blocks, backticks, HTML tags, and escape markdown special characters
- Remove non-existent labels (enhancement, breaking-change) from TRIAGE_LABELS and SYSTEM_PROMPT to match repo issue templates
- Add robust JSON parsing with try-except fallback to 'question'
- Add retry with exponential backoff (3 attempts) for LLM API calls
- Validate LLM response structure defensively before accessing fields
- Handle missing 'label' key with parsed.get and fallback
- Add timeout-minutes: 10 to workflow job
- Add models: read permission for GitHub Models API access
- Fix inference API endpoint to models.github.ai

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 9f68c00 commit 14c660c

File tree

2 files changed

+88
-35
lines changed

2 files changed

+88
-35
lines changed

.github/scripts/triage-issue.py

Lines changed: 86 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
import os
1111
import re
1212
import sys
13+
import time
1314
import urllib.error
1415
import urllib.request
1516

1617
GITHUB_API = "https://api.github.com"
17-
INFERENCE_API = "https://models.inference.ai.azure.com"
18+
INFERENCE_API = "https://models.github.ai/inference"
1819
MODEL = "gpt-4o-mini"
1920

20-
TRIAGE_LABELS = {"bug", "Missing Feature", "question", "enhancement", "breaking-change"}
21+
TRIAGE_LABELS = {"bug", "Missing Feature", "question"}
2122

2223
SYSTEM_PROMPT = """\
2324
You are an issue triage bot for TorchSharp, a .NET binding for PyTorch.
@@ -26,11 +27,9 @@
2627
- bug: Something is broken, crashes, throws an unexpected error, or produces wrong results.
2728
- Missing Feature: A PyTorch API or feature that is not yet available in TorchSharp.
2829
- question: The user is asking for help, guidance, or clarification on how to use TorchSharp.
29-
- enhancement: A suggestion to improve existing functionality (not a missing PyTorch API).
30-
- breaking-change: The issue reports or requests a change that would break existing public API.
3130
3231
Respond with ONLY a JSON object in this exact format, no other text:
33-
{"label": "<one of: bug, Missing Feature, question, enhancement, breaking-change>", "reason": "<one sentence explanation>"}
32+
{"label": "<one of: bug, Missing Feature, question>", "reason": "<one sentence explanation>"}
3433
"""
3534

3635
COMMENT_TEMPLATES = {
@@ -55,18 +54,6 @@
5554
"Please make sure to include the TorchSharp version and a code sample for context.\n\n"
5655
"*This comment was generated automatically by the issue triage bot.*"
5756
),
58-
"enhancement": (
59-
"Thank you for the suggestion! 🙏\n\n"
60-
"I've triaged this as an **enhancement** request. {reason}\n\n"
61-
"A maintainer will review this when they get a chance.\n\n"
62-
"*This comment was generated automatically by the issue triage bot.*"
63-
),
64-
"breaking-change": (
65-
"Thank you for reporting this! 🙏\n\n"
66-
"I've triaged this as a potential **breaking change**. {reason}\n\n"
67-
"A maintainer will review this carefully.\n\n"
68-
"*This comment was generated automatically by the issue triage bot.*"
69-
),
7057
}
7158

7259

@@ -91,9 +78,27 @@ def github_request(method, path, body=None):
9178

9279
def sanitize_reason(reason):
9380
"""Sanitize LLM-generated reason to prevent markdown injection."""
81+
# Limit length to avoid excessively long comments.
9482
reason = reason[:200]
95-
reason = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", reason) # Strip links
96-
reason = re.sub(r"!\[([^\]]*)\]\([^\)]+\)", "", reason) # Strip images
83+
84+
# Strip markdown links: [text](url) -> text
85+
reason = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", reason)
86+
87+
# Strip markdown images entirely: ![alt](url) -> ""
88+
reason = re.sub(r"!\[([^\]]*)\]\([^\)]+\)", "", reason)
89+
90+
# Remove fenced code blocks with triple backticks to prevent block injection.
91+
reason = re.sub(r"```.*?```", "", reason, flags=re.DOTALL)
92+
93+
# Remove any remaining standalone backticks used for inline code.
94+
reason = reason.replace("`", "")
95+
96+
# Strip simple HTML tags such as <script>, <b>, etc.
97+
reason = re.sub(r"<[^>]+>", "", reason)
98+
99+
# Escape markdown special characters so the text is rendered literally.
100+
reason = re.sub(r"([\\*_{}\[\]()>#+\-!])", r"\\\1", reason)
101+
97102
return reason.strip()
98103

99104

@@ -115,29 +120,75 @@ def classify_issue(title, body):
115120
}
116121

117122
data = json.dumps(payload).encode()
118-
req = urllib.request.Request(
119-
f"{INFERENCE_API}/chat/completions", data=data, method="POST"
120-
)
121-
req.add_header("Authorization", f"Bearer {token}")
122-
req.add_header("Content-Type", "application/json")
123-
124-
try:
125-
with urllib.request.urlopen(req, timeout=60) as resp:
126-
result = json.loads(resp.read())
127-
except urllib.error.HTTPError as e:
128-
error_body = e.read().decode(errors="replace") if e.fp else ""
129-
raise RuntimeError(f"LLM API call failed ({e.code}): {error_body}") from e
130123

131-
content = result["choices"][0]["message"]["content"].strip()
124+
# Retry with exponential backoff
125+
max_retries = 3
126+
result = None
127+
for attempt in range(max_retries):
128+
req = urllib.request.Request(
129+
f"{INFERENCE_API}/chat/completions", data=data, method="POST"
130+
)
131+
req.add_header("Authorization", f"Bearer {token}")
132+
req.add_header("Content-Type", "application/json")
133+
try:
134+
with urllib.request.urlopen(req, timeout=60) as resp:
135+
result = json.loads(resp.read())
136+
break
137+
except (urllib.error.HTTPError, urllib.error.URLError, TimeoutError) as e:
138+
if attempt < max_retries - 1:
139+
wait = 2 ** attempt
140+
print(f"::warning::LLM API attempt {attempt + 1} failed ({e}); retrying in {wait}s")
141+
time.sleep(wait)
142+
else:
143+
error_detail = ""
144+
if isinstance(e, urllib.error.HTTPError) and e.fp:
145+
error_detail = e.read().decode(errors="replace")
146+
raise RuntimeError(f"LLM API call failed after {max_retries} attempts: {error_detail or e}") from e
147+
148+
# Validate response structure
149+
if not isinstance(result, dict):
150+
raise RuntimeError("LLM API returned unexpected response format: top-level JSON is not an object.")
151+
152+
choices = result.get("choices")
153+
if not isinstance(choices, list) or not choices:
154+
raise RuntimeError("LLM API returned unexpected response format: missing or empty 'choices' array.")
155+
156+
first_choice = choices[0]
157+
if not isinstance(first_choice, dict):
158+
raise RuntimeError("LLM API returned unexpected response format: first choice is not an object.")
159+
160+
message = first_choice.get("message")
161+
if not isinstance(message, dict):
162+
raise RuntimeError("LLM API returned unexpected response format: missing or invalid 'message' in first choice.")
163+
164+
content = message.get("content")
165+
if not isinstance(content, str) or not content.strip():
166+
raise RuntimeError("LLM API returned unexpected response format: missing or invalid 'content' in message.")
167+
168+
content = content.strip()
132169

133170
# Parse the JSON response, stripping markdown fences if present
134171
json_match = re.search(r"\{.*\}", content, re.DOTALL)
135172
if json_match:
136173
content = json_match.group(0)
137174

138-
parsed = json.loads(content)
139-
label = parsed["label"]
140-
reason = sanitize_reason(parsed.get("reason", ""))
175+
try:
176+
parsed = json.loads(content)
177+
except (json.JSONDecodeError, TypeError) as e:
178+
print(f"::warning::Failed to parse LLM JSON response ({e}); defaulting to 'question'")
179+
return "question", "Could not parse LLM response; defaulting to 'question'."
180+
181+
if not isinstance(parsed, dict):
182+
print("::warning::LLM JSON content is not an object, defaulting to 'question'")
183+
return "question", "Could not determine the issue type."
184+
185+
label = parsed.get("label")
186+
if not isinstance(label, str) or not label:
187+
print("::warning::LLM response missing 'label', defaulting to 'question'")
188+
label = "question"
189+
190+
reason_raw = parsed.get("reason", "")
191+
reason = sanitize_reason(reason_raw if isinstance(reason_raw, str) else "")
141192

142193
if label not in TRIAGE_LABELS:
143194
print(f"::warning::LLM returned unknown label '{label}', defaulting to 'question'")

.github/workflows/issue-triage.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@ on:
1212

1313
permissions:
1414
issues: write
15+
models: read
1516

1617
jobs:
1718
triage:
1819
runs-on: ubuntu-latest
20+
timeout-minutes: 10
1921
steps:
2022
- uses: actions/checkout@v4
2123
with:

0 commit comments

Comments
 (0)