1010import os
1111import re
1212import sys
13+ import time
1314import urllib .error
1415import urllib .request
1516
# Base URL of the GitHub REST API, used for issue/label/comment calls.
GITHUB_API = "https://api.github.com"
# GitHub Models inference endpoint (OpenAI-compatible chat-completions API).
INFERENCE_API = "https://models.github.ai/inference"
# Model used to classify incoming issues.
MODEL = "gpt-4o-mini"

# The only labels the triage bot is allowed to apply; any other label
# returned by the model is rejected and falls back to "question".
TRIAGE_LABELS = {"bug", "Missing Feature", "question"}
2122
2223SYSTEM_PROMPT = """\
2324 You are an issue triage bot for TorchSharp, a .NET binding for PyTorch.
2627- bug: Something is broken, crashes, throws an unexpected error, or produces wrong results.
2728- Missing Feature: A PyTorch API or feature that is not yet available in TorchSharp.
2829- question: The user is asking for help, guidance, or clarification on how to use TorchSharp.
29- - enhancement: A suggestion to improve existing functionality (not a missing PyTorch API).
30- - breaking-change: The issue reports or requests a change that would break existing public API.
3130
3231Respond with ONLY a JSON object in this exact format, no other text:
33- {"label": "<one of: bug, Missing Feature, question, enhancement, breaking-change >", "reason": "<one sentence explanation>"}
32+ {"label": "<one of: bug, Missing Feature, question>", "reason": "<one sentence explanation>"}
3433"""
3534
3635COMMENT_TEMPLATES = {
5554 "Please make sure to include the TorchSharp version and a code sample for context.\n \n "
5655 "*This comment was generated automatically by the issue triage bot.*"
5756 ),
58- "enhancement" : (
59- "Thank you for the suggestion! 🙏\n \n "
60- "I've triaged this as an **enhancement** request. {reason}\n \n "
61- "A maintainer will review this when they get a chance.\n \n "
62- "*This comment was generated automatically by the issue triage bot.*"
63- ),
64- "breaking-change" : (
65- "Thank you for reporting this! 🙏\n \n "
66- "I've triaged this as a potential **breaking change**. {reason}\n \n "
67- "A maintainer will review this carefully.\n \n "
68- "*This comment was generated automatically by the issue triage bot.*"
69- ),
7057}
7158
7259
@@ -91,9 +78,27 @@ def github_request(method, path, body=None):
9178
def sanitize_reason(reason):
    """Sanitize an LLM-generated reason so it can be embedded in a GitHub
    comment without markdown or HTML injection.

    Ordering matters: images must be stripped before links, because the
    link pattern would otherwise consume the ``[alt](url)`` tail of an
    image, leaving a stray ``!`` plus the alt text behind and making the
    image regex a no-op.

    Args:
        reason: Raw one-sentence explanation returned by the model.

    Returns:
        A trimmed, markdown-escaped string of at most ~200 source chars.
    """
    # Limit length to avoid excessively long comments.
    reason = reason[:200]

    # Strip markdown images entirely: ![alt](url) -> ""
    # (must run before the link pattern below, which would otherwise
    # match the [alt](url) portion of an image).
    reason = re.sub(r"!\[([^\]]*)\]\([^\)]+\)", "", reason)

    # Strip markdown links: [text](url) -> text
    reason = re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", reason)

    # Remove fenced code blocks with triple backticks to prevent block injection.
    reason = re.sub(r"```.*?```", "", reason, flags=re.DOTALL)

    # Remove any remaining standalone backticks used for inline code.
    reason = reason.replace("`", "")

    # Strip simple HTML tags such as <script>, <b>, etc.
    reason = re.sub(r"<[^>]+>", "", reason)

    # Escape markdown special characters so the text is rendered literally.
    reason = re.sub(r"([\\*_{}\[\]()>#+\-!])", r"\\\1", reason)

    return reason.strip()
98103
99104
@@ -115,29 +120,75 @@ def classify_issue(title, body):
115120 }
116121
117122 data = json .dumps (payload ).encode ()
118- req = urllib .request .Request (
119- f"{ INFERENCE_API } /chat/completions" , data = data , method = "POST"
120- )
121- req .add_header ("Authorization" , f"Bearer { token } " )
122- req .add_header ("Content-Type" , "application/json" )
123-
124- try :
125- with urllib .request .urlopen (req , timeout = 60 ) as resp :
126- result = json .loads (resp .read ())
127- except urllib .error .HTTPError as e :
128- error_body = e .read ().decode (errors = "replace" ) if e .fp else ""
129- raise RuntimeError (f"LLM API call failed ({ e .code } ): { error_body } " ) from e
130123
131- content = result ["choices" ][0 ]["message" ]["content" ].strip ()
124+ # Retry with exponential backoff
125+ max_retries = 3
126+ result = None
127+ for attempt in range (max_retries ):
128+ req = urllib .request .Request (
129+ f"{ INFERENCE_API } /chat/completions" , data = data , method = "POST"
130+ )
131+ req .add_header ("Authorization" , f"Bearer { token } " )
132+ req .add_header ("Content-Type" , "application/json" )
133+ try :
134+ with urllib .request .urlopen (req , timeout = 60 ) as resp :
135+ result = json .loads (resp .read ())
136+ break
137+ except (urllib .error .HTTPError , urllib .error .URLError , TimeoutError ) as e :
138+ if attempt < max_retries - 1 :
139+ wait = 2 ** attempt
140+ print (f"::warning::LLM API attempt { attempt + 1 } failed ({ e } ); retrying in { wait } s" )
141+ time .sleep (wait )
142+ else :
143+ error_detail = ""
144+ if isinstance (e , urllib .error .HTTPError ) and e .fp :
145+ error_detail = e .read ().decode (errors = "replace" )
146+ raise RuntimeError (f"LLM API call failed after { max_retries } attempts: { error_detail or e } " ) from e
147+
148+ # Validate response structure
149+ if not isinstance (result , dict ):
150+ raise RuntimeError ("LLM API returned unexpected response format: top-level JSON is not an object." )
151+
152+ choices = result .get ("choices" )
153+ if not isinstance (choices , list ) or not choices :
154+ raise RuntimeError ("LLM API returned unexpected response format: missing or empty 'choices' array." )
155+
156+ first_choice = choices [0 ]
157+ if not isinstance (first_choice , dict ):
158+ raise RuntimeError ("LLM API returned unexpected response format: first choice is not an object." )
159+
160+ message = first_choice .get ("message" )
161+ if not isinstance (message , dict ):
162+ raise RuntimeError ("LLM API returned unexpected response format: missing or invalid 'message' in first choice." )
163+
164+ content = message .get ("content" )
165+ if not isinstance (content , str ) or not content .strip ():
166+ raise RuntimeError ("LLM API returned unexpected response format: missing or invalid 'content' in message." )
167+
168+ content = content .strip ()
132169
133170 # Parse the JSON response, stripping markdown fences if present
134171 json_match = re .search (r"\{.*\}" , content , re .DOTALL )
135172 if json_match :
136173 content = json_match .group (0 )
137174
138- parsed = json .loads (content )
139- label = parsed ["label" ]
140- reason = sanitize_reason (parsed .get ("reason" , "" ))
175+ try :
176+ parsed = json .loads (content )
177+ except (json .JSONDecodeError , TypeError ) as e :
178+ print (f"::warning::Failed to parse LLM JSON response ({ e } ); defaulting to 'question'" )
179+ return "question" , "Could not parse LLM response; defaulting to 'question'."
180+
181+ if not isinstance (parsed , dict ):
182+ print ("::warning::LLM JSON content is not an object, defaulting to 'question'" )
183+ return "question" , "Could not determine the issue type."
184+
185+ label = parsed .get ("label" )
186+ if not isinstance (label , str ) or not label :
187+ print ("::warning::LLM response missing 'label', defaulting to 'question'" )
188+ label = "question"
189+
190+ reason_raw = parsed .get ("reason" , "" )
191+ reason = sanitize_reason (reason_raw if isinstance (reason_raw , str ) else "" )
141192
142193 if label not in TRIAGE_LABELS :
143194 print (f"::warning::LLM returned unknown label '{ label } ', defaulting to 'question'" )
0 commit comments