1+ """Format PR comments for review context in prompts."""
2+
3+ from typing import Dict , Any , List
4+ from datetime import datetime
5+
6+ from claudecode .logger import get_logger
7+
8+ logger = get_logger (__name__ )
9+
# Upper bound, in characters, on the formatted review context; longer output
# is truncated so that previous-review history cannot bloat the prompt.
MAX_CONTEXT_CHARS = 15000
# Upper bound on replies kept per thread; when a thread has more, only the
# last MAX_REPLIES_PER_THREAD entries of its reply list are kept.
MAX_REPLIES_PER_THREAD = 5
# Marker text looked for in a comment body to recognise a finding posted by
# this bot; the finding title is read from the text that follows it.
BOT_COMMENT_MARKER = "🤖 **Code Review Finding:"
16+
17+
def is_bot_comment(comment: Dict[str, Any]) -> bool:
    """Check if a comment was posted by this bot.

    A comment counts as a bot comment when its body contains
    BOT_COMMENT_MARKER, or when its author's ``type`` is ``'Bot'``.

    Args:
        comment: Comment dictionary from GitHub API. The ``body`` and
            ``user`` entries may be missing or ``None``.

    Returns:
        True if this is a bot review comment.
    """
    # `or` instead of a .get() default: the key can be present with a null
    # value, which a default would not replace.
    body = comment.get('body') or ''

    # Primary check: look for our marker
    if BOT_COMMENT_MARKER in body:
        return True

    # Secondary check: user type is Bot
    user = comment.get('user') or {}
    return user.get('type') == 'Bot'
39+
40+
def format_pr_comments_for_prompt(
    bot_comment_threads: List[Dict[str, Any]],
) -> str:
    """Render previous bot review threads as prompt context.

    The threads arrive already grouped; this function caps the number of
    replies per thread and hands the result to the text formatter.

    Args:
        bot_comment_threads: List of thread dicts, each containing:
            - 'bot_comment': The original bot finding comment
            - 'replies': List of user reply comments
            - 'reactions': Dict of reaction counts (e.g., {'+1': 2, '-1': 1})

    Returns:
        The formatted review context, or an empty string when there are no
        threads.
    """
    if bot_comment_threads:
        # Cap reply counts first so the formatter only sees bounded threads.
        capped_threads = _truncate_replies(bot_comment_threads)
        logger.info(f"Formatting {len(capped_threads)} bot comment thread(s)")
        return _format_threads_for_prompt(capped_threads)

    logger.info("No bot comment threads to format")
    return ""
67+
68+
def _truncate_replies(threads: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Cap each thread's reply list at MAX_REPLIES_PER_THREAD entries.

    Every thread is shallow-copied, so the input dictionaries are not
    modified. When a thread is over the cap, only the last
    MAX_REPLIES_PER_THREAD replies are kept and the number dropped is stored
    under 'truncated_replies'.

    Args:
        threads: List of thread dictionaries.

    Returns:
        New list of thread dictionaries with capped reply lists.
    """

    def _capped(thread: Dict[str, Any]) -> Dict[str, Any]:
        capped = dict(thread)
        all_replies = capped.get('replies', [])
        dropped = len(all_replies) - MAX_REPLIES_PER_THREAD
        if dropped > 0:
            # Keep the tail of the list; record how many were left out.
            capped['replies'] = all_replies[-MAX_REPLIES_PER_THREAD:]
            capped['truncated_replies'] = dropped
        return capped

    return [_capped(thread) for thread in threads]
91+
92+
def _parse_bot_comment(body: str) -> Dict[str, str]:
    """Parse structured data from bot comment body.

    Each field is the text that follows its label, up to the earliest of the
    field's terminators, or to the end of the body when no terminator
    follows. Surrounding whitespace is stripped. A field whose label is
    absent is returned as an empty string.

    Args:
        body: The comment body text. ``None`` or an empty string yields all
            fields empty.

    Returns:
        Dictionary with the keys 'title', 'severity', 'category', 'impact'
        and 'recommendation'.
    """
    result = {
        'title': '',
        'severity': '',
        'category': '',
        'impact': '',
        'recommendation': '',
    }
    if not body:
        return result

    def _field(label: str, terminators: List[str]) -> str:
        """Return the stripped text between label and its earliest terminator."""
        label_pos = body.find(label)
        if label_pos == -1:
            return ''
        start = label_pos + len(label)
        hits = [pos for pos in (body.find(t, start) for t in terminators) if pos != -1]
        # No terminator after the label means the field runs to the end of
        # the body (e.g. it is the last line and has no trailing newline).
        end = min(hits) if hits else len(body)
        return body[start:end].strip()

    # Title comes from "🤖 **Code Review Finding: {title}**"; fall back to the
    # end of the line if the closing "**" is missing.
    result['title'] = _field(BOT_COMMENT_MARKER, ['**', '\n'])
    # Single-line fields.
    result['severity'] = _field('**Severity:**', ['\n'])
    result['category'] = _field('**Category:**', ['\n'])
    # Impact ends at a blank line or at the next section label, whichever
    # comes first, so it never absorbs the recommendation text.
    result['impact'] = _field('**Impact:**', ['\n\n', '**Recommendation:**'])
    # Recommendation ends at a blank line or at a fenced code suggestion block.
    result['recommendation'] = _field('**Recommendation:**', ['\n\n', '```'])

    return result
152+
153+
def _format_timestamp(iso_timestamp: str) -> str:
    """Format ISO timestamp to readable format.

    A trailing 'Z' is accepted as UTC. Timestamps that carry another UTC
    offset are converted to UTC before formatting, so the 'UTC' label in the
    output is accurate. Timestamps without any offset are formatted as given.

    Args:
        iso_timestamp: ISO 8601 timestamp string.

    Returns:
        Timestamp formatted as 'YYYY-MM-DD HH:MM UTC'. If the input cannot be
        parsed (including non-string input), it is returned unchanged.
    """
    # Imported here because the module-level import only brings in `datetime`.
    from datetime import timezone

    try:
        # fromisoformat() on older Python versions rejects the 'Z' suffix.
        parsed = datetime.fromisoformat(iso_timestamp.replace('Z', '+00:00'))
        if parsed.tzinfo is not None:
            # Normalise aware values so the wall-clock time really is UTC.
            parsed = parsed.astimezone(timezone.utc)
        return parsed.strftime('%Y-%m-%d %H:%M UTC')
    except (ValueError, AttributeError, OverflowError):
        return iso_timestamp
168+
169+
def _clip_with_ellipsis(text: str, limit: int) -> str:
    """Return text cut to limit characters, adding '...' only when it was cut."""
    if len(text) > limit:
        return text[:limit] + "..."
    return text


def _format_reactions(reactions: Dict[str, Any]) -> str:
    """Summarise reaction counts as '👍 2, 👎 1, name 3'; '' if nothing to show."""
    parts = []
    thumbs_up = reactions.get('+1') or 0
    thumbs_down = reactions.get('-1') or 0
    if thumbs_up > 0:
        parts.append(f"👍 {thumbs_up}")
    if thumbs_down > 0:
        parts.append(f"👎 {thumbs_down}")
    for reaction, count in reactions.items():
        # Zero counts are skipped, matching how the thumbs are handled.
        if reaction not in ('+1', '-1') and count:
            parts.append(f"{reaction} {count}")
    return ', '.join(parts)


def _format_thread_lines(index: int, thread: Dict[str, Any]) -> List[str]:
    """Build the text lines for one thread: finding, reactions, then replies."""
    # `or` fallbacks: entries may be absent or explicitly None.
    bot_comment = thread.get('bot_comment') or {}
    replies = thread.get('replies') or []
    reactions = thread.get('reactions') or {}

    # Get file and line info
    file_path = bot_comment.get('path', 'unknown')
    line = bot_comment.get('line') or bot_comment.get('original_line') or '?'

    # Parse bot comment structure
    parsed = _parse_bot_comment(bot_comment.get('body') or '')

    # Thread header
    lines = [f"THREAD {index} - {file_path}:{line}", "─" * 65]

    # Bot finding
    timestamp = _format_timestamp(bot_comment.get('created_at', ''))
    lines.append(f"Bot Finding ({timestamp})")
    if parsed['severity']:
        lines.append(f" Severity: {parsed['severity']}")
    if parsed['category']:
        lines.append(f" Category: {parsed['category']}")
    if parsed['title']:
        lines.append(f" Title: {parsed['title']}")
    if parsed['impact']:
        lines.append(f" Impact: {_clip_with_ellipsis(parsed['impact'], 500)}")
    if parsed['recommendation']:
        lines.append(f" Recommendation: {_clip_with_ellipsis(parsed['recommendation'], 500)}")

    reaction_summary = _format_reactions(reactions)
    if reaction_summary:
        lines.append(f" User Reactions: {reaction_summary}")

    lines.append("")

    # Truncation notice
    omitted = thread.get('truncated_replies')
    if omitted:
        lines.append(f" ({omitted} earlier replies omitted)")
        lines.append("")

    # User replies
    for reply in replies:
        user = (reply.get('user') or {}).get('login', 'unknown')
        reply_timestamp = _format_timestamp(reply.get('created_at', ''))
        reply_body = (reply.get('body') or '').strip()

        # Truncate very long replies
        if len(reply_body) > 1000:
            reply_body = reply_body[:1000] + "... (truncated)"

        lines.append(f"User Reply ({user}, {reply_timestamp}):")
        # Indent reply text
        lines.extend(f" {reply_line}" for reply_line in reply_body.split('\n'))
        lines.append("")

    lines.append("─" * 65)
    lines.append("")
    return lines


def _rereview_instruction_lines() -> List[str]:
    """Return the fixed re-review instruction lines that close the context."""
    return [
        "═" * 65,
        "INSTRUCTIONS FOR RE-REVIEW",
        "═" * 65,
        "",
        "When reviewing this PR with the above context:",
        "",
        "1. CHECK IF ISSUES WERE ADDRESSED: Compare previous findings against",
        " the current diff. If code was changed to fix the issue, do NOT",
        " re-raise it.",
        "",
        "2. EVALUATE USER RESPONSES: Read user replies carefully. Valid",
        " dismissals include:",
        " - Demonstrating the issue is a false positive with evidence",
        " - Showing existing mitigations the bot missed",
        " - Providing strong technical justification",
        "",
        " Invalid dismissals include:",
        " - \"We'll fix this later\" (without code change)",
        " - Misunderstanding the vulnerability/issue",
        " - Ignoring the issue without explanation",
        "",
        "3. CONSIDER USER REACTIONS: Reactions provide additional signal:",
        " - 👎 (thumbs down) suggests users found the finding unhelpful",
        " - 👍 (thumbs up) suggests users found the finding valuable",
        " - High 👎 count with no reply may indicate obvious false positive",
        " - Use reactions as one input, but prioritize reply content",
        "",
        "4. RE-RAISE WHEN APPROPRIATE: If an issue was invalidly dismissed",
        " or remains unaddressed, re-raise it with:",
        " - Reference to the previous discussion",
        " - Response to the user's dismissal reasoning",
        " - Updated title: \"[Issue Title] (previously raised)\"",
        "",
        "5. DO NOT REPEAT RESOLVED ISSUES: If code was changed to address",
        " a finding, do not mention it unless the fix is incomplete.",
        "",
        "═" * 65,
        "",
    ]


def _format_threads_for_prompt(threads: List[Dict[str, Any]]) -> str:
    """Format threads as readable text for Claude's prompt.

    The output has three parts: a fixed header, one section per thread, and
    the fixed re-review instructions. If the whole text is longer than
    MAX_CONTEXT_CHARS, only the thread part is clipped (and a truncation
    notice added), so the instructions are always included.

    Args:
        threads: List of thread dictionaries with 'bot_comment', 'replies'
            and optionally 'reactions' and 'truncated_replies'.

    Returns:
        Formatted string for inclusion in prompt, or an empty string when
        there are no threads.
    """
    if not threads:
        return ""

    header = '\n'.join([
        "",
        "═" * 65,
        "PREVIOUS REVIEW CONTEXT",
        "═" * 65,
        "",
        "The following findings were raised in previous reviews of this PR.",
        "Review user responses to determine if issues should be re-raised.",
        "",
    ])

    thread_lines: List[str] = []
    for index, thread in enumerate(threads, 1):
        thread_lines.extend(_format_thread_lines(index, thread))
    thread_section = '\n'.join(thread_lines)

    instructions = '\n'.join(_rereview_instruction_lines())

    result = '\n'.join([header, thread_section, instructions])
    if len(result) <= MAX_CONTEXT_CHARS:
        return result

    # Truncate if too long. Cutting the end of the whole text would remove
    # the instructions first, so the cut is applied to the threads only.
    logger.warning(f"Review context truncated from {len(result)} to {MAX_CONTEXT_CHARS} chars")
    notice = "\n\n(Review context truncated due to length)\n"
    # The "- 2" accounts for the two newlines that join the three parts.
    budget = MAX_CONTEXT_CHARS - len(header) - len(instructions) - len(notice) - 2
    clipped_threads = thread_section[:max(budget, 0)] + notice
    return '\n'.join([header, clipped_threads, instructions])
0 commit comments