@@ -208,39 +208,38 @@ def get_code_optimization_context(
208208 )
209209
210210
211- def get_code_optimization_context_for_language (
212- function_to_optimize : FunctionToOptimize ,
213- project_root_path : Path ,
214- optim_token_limit : int = OPTIMIZATION_CONTEXT_TOKEN_LIMIT ,
215- testgen_token_limit : int = TESTGEN_CONTEXT_TOKEN_LIMIT ,
216- ) -> CodeOptimizationContext :
217- """Extract code optimization context for non-Python languages.
211+ def _strip_javadoc_comments (source : str ) -> str :
212+ """Strip Javadoc (/** ... */) comments from Java source code.
218213
219- Uses the language support abstraction to extract code context and converts
220- it to the CodeOptimizationContext format expected by the pipeline.
214+ Preserves single-line comments (//) and regular block comments (/* ... */).
215+ """
216+ import re
221217
222- This function supports multi-file context extraction, grouping helpers by file
223- and creating proper CodeStringsMarkdown with file paths for multi-file replacement.
218+ return re .sub (r"/\*\*.*?\*/\s*" , "" , source , flags = re .DOTALL )
219+
220+
221+ def _build_code_strings_for_language (
222+ code_context ,
223+ function_to_optimize : FunctionToOptimize ,
224+ project_root_path : Path ,
225+ include_cross_file_helpers : bool = True ,
226+ strip_javadoc : bool = False ,
227+ include_same_file_helpers : bool = True ,
228+ ) -> tuple [list [CodeString ], list [FunctionSource ], str ]:
229+ """Build CodeString list from a CodeContext with configurable reduction.
224230
225231 Args:
226- function_to_optimize: The function to extract context for.
232+ code_context: CodeContext from language support.
233+ function_to_optimize: The target function.
227234 project_root_path: Root of the project.
228- optim_token_limit: Token limit for optimization context.
229- testgen_token_limit: Token limit for testgen context.
235+ include_cross_file_helpers: Whether to include helpers from other files.
236+ strip_javadoc: Whether to strip Javadoc comments from all code.
237+ include_same_file_helpers: Whether to include same-file helper methods.
230238
231239 Returns:
232- CodeOptimizationContext with target code and dependencies .
240+ Tuple of (code_strings, helper_function_sources, read_only_context) .
233241
234242 """
235- from codeflash .languages import get_language_support
236-
237- # Get language support for this function
238- language = Language (function_to_optimize .language )
239- lang_support = get_language_support (language )
240-
241- # Extract code context using language support
242- code_context = lang_support .extract_code_context (function_to_optimize , project_root_path , project_root_path )
243-
244243 # Build imports string if available
245244 imports_code = "\n " .join (code_context .imports ) if code_context .imports else ""
246245
@@ -251,82 +250,194 @@ def get_code_optimization_context_for_language(
251250 target_relative_path = function_to_optimize .file_path
252251
253252 # Group helpers by file path
254- helpers_by_file : dict [Path , list [ HelperFunction ] ] = defaultdict (list )
253+ helpers_by_file : dict [Path , list ] = defaultdict (list )
255254 helper_function_sources = []
256255
257256 for helper in code_context .helper_functions :
258257 helpers_by_file [helper .file_path ].append (helper )
259258
260259 # Convert to FunctionSource for pipeline compatibility
261- helper_function_sources .append (
262- FunctionSource (
263- file_path = helper .file_path ,
264- qualified_name = helper .qualified_name ,
265- fully_qualified_name = helper .qualified_name ,
266- only_function_name = helper .name ,
267- source_code = helper .source_code ,
268- jedi_definition = None ,
269- )
260+ should_include = (
261+ (helper .file_path == function_to_optimize .file_path and include_same_file_helpers )
262+ or (helper .file_path != function_to_optimize .file_path and include_cross_file_helpers )
270263 )
264+ if should_include :
265+ helper_function_sources .append (
266+ FunctionSource (
267+ file_path = helper .file_path ,
268+ qualified_name = helper .qualified_name ,
269+ fully_qualified_name = helper .qualified_name ,
270+ only_function_name = helper .name ,
271+ source_code = helper .source_code ,
272+ jedi_definition = None ,
273+ )
274+ )
271275
272- # Build read-writable code (target file + same-file helpers + global variables )
273- read_writable_code_strings = []
276+ # Build read-writable code (target file + same-file helpers)
277+ code_strings = []
274278
275279 # Combine target code with same-file helpers
276280 target_file_code = code_context .target_code
277- same_file_helpers = helpers_by_file .get (function_to_optimize .file_path , [])
278- if same_file_helpers :
279- helper_code = "\n \n " .join (h .source_code for h in same_file_helpers )
280- target_file_code = target_file_code + "\n \n " + helper_code
281-
282- # Note: code_context.read_only_context contains type definitions and global variables
283- # These should be passed as read-only context to the AI, not prepended to the target code
284- # If prepended to target code, the AI treats them as code to optimize and includes them in output
281+ if include_same_file_helpers :
282+ same_file_helpers = helpers_by_file .get (function_to_optimize .file_path , [])
283+ if same_file_helpers :
284+ helper_code = "\n \n " .join (h .source_code for h in same_file_helpers )
285+ target_file_code = target_file_code + "\n \n " + helper_code
285286
286287 # Add imports to target file code
287288 if imports_code :
288289 target_file_code = imports_code + "\n \n " + target_file_code
289290
290- read_writable_code_strings .append (
291+ if strip_javadoc :
292+ target_file_code = _strip_javadoc_comments (target_file_code )
293+
294+ code_strings .append (
291295 CodeString (code = target_file_code , file_path = target_relative_path , language = function_to_optimize .language )
292296 )
293297
294298 # Add helper files (cross-file helpers)
295- for file_path , file_helpers in helpers_by_file .items ():
296- if file_path == function_to_optimize .file_path :
297- continue # Already included in target file
299+ if include_cross_file_helpers :
300+ for file_path , file_helpers in helpers_by_file .items ():
301+ if file_path == function_to_optimize .file_path :
302+ continue # Already included in target file
298303
299- try :
300- helper_relative_path = file_path .resolve ().relative_to (project_root_path .resolve ())
301- except ValueError :
302- helper_relative_path = file_path
304+ try :
305+ helper_relative_path = file_path .resolve ().relative_to (project_root_path .resolve ())
306+ except ValueError :
307+ helper_relative_path = file_path
308+
309+ combined_helper_code = "\n \n " .join (h .source_code for h in file_helpers )
310+ if strip_javadoc :
311+ combined_helper_code = _strip_javadoc_comments (combined_helper_code )
312+
313+ code_strings .append (
314+ CodeString (
315+ code = combined_helper_code ,
316+ file_path = helper_relative_path ,
317+ language = function_to_optimize .language ,
318+ )
319+ )
303320
304- # Combine all helpers from this file
305- combined_helper_code = "\n \n " .join (h .source_code for h in file_helpers )
321+ read_only_context = code_context .read_only_context
322+ if strip_javadoc and read_only_context :
323+ read_only_context = _strip_javadoc_comments (read_only_context )
306324
307- read_writable_code_strings .append (
308- CodeString (
309- code = combined_helper_code , file_path = helper_relative_path , language = function_to_optimize .language
310- )
325+ return code_strings , helper_function_sources , read_only_context
326+
327+
328+ def get_code_optimization_context_for_language (
329+ function_to_optimize : FunctionToOptimize ,
330+ project_root_path : Path ,
331+ optim_token_limit : int = OPTIMIZATION_CONTEXT_TOKEN_LIMIT ,
332+ testgen_token_limit : int = TESTGEN_CONTEXT_TOKEN_LIMIT ,
333+ ) -> CodeOptimizationContext :
334+ """Extract code optimization context for non-Python languages.
335+
336+ Uses the language support abstraction to extract code context and converts
337+ it to the CodeOptimizationContext format expected by the pipeline.
338+
339+ This function supports multi-file context extraction, grouping helpers by file
340+ and creating proper CodeStringsMarkdown with file paths for multi-file replacement.
341+
342+ Applies progressive fallback when token limits are exceeded:
343+ 1. Full context (all helpers, Javadoc intact)
344+ 2. Remove cross-file helpers
345+ 3. Strip Javadoc comments
346+ 4. Remove all helpers (target code only)
347+
348+ Args:
349+ function_to_optimize: The function to extract context for.
350+ project_root_path: Root of the project.
351+ optim_token_limit: Token limit for optimization context.
352+ testgen_token_limit: Token limit for testgen context.
353+
354+ Returns:
355+ CodeOptimizationContext with target code and dependencies.
356+
357+ """
358+ from codeflash .languages import get_language_support
359+
360+ # Get language support for this function
361+ language = Language (function_to_optimize .language )
362+ lang_support = get_language_support (language )
363+
364+ # Extract code context using language support
365+ code_context = lang_support .extract_code_context (function_to_optimize , project_root_path , project_root_path )
366+
367+ # Progressive fallback strategies, ordered from most to least context
368+ fallback_strategies = [
369+ {"include_cross_file_helpers" : True , "strip_javadoc" : False , "include_same_file_helpers" : True },
370+ {"include_cross_file_helpers" : False , "strip_javadoc" : False , "include_same_file_helpers" : True },
371+ {"include_cross_file_helpers" : False , "strip_javadoc" : True , "include_same_file_helpers" : True },
372+ {"include_cross_file_helpers" : False , "strip_javadoc" : True , "include_same_file_helpers" : False },
373+ ]
374+
375+ fallback_descriptions = [
376+ "full context" ,
377+ "without cross-file helpers" ,
378+ "without cross-file helpers and Javadoc" ,
379+ "target code only (no helpers, no Javadoc)" ,
380+ ]
381+
382+ code_strings = None
383+ helper_function_sources = None
384+ read_only_context = None
385+
386+ for i , strategy in enumerate (fallback_strategies ):
387+ code_strings , helper_function_sources , read_only_context = _build_code_strings_for_language (
388+ code_context , function_to_optimize , project_root_path , ** strategy
311389 )
312390
391+ read_writable_code = CodeStringsMarkdown (
392+ code_strings = code_strings , language = function_to_optimize .language
393+ )
394+ read_writable_tokens = encoded_tokens_len (read_writable_code .markdown )
395+
396+ if read_writable_tokens <= optim_token_limit :
397+ if i > 0 :
398+ logger .debug (
399+ "Code context exceeded token limit, using fallback: %s (%d tokens)" ,
400+ fallback_descriptions [i ],
401+ read_writable_tokens ,
402+ )
403+ break
404+ else :
405+ raise ValueError ("Read-writable code has exceeded token limit even after removing all helpers and Javadoc" )
406+
313407 read_writable_code = CodeStringsMarkdown (
314- code_strings = read_writable_code_strings , language = function_to_optimize .language
408+ code_strings = code_strings , language = function_to_optimize .language
315409 )
316410
317- # Build testgen context (same as read_writable for non-Python)
411+ # Build testgen context with its own progressive fallback
412+ # Start from the same strategy level that worked for optim
413+ testgen_code_strings = code_strings
414+ testgen_helpers = helper_function_sources
415+
318416 testgen_context = CodeStringsMarkdown (
319- code_strings = read_writable_code_strings .copy (), language = function_to_optimize .language
417+ code_strings = testgen_code_strings .copy (), language = function_to_optimize .language
320418 )
321-
322- # Check token limits
323- read_writable_tokens = encoded_tokens_len (read_writable_code .markdown )
324- if read_writable_tokens > optim_token_limit :
325- raise ValueError ("Read-writable code has exceeded token limit, cannot proceed" )
326-
327419 testgen_tokens = encoded_tokens_len (testgen_context .markdown )
420+
328421 if testgen_tokens > testgen_token_limit :
329- raise ValueError ("Testgen code context has exceeded token limit, cannot proceed" )
422+ # Try remaining fallback strategies for testgen
423+ for j in range (i + 1 , len (fallback_strategies )):
424+ testgen_code_strings , testgen_helpers , read_only_context = _build_code_strings_for_language (
425+ code_context , function_to_optimize , project_root_path , ** fallback_strategies [j ]
426+ )
427+ testgen_context = CodeStringsMarkdown (
428+ code_strings = testgen_code_strings .copy (), language = function_to_optimize .language
429+ )
430+ testgen_tokens = encoded_tokens_len (testgen_context .markdown )
431+
432+ if testgen_tokens <= testgen_token_limit :
433+ logger .debug (
434+ "Testgen context exceeded token limit, using fallback: %s (%d tokens)" ,
435+ fallback_descriptions [j ],
436+ testgen_tokens ,
437+ )
438+ break
439+ else :
440+ raise ValueError ("Testgen code context has exceeded token limit even after removing all helpers and Javadoc" )
330441
331442 # Generate code hash from all read-writable code
332443 code_hash = hashlib .sha256 (read_writable_code .flat .encode ("utf-8" )).hexdigest ()
@@ -336,7 +447,7 @@ def get_code_optimization_context_for_language(
336447 read_writable_code = read_writable_code ,
337448 # Pass type definitions and globals as read-only context for the AI
338449 # This way the AI sees them as context but doesn't include them in optimized output
339- read_only_context_code = code_context . read_only_context ,
450+ read_only_context_code = read_only_context ,
340451 hashing_code_context = read_writable_code .flat ,
341452 hashing_code_context_hash = code_hash ,
342453 helper_functions = helper_function_sources ,
0 commit comments