-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathsupport.py
More file actions
2517 lines (2028 loc) · 100 KB
/
support.py
File metadata and controls
2517 lines (2028 loc) · 100 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""JavaScript language support implementation.
This module implements the LanguageSupport protocol for JavaScript,
using tree-sitter for code analysis and Jest for test execution.
"""
from __future__ import annotations
import logging
import subprocess
import xml.etree.ElementTree as ET
from pathlib import Path
from typing import TYPE_CHECKING, Any
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.languages.base import CodeContext, FunctionFilterCriteria, HelperFunction, Language, TestInfo, TestResult
from codeflash.languages.javascript.treesitter import TreeSitterAnalyzer, TreeSitterLanguage, get_analyzer_for_file
from codeflash.languages.registry import register_language
from codeflash.models.models import FunctionParent
if TYPE_CHECKING:
from collections.abc import Sequence
from codeflash.languages.base import ReferenceInfo
from codeflash.languages.javascript.treesitter import TypeDefinition
from codeflash.models.models import GeneratedTestsList, InvocationId
logger = logging.getLogger(__name__)
@register_language
class JavaScriptSupport:
"""JavaScript language support implementation.
This class implements the LanguageSupport protocol for JavaScript/JSX files,
using tree-sitter for code analysis and Jest for test execution.
"""
def __init__(self) -> None:
    """Create the support object with no language version recorded yet."""
    # Exposed read-only through the ``language_version`` property.
    self._language_version: str | None = None
# === Properties ===
@property
def language(self) -> Language:
    """Return ``Language.JAVASCRIPT``, the language this implementation handles."""
    return Language.JAVASCRIPT
@property
def file_extensions(self) -> tuple[str, ...]:
    """Return the filename suffixes treated as JavaScript sources."""
    suffixes = (".js", ".jsx", ".mjs", ".cjs")
    return suffixes
@property
def default_file_extension(self) -> str:
    """Return the suffix used by default for JavaScript files."""
    default_suffix = ".js"
    return default_suffix
@property
def test_framework(self) -> str:
    """Return the name of the primary JavaScript test framework.

    The value comes from ``get_js_test_framework_or_default``, which is
    imported inside the property rather than at module import time.
    """
    from codeflash.languages.test_framework import get_js_test_framework_or_default

    framework_name = get_js_test_framework_or_default()
    return framework_name
@property
def comment_prefix(self) -> str:
    """Return the token that starts a single-line comment in JavaScript."""
    line_comment_token = "//"
    return line_comment_token
@property
def dir_excludes(self) -> frozenset[str]:
    """Return the directory names excluded for JavaScript projects.

    The set covers the dependency folder, build output folders and tool
    cache folders listed below.
    """
    excluded_names = (
        "node_modules",
        "dist",
        "build",
        ".next",
        ".nuxt",
        "coverage",
        ".cache",
        ".turbo",
        ".vercel",
    )
    return frozenset(excluded_names)
@property
def language_version(self) -> str | None:
    """Return the stored language version, or None when none has been set."""
    stored_version = self._language_version
    return stored_version
# === Discovery ===
def discover_functions(
    self, file_path: Path, filter_criteria: FunctionFilterCriteria | None = None
) -> list[FunctionToOptimize]:
    """Collect the optimizable functions declared in one JavaScript file.

    The file is read as UTF-8 and parsed with the tree-sitter analyzer chosen
    for its path. A read failure or a parse failure is logged as a warning
    and yields an empty list instead of an exception.

    Args:
        file_path: Path to the JavaScript file to analyze.
        filter_criteria: Optional filters; a default FunctionFilterCriteria
            is used when this is omitted.

    Returns:
        One FunctionToOptimize for every function that passes all filters.

    """
    criteria = filter_criteria or FunctionFilterCriteria()

    try:
        source = file_path.read_text(encoding="utf-8")
    except Exception as e:
        logger.warning("Failed to read %s: %s", file_path, e)
        return []

    try:
        analyzer = get_analyzer_for_file(file_path)
        tree_functions = analyzer.find_functions(
            source, include_methods=criteria.include_methods, include_arrow_functions=True, require_name=True
        )

        discovered: list[FunctionToOptimize] = []
        for func in tree_functions:
            # Filter 1: a return statement may be mandatory.
            if criteria.require_return and not analyzer.has_return_statement(func, source):
                continue

            # Filter 2: async functions may be excluded.
            if not criteria.include_async and func.is_async:
                continue

            # Filter 3: a non-exported function cannot be imported by tests.
            # Functions nested inside another function are still accepted.
            if not (func.is_exported or func.parent_function):
                logger.debug(f"Skipping non-exported function: {func.name}")  # noqa: G004
                continue

            # Enclosing scopes, class first and then the parent function.
            scope_specs = ((func.class_name, "ClassDef"), (func.parent_function, "FunctionDef"))
            parents: list[FunctionParent] = [
                FunctionParent(name=scope_name, type=scope_kind) for scope_name, scope_kind in scope_specs if scope_name
            ]

            discovered.append(
                FunctionToOptimize(
                    function_name=func.name,
                    file_path=file_path,
                    parents=parents,
                    starting_line=func.start_line,
                    ending_line=func.end_line,
                    starting_col=func.start_col,
                    ending_col=func.end_col,
                    is_async=func.is_async,
                    is_method=func.is_method,
                    language=str(self.language),
                    doc_start_line=func.doc_start_line,
                )
            )
    except Exception as e:
        logger.warning("Failed to parse %s: %s", file_path, e)
        return []
    else:
        return discovered
def discover_functions_from_source(self, source: str, file_path: Path | None = None) -> list[FunctionToOptimize]:
    """Find every named function in a source string.

    Unlike ``discover_functions`` this applies no export, async or
    return-statement filtering; methods and arrow functions are always
    included. Any failure is logged as a warning and yields an empty list.

    Args:
        source: The source code to analyze.
        file_path: Optional path used to pick the analyzer; without it the
            plain JavaScript analyzer is used and results are attributed to
            ``Path("unknown")``.

    Returns:
        List of FunctionToOptimize objects for the discovered functions.

    """
    try:
        analyzer = (
            get_analyzer_for_file(file_path) if file_path else TreeSitterAnalyzer(TreeSitterLanguage.JAVASCRIPT)
        )
        tree_functions = analyzer.find_functions(
            source, include_methods=True, include_arrow_functions=True, require_name=True
        )

        discovered: list[FunctionToOptimize] = []
        for node in tree_functions:
            # Enclosing scopes, class first and then the parent function.
            scope_specs = ((node.class_name, "ClassDef"), (node.parent_function, "FunctionDef"))
            parents: list[FunctionParent] = [
                FunctionParent(name=scope_name, type=scope_kind) for scope_name, scope_kind in scope_specs if scope_name
            ]

            discovered.append(
                FunctionToOptimize(
                    function_name=node.name,
                    file_path=file_path or Path("unknown"),
                    parents=parents,
                    starting_line=node.start_line,
                    ending_line=node.end_line,
                    starting_col=node.start_col,
                    ending_col=node.end_col,
                    is_async=node.is_async,
                    is_method=node.is_method,
                    language=str(self.language),
                    doc_start_line=node.doc_start_line,
                )
            )
    except Exception as e:
        logger.warning("Failed to parse source: %s", e)
        return []
    else:
        return discovered
def _get_test_patterns(self) -> list[str]:
    """Return the glob patterns that identify JavaScript test files.

    Subclasses may override this to supply their own patterns.

    Returns:
        Suffix-based patterns first, then the ``__tests__`` directory patterns.

    """
    by_suffix = ["*.test.js", "*.test.jsx", "*.spec.js", "*.spec.jsx"]
    by_directory = ["__tests__/**/*.js", "__tests__/**/*.jsx"]
    return by_suffix + by_directory
def discover_tests(
    self, test_root: Path, source_functions: Sequence[FunctionToOptimize]
) -> dict[str, list[TestInfo]]:
    """Map source functions to their tests via static analysis.

    Test files are located under ``test_root`` with the patterns from
    ``_get_test_patterns``. A source function is linked to every Jest test
    of a file when its name is imported by that file or merely appears
    anywhere in the file's text.

    Args:
        test_root: Root directory containing tests.
        source_functions: Functions to find tests for.

    Returns:
        Dict mapping qualified function names to lists of TestInfo.

    """
    result: dict[str, list[TestInfo]] = {}

    # A file such as ``__tests__/foo.test.js`` matches more than one pattern.
    # dict.fromkeys drops the repeats while keeping first-seen order, so each
    # file is analyzed once and its tests are not recorded twice.
    test_files: list[Path] = list(
        dict.fromkeys(path for pattern in self._get_test_patterns() for path in test_root.rglob(pattern))
    )

    for test_file in test_files:
        try:
            # Explicit UTF-8, consistent with discover_functions; the platform
            # default encoding can mis-decode or reject UTF-8 sources.
            source = test_file.read_text(encoding="utf-8")
            analyzer = get_analyzer_for_file(test_file)
            imports = analyzer.find_imports(source)

            # Local names introduced by default and named imports.
            imported_names: set[str] = set()
            for imp in imports:
                if imp.default_import:
                    imported_names.add(imp.default_import)
                for name, alias in imp.named_imports:
                    imported_names.add(alias or name)

            # Names of the describe/it/test blocks in this file.
            test_functions = self._find_jest_tests(source, analyzer)

            for func in source_functions:
                # The substring check is deliberately loose: it also catches
                # functions reached without a direct named import.
                if func.function_name in imported_names or func.function_name in source:
                    result.setdefault(func.qualified_name, []).extend(
                        TestInfo(test_name=test_name, test_file=test_file, test_class=None)
                        for test_name in test_functions
                    )
        except Exception as e:
            # Best effort: one unreadable or unparsable test file must not
            # abort discovery for the remaining files.
            logger.debug("Failed to analyze test file %s: %s", test_file, e)

    return result
def _find_jest_tests(self, source: str, analyzer: TreeSitterAnalyzer) -> list[str]:
    """Return the names of the Jest test blocks found in ``source``.

    The source is encoded to UTF-8 bytes, parsed with ``analyzer`` and the
    resulting tree is handed to ``_walk_for_jest_tests`` for collection.
    """
    encoded = source.encode("utf8")
    root = analyzer.parse(encoded).root_node
    collected: list[str] = []
    self._walk_for_jest_tests(root, encoded, collected)
    return collected
def _walk_for_jest_tests(self, node: Any, source_bytes: bytes, test_names: list[str]) -> None:
    """Collect the names of Jest ``test``/``it``/``describe`` calls under ``node``.

    The tree is visited in pre-order, so names are appended in source order.
    An explicit stack is used instead of recursion so that deeply nested
    trees cannot exhaust the interpreter's recursion limit.

    Args:
        node: Tree-sitter node to start from.
        source_bytes: UTF-8 bytes that the tree was parsed from.
        test_names: Output list; found names are appended to it in place.

    """
    pending = [node]
    while pending:
        current = pending.pop()

        if current.type == "call_expression":
            func_node = current.child_by_field_name("function")
            if func_node:
                func_name = source_bytes[func_node.start_byte : func_node.end_byte].decode("utf8")
                if func_name in ("test", "it", "describe"):
                    args_node = current.child_by_field_name("arguments")
                    if args_node:
                        # The first string argument is the test name.
                        for child in args_node.children:
                            if child.type == "string":
                                literal = source_bytes[child.start_byte : child.end_byte].decode("utf8")
                                if len(literal) >= 2 and literal[0] == literal[-1] and literal[0] in "'\"":
                                    # Drop exactly the enclosing delimiters; quotes
                                    # that belong to the name itself are kept.
                                    test_names.append(literal[1:-1])
                                else:
                                    # Unexpected literal shape: keep the lenient stripping.
                                    test_names.append(literal.strip("'\""))
                                break

        # Push children reversed so they are popped left to right.
        pending.extend(reversed(current.children))
# === Code Analysis ===
def extract_code_context(self, function: FunctionToOptimize, project_root: Path, module_root: Path) -> CodeContext:
    """Extract the target function's code together with its dependencies.

    Uses tree-sitter to locate the function (including a leading JSDoc block
    when one is reported), its helper functions, the file's import
    statements, referenced type definitions and referenced module-level
    declarations. A class method is wrapped in a minimal class definition
    because method syntax is only valid inside a class body.

    Args:
        function: The function to extract context for.
        project_root: Root of the project (not read by this method).
        module_root: Root of the module containing the function; forwarded to
            the helper and type-definition lookups.

    Returns:
        CodeContext with target code and dependencies. If the source file
        cannot be read, a CodeContext with empty ``target_code`` is returned.

    Raises:
        ValueError: If the extracted code is non-empty and is rejected by
            ``self.validate_syntax``.

    """
    try:
        # Explicit UTF-8, consistent with discover_functions; relying on the
        # platform default encoding can mis-decode or reject UTF-8 sources.
        source = function.file_path.read_text(encoding="utf-8")
    except Exception as e:
        logger.exception("Failed to read %s: %s", function.file_path, e)
        return CodeContext(target_code="", target_file=function.file_path, language=Language.JAVASCRIPT)

    analyzer = get_analyzer_for_file(function.file_path)

    # Locate the matching tree-sitter function to learn where its JSDoc starts.
    tree_functions = analyzer.find_functions(source, include_methods=True, include_arrow_functions=True)
    target_func = None
    for func in tree_functions:
        if func.name == function.function_name and func.start_line == function.starting_line:
            target_func = func
            break

    # Slice the function text out of the file (line numbers are 1-based).
    lines = source.splitlines(keepends=True)
    if function.starting_line and function.ending_line:
        # Start at the JSDoc when available, otherwise at the function itself.
        effective_start = (target_func.doc_start_line if target_func else None) or function.starting_line
        target_code = "".join(lines[effective_start - 1 : function.ending_line])
    else:
        target_code = ""

    imports = analyzer.find_imports(source)

    # Helpers are needed before class wrapping so same-class helpers can be
    # placed inside the wrapper.
    helpers = self._find_helper_functions(function, source, analyzer, imports, module_root)

    same_class_helper_names: set[str] = set()
    if function.is_method and function.parents:
        class_name = None
        for parent in function.parents:
            if parent.type == "ClassDef":
                class_name = parent.name
                break

        if class_name:
            # (method name, source) pairs of helpers defined on the same class.
            same_class_helpers = self._find_same_class_helpers(
                class_name, function.function_name, helpers, tree_functions, lines
            )
            same_class_helper_names = {h[0] for h in same_class_helpers}

            # Class JSDoc, indentation, constructor and fields give the method context.
            class_info = self._find_class_definition(source, class_name, analyzer, function.function_name)
            if class_info:
                class_jsdoc, class_indent, constructor_code, fields_code = class_info

                # Body order: fields, constructor, target method, same-class helpers.
                class_body_parts = []
                if fields_code:
                    class_body_parts.append(fields_code)
                if constructor_code:
                    class_body_parts.append(constructor_code)
                class_body_parts.append(target_code)
                class_body_parts.extend(helper_source for _helper_name, helper_source in same_class_helpers)
                class_body = "\n".join(class_body_parts)

                if class_jsdoc:
                    target_code = (
                        f"{class_jsdoc}\n{class_indent}class {class_name} {{\n{class_body}{class_indent}}}\n"
                    )
                else:
                    target_code = f"{class_indent}class {class_name} {{\n{class_body}{class_indent}}}\n"
            else:
                # Class node not found: wrap without indentation or class context.
                helper_code = "\n".join(h[1] for h in same_class_helpers)
                if helper_code:
                    target_code = f"class {class_name} {{\n{target_code}\n{helper_code}}}\n"
                else:
                    target_code = f"class {class_name} {{\n{target_code}}}\n"

    # Same-class helpers already live inside the class wrapper.
    if same_class_helper_names:
        helpers = [h for h in helpers if h.name not in same_class_helper_names]

    # Import statements as stripped source text.
    import_lines = ["".join(lines[imp.start_line - 1 : imp.end_line]).strip() for imp in imports]

    # Type definitions for function parameters and class fields.
    type_definitions_context, type_definition_names = self._extract_type_definitions_context(
        function=function, source=source, analyzer=analyzer, imports=imports, module_root=module_root
    )

    # Referenced module-level declarations, minus the type definitions
    # gathered above so nothing is emitted twice.
    read_only_context = self._find_referenced_globals(
        target_code=target_code,
        helpers=helpers,
        source=source,
        analyzer=analyzer,
        imports=imports,
        exclude_names=type_definition_names,
    )

    # Type definitions go ahead of the other read-only context.
    if type_definitions_context:
        if read_only_context:
            read_only_context = type_definitions_context + "\n\n" + read_only_context
        else:
            read_only_context = type_definitions_context

    # Fail early rather than optimizing code that does not parse.
    if target_code and not self.validate_syntax(target_code):
        error_msg = (
            f"Extracted code for {function.function_name} is not syntactically valid JavaScript. "
            f"Cannot proceed with optimization."
        )
        logger.error(error_msg)
        raise ValueError(error_msg)

    return CodeContext(
        target_code=target_code,
        target_file=function.file_path,
        helper_functions=helpers,
        read_only_context=read_only_context,
        imports=import_lines,
        language=Language.JAVASCRIPT,
    )
def _find_class_definition(
    self, source: str, class_name: str, analyzer: TreeSitterAnalyzer, target_method_name: str | None = None
) -> tuple[str, str, str, str] | None:
    """Find a class definition and extract its JSDoc, indentation, constructor, and fields.

    Args:
        source: The source code to search.
        class_name: The name of the class to find.
        analyzer: TreeSitterAnalyzer for parsing.
        target_method_name: Name of the target method; forwarded unchanged to
            ``_extract_class_context``.

    Returns:
        Tuple of (jsdoc_comment, indentation, constructor_code, fields_code),
        or None if no class with that name is found. ``indentation`` is the
        exact leading whitespace of the line on which the class node starts.

    """
    source_bytes = source.encode("utf8")
    tree = analyzer.parse(source_bytes)

    def find_class_node(node):
        """Depth-first search for the first class node named ``class_name``."""
        if node.type in ("class_declaration", "class"):
            name_node = node.child_by_field_name("name")
            if name_node:
                node_name = source_bytes[name_node.start_byte : name_node.end_byte].decode("utf8")
                if node_name == class_name:
                    return node

        for child in node.children:
            result = find_class_node(child)
            if result:
                return result

        return None

    class_node = find_class_node(tree.root_node)
    if not class_node:
        return None

    # Take the indentation verbatim from the class line. Rebuilding it as a
    # run of spaces would turn a tab-indented class into a space-indented one.
    lines = source.splitlines(keepends=True)
    class_line_idx = class_node.start_point[0]
    if class_line_idx < len(lines):
        class_line = lines[class_line_idx]
        indentation = class_line[: len(class_line) - len(class_line.lstrip())]
    else:
        indentation = ""

    # Only a "/**" comment directly preceding the class node counts as JSDoc.
    jsdoc = ""
    prev_sibling = class_node.prev_named_sibling
    if prev_sibling and prev_sibling.type == "comment":
        comment_text = source_bytes[prev_sibling.start_byte : prev_sibling.end_byte].decode("utf8")
        if comment_text.strip().startswith("/**"):
            jsdoc = comment_text

    # Constructor and field declarations come from the class body, if any.
    constructor_code = ""
    fields_code = ""
    body_node = class_node.child_by_field_name("body")
    if body_node:
        constructor_code, fields_code = self._extract_class_context(
            body_node, source_bytes, lines, target_method_name
        )

    return (jsdoc, indentation, constructor_code, fields_code)
def _extract_class_context(
    self, body_node: Any, source_bytes: bytes, lines: list[str], target_method_name: str | None
) -> tuple[str, str]:
    """Collect constructor and field declarations from a class body.

    Whole source lines are copied for each constructor and field. A
    constructor also pulls in a directly preceding ``/**`` comment, while a
    field pulls in any directly preceding comment.

    Args:
        body_node: Tree-sitter node for the class body.
        source_bytes: Source code as bytes.
        lines: Source code split into lines (line endings kept).
        target_method_name: Name of the target method. It is not consulted
            here: only members named ``constructor`` are extracted, so other
            methods are never copied.

    Returns:
        Tuple of (constructor_code, fields_code).

    """

    def node_text(node: Any) -> str:
        """Decode the source text covered by ``node``."""
        return source_bytes[node.start_byte : node.end_byte].decode("utf8")

    def member_block(member: Any, *, jsdoc_only: bool) -> str:
        """Return the member's lines, extended upward over a leading comment."""
        first_row = member.start_point[0]
        last_row = member.end_point[0]
        preceding = member.prev_named_sibling
        if preceding and preceding.type == "comment":
            if not jsdoc_only or node_text(preceding).strip().startswith("/**"):
                first_row = preceding.start_point[0]
        return "".join(lines[first_row : last_row + 1])

    constructor_chunks: list[str] = []
    field_chunks: list[str] = []

    for member in body_node.children:
        # The braces delimiting the class body carry no context.
        if member.type in ("{", "}"):
            continue

        if member.type == "method_definition":
            name_node = member.child_by_field_name("name")
            if name_node and node_text(name_node) == "constructor":
                constructor_chunks.append(member_block(member, jsdoc_only=True))
        elif member.type in ("public_field_definition", "field_definition"):
            # Class property declarations (both node type names are accepted).
            field_chunks.append(member_block(member, jsdoc_only=False))

    return ("".join(constructor_chunks), "".join(field_chunks))
def _find_same_class_helpers(
    self,
    class_name: str,
    target_method_name: str,
    helpers: list[HelperFunction],
    tree_functions: list,
    lines: list[str],
) -> list[tuple[str, str]]:
    """Select the helpers that are methods of the target method's own class.

    Such helpers have to be emitted inside the class wrapper instead of
    after it, since they may rely on class-only syntax.

    Args:
        class_name: Name of the class containing the target method.
        target_method_name: Name of the target method (never returned).
        helpers: All helper functions found for the target.
        tree_functions: FunctionNode objects from tree-sitter analysis.
        lines: Source code split into lines.

    Returns:
        (method_name, source_code) tuples in ``tree_functions`` order; the
        source starts at the method's JSDoc when it has one.

    """
    # Helper names, minus the target itself and the constructor.
    wanted_names = {helper.name for helper in helpers} - {target_method_name, "constructor"}

    return [
        (node.name, "".join(lines[(node.doc_start_line or node.start_line) - 1 : node.end_line]))
        for node in tree_functions
        if node.class_name == class_name and node.name in wanted_names
    ]
def _find_helper_functions(
    self,
    function: FunctionToOptimize,
    source: str,
    analyzer: TreeSitterAnalyzer,
    imports: list[Any],
    module_root: Path,
) -> list[HelperFunction]:
    """Collect the helper functions that the target function calls.

    Same-file helpers are matched by name against the calls found inside the
    target. Helpers from imported files are then added through
    MultiFileHelperFinder; a failure in that step is logged at debug level
    and leaves the same-file helpers untouched.

    Args:
        function: The target function to find helpers for.
        source: Source code of the file containing the function.
        analyzer: TreeSitterAnalyzer for parsing.
        imports: List of ImportInfo objects from the source file.
        module_root: Root directory of the module/project.

    Returns:
        HelperFunction objects from the same file followed by those from
        imported files; empty when the target cannot be located in ``source``.

    """
    all_functions = analyzer.find_functions(source, include_methods=True)

    # The target is identified by name and starting line.
    target_func = next(
        (
            candidate
            for candidate in all_functions
            if candidate.name == function.function_name and candidate.start_line == function.starting_line
        ),
        None,
    )
    if not target_func:
        return []

    called_names = set(analyzer.find_function_calls(source, target_func))
    lines = source.splitlines(keepends=True)

    helpers: list[HelperFunction] = []
    for candidate in all_functions:
        if candidate.name == function.function_name or candidate.name not in called_names:
            continue

        # Begin at the JSDoc block when the helper has one.
        first_line = candidate.doc_start_line or candidate.start_line
        helpers.append(
            HelperFunction(
                name=candidate.name,
                qualified_name=candidate.name,
                file_path=function.file_path,
                source_code="".join(lines[first_line - 1 : candidate.end_line]),
                start_line=first_line,
                end_line=candidate.end_line,
            )
        )

    # Helpers that live in imported files.
    try:
        from codeflash.languages.javascript.import_resolver import ImportResolver, MultiFileHelperFinder

        resolver = ImportResolver(module_root)
        finder = MultiFileHelperFinder(module_root, resolver)
        cross_file_helpers = finder.find_helpers(
            function=function,
            source=source,
            analyzer=analyzer,
            imports=imports,
            max_depth=2,  # Target → helpers → helpers of helpers
        )

        # Entries keyed by the target's own file are skipped.
        for helper_file, file_helpers in cross_file_helpers.items():
            if helper_file != function.file_path:
                helpers.extend(file_helpers)
    except Exception as e:
        logger.debug("Failed to find cross-file helpers: %s", e)

    return helpers
def _find_referenced_globals(
    self,
    target_code: str,
    helpers: list[HelperFunction],
    source: str,
    analyzer: TreeSitterAnalyzer,
    imports: list[Any],
    exclude_names: set[str] | None = None,
) -> str:
    """Find module-level declarations referenced by the target function and its helpers.

    Args:
        target_code: The target function's source code.
        helpers: List of helper functions.
        source: Full source code of the file.
        analyzer: TreeSitterAnalyzer for parsing.
        imports: List of ImportInfo objects.
        exclude_names: Names to exclude from the result (e.g., type definitions).

    Returns:
        The source of every referenced declaration, one per line, ordered by
        ``start_line``. Declarations that share a ``start_line`` keep the
        order in which the analyzer reported them. Empty string when nothing
        is referenced.

    """
    if exclude_names is None:
        exclude_names = set()

    module_declarations = analyzer.find_module_level_declarations(source)
    if not module_declarations:
        return ""

    # Local names bound by imports are not treated as globals.
    imported_names: set[str] = set()
    for imp in imports:
        if imp.default_import:
            imported_names.add(imp.default_import)
        if imp.namespace_import:
            imported_names.add(imp.namespace_import)
        for name, alias in imp.named_imports:
            imported_names.add(alias if alias else name)

    # name -> declaration, skipping imported and excluded names. When a name
    # is declared more than once, the last declaration wins.
    decl_map: dict[str, Any] = {
        decl.name: decl
        for decl in module_declarations
        if decl.name not in imported_names and decl.name not in exclude_names
    }
    if not decl_map:
        return ""

    # Identifiers used by the target code and by every helper.
    all_references: set[str] = set(analyzer.find_referenced_identifiers(target_code))
    for helper in helpers:
        all_references.update(analyzer.find_referenced_identifiers(helper.source_code))

    # Walk the declarations (dicts keep insertion order) rather than the
    # reference set: set iteration order depends on string hashing, which made
    # the order of same-line declarations vary between runs.
    referenced_globals: list[Any] = []
    seen_decl_sources: set[str] = set()  # one entry per destructuring statement
    for name, decl in decl_map.items():
        if name in all_references and decl.source_code not in seen_decl_sources:
            seen_decl_sources.add(decl.source_code)
            referenced_globals.append(decl)

    if not referenced_globals:
        return ""

    # The stable sort keeps declaration order among equal line numbers.
    referenced_globals.sort(key=lambda d: d.start_line)
    return "\n".join(decl.source_code for decl in referenced_globals)
def _extract_type_definitions_context(
    self,
    function: FunctionToOptimize,
    source: str,
    analyzer: TreeSitterAnalyzer,
    imports: list[Any],
    module_root: Path,
) -> tuple[str, set[str]]:
    """Gather the type definitions the function depends on, as read-only context.

    Type names are seeded from the function's annotations and, for class
    methods, from the class's field types. Each name is resolved in the same
    file first. Names found there are scanned for further type names, which
    are resolved in later rounds (at most 10). Names missing from the file
    are looked up in imported files.

    Args:
        function: The target function to analyze.
        source: Source code of the file.
        analyzer: TreeSitterAnalyzer for parsing.
        imports: List of ImportInfo objects.
        module_root: Root directory of the module.

    Returns:
        Tuple of (type definitions string, set of found type names). The
        string lists definitions sorted by file path and line, with a
        ``// From <file>`` comment where a definition's file changes.

    """
    # Seed names: parameter/return annotations, plus class field types.
    type_names = analyzer.extract_type_annotations(source, function.function_name, function.starting_line or 1)
    if function.is_method and function.parents:
        for parent in function.parents:
            if parent.type == "ClassDef":
                type_names.update(analyzer.extract_class_field_types(source, parent.name))

    if not type_names:
        return "", set()

    same_file_type_map = {defn.name: defn for defn in analyzer.find_type_definitions(source)}

    found_definitions: list[TypeDefinition] = []
    found_type_names: set[str] = set()
    processed_types: set[str] = set()
    pending = set(type_names)

    max_iterations = 10  # Bounds the rounds of transitive lookup.
    for _ in range(max_iterations):
        if not pending:
            break

        next_round: set[str] = set()
        missing_locally: set[str] = set()

        for type_name in pending:
            if type_name in processed_types:
                continue
            processed_types.add(type_name)

            if type_name in found_type_names:
                # Already resolved (e.g. through an imported file).
                continue

            if type_name in same_file_type_map:
                defn = same_file_type_map[type_name]
                found_definitions.append(defn)
                found_type_names.add(type_name)

                # Queue the types that this definition itself mentions.
                referenced_types = self._extract_types_from_definition(defn.source_code, analyzer)
                next_round.update(referenced_types - found_type_names - processed_types)
            else:
                missing_locally.add(type_name)

        # Names absent from this file are resolved through the imports.
        if missing_locally:
            imported_definitions = self._find_imported_type_definitions(
                missing_locally, imports, module_root, function.file_path
            )
            for defn in imported_definitions:
                if defn.name not in found_type_names:
                    found_definitions.append(defn)
                    found_type_names.add(defn.name)

        pending = next_round

    if not found_definitions:
        return "", found_type_names

    # Deterministic order: by file path, then by line.
    found_definitions.sort(key=lambda d: (str(d.file_path or ""), d.start_line))

    # Emit definitions grouped by file, each group headed by a marker comment.
    parts: list[str] = []
    current_file: Path | None = None
    for defn in found_definitions:
        if defn.file_path and defn.file_path != current_file:
            current_file = defn.file_path
            parts.append(f"// From {current_file.name}")
        parts.append(defn.source_code)

    return "\n\n".join(parts), found_type_names
def _extract_types_from_definition(self, type_source: str, analyzer: TreeSitterAnalyzer) -> set[str]:
    """Return the type names mentioned inside one type definition.

    The definition is parsed on its own and every ``type_identifier`` node
    is collected, except for the built-in type names listed below.

    Args:
        type_source: Source code of the type definition.
        analyzer: TreeSitterAnalyzer for parsing.

    Returns:
        Set of type names found in the definition.

    """
    builtin_type_names = frozenset(
        {
            "number",
            "string",
            "boolean",
            "void",
            "null",
            "undefined",
            "any",
            "never",
            "unknown",
            "object",
            "symbol",
            "bigint",
        }
    )

    encoded = type_source.encode("utf8")
    collected: set[str] = set()

    # Explicit-stack traversal; visit order does not matter for a set.
    pending = [analyzer.parse(encoded).root_node]
    while pending:
        node = pending.pop()
        if node.type == "type_identifier":
            identifier = encoded[node.start_byte : node.end_byte].decode("utf8")
            if identifier not in builtin_type_names:
                collected.add(identifier)
        pending.extend(node.children)

    return collected
def _find_imported_type_definitions(
self, type_names: set[str], imports: list[Any], module_root: Path, source_file_path: Path
) -> list[TypeDefinition]:
"""Find type definitions in imported files.
Args:
type_names: Set of type names to look for.
imports: List of ImportInfo objects from the source file.
module_root: Root directory of the module.
source_file_path: Path to the source file (for resolving relative imports).
Returns:
List of TypeDefinition objects found in imported files.
"""
found_definitions: list[TypeDefinition] = []
# Build a map of type names to their import info and original names
type_import_map: dict[str, tuple[Any, str]] = {} # local_name -> (ImportInfo, original_name)
for imp in imports:
# Check if any of our type names are imported from this module
for name, alias in imp.named_imports:
# The type could be imported with an alias
local_name = alias if alias else name
if local_name in type_names:
type_import_map[local_name] = (imp, name) # (ImportInfo, original_name)
if not type_import_map:
return found_definitions
# Resolve imports and find type definitions
from codeflash.languages.javascript.import_resolver import ImportResolver
try:
import_resolver = ImportResolver(module_root)
except Exception:
logger.debug("Failed to create ImportResolver for type definition lookup")
return found_definitions