-
Notifications
You must be signed in to change notification settings - Fork 26
Expand file tree
/
Copy pathcode_extractor.py
More file actions
750 lines (617 loc) · 31.7 KB
/
code_extractor.py
File metadata and controls
750 lines (617 loc) · 31.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
# ruff: noqa: ARG002
from __future__ import annotations
import ast
from itertools import chain
from typing import TYPE_CHECKING, Optional
import libcst as cst
from libcst.codemod import CodemodContext
from libcst.codemod.visitors import AddImportsVisitor, GatherImportsVisitor, RemoveImportsVisitor
from libcst.helpers import calculate_module_and_package
from codeflash.cli_cmds.console import logger
from codeflash.models.models import FunctionParent
if TYPE_CHECKING:
from pathlib import Path
from libcst.helpers import ModuleNameAndPackage
from codeflash.discovery.functions_to_optimize import FunctionToOptimize
from codeflash.models.models import FunctionSource
class GlobalAssignmentCollector(cst.CSTVisitor):
    """Record simple-name assignments found outside any function, class or ``if`` statement.

    After visiting a module, ``assignments`` maps each assigned name to the most
    recent ``cst.Assign`` node that targets it, and ``assignment_order`` lists the
    names in the order they were first encountered.
    """

    def __init__(self) -> None:
        super().__init__()
        # name -> latest Assign node targeting that name
        self.assignments: dict[str, cst.Assign] = {}
        # names in first-seen order
        self.assignment_order: list[str] = []
        # How many function/class definitions currently enclose the visited node.
        self.scope_depth = 0
        # How many `if` statements currently enclose the visited node.
        self.if_else_depth = 0

    def visit_FunctionDef(self, node: cst.FunctionDef) -> Optional[bool]:
        """Enter a function body; assignments inside are not global."""
        self.scope_depth += 1
        return True

    def leave_FunctionDef(self, original_node: cst.FunctionDef) -> None:
        """Leave a function body."""
        self.scope_depth -= 1

    def visit_ClassDef(self, node: cst.ClassDef) -> Optional[bool]:
        """Enter a class body; assignments inside are not global."""
        self.scope_depth += 1
        return True

    def leave_ClassDef(self, original_node: cst.ClassDef) -> None:
        """Leave a class body."""
        self.scope_depth -= 1

    def visit_If(self, node: cst.If) -> Optional[bool]:
        """Enter an ``if`` statement; assignments inside are ignored."""
        self.if_else_depth += 1
        return True

    def leave_If(self, original_node: cst.If) -> None:
        """Leave an ``if`` statement."""
        self.if_else_depth -= 1

    def visit_Else(self, node: cst.Else) -> Optional[bool]:
        """Keep traversing; the enclosing ``if`` already accounts for the nesting."""
        return True

    def visit_Assign(self, node: cst.Assign) -> Optional[bool]:
        """Register every plain-name target of ``node`` when it is not nested."""
        at_module_level = self.scope_depth == 0 and self.if_else_depth == 0
        if not at_module_level:
            return True
        for assign_target in node.targets:
            lhs = assign_target.target
            # Attribute, subscript and tuple targets are not tracked.
            if not isinstance(lhs, cst.Name):
                continue
            self.assignments[lhs.value] = node
            if lhs.value not in self.assignment_order:
                self.assignment_order.append(lhs.value)
        return True
class GlobalAssignmentTransformer(cst.CSTTransformer):
    """Apply a set of new global assignments to an existing module.

    An un-nested assignment whose target name appears in ``new_assignments`` is
    swapped for the new node. Names that were never matched are inserted as new
    statements after the leading import section when the module is left.
    """

    def __init__(self, new_assignments: dict[str, cst.Assign], new_assignment_order: list[str]) -> None:
        super().__init__()
        self.new_assignments = new_assignments
        self.new_assignment_order = new_assignment_order
        # Names that were replaced in place and therefore must not be inserted again.
        self.processed_assignments: set[str] = set()
        # Nesting counters for function/class definitions and `if` statements.
        self.scope_depth = 0
        self.if_else_depth = 0

    def visit_FunctionDef(self, node: cst.FunctionDef) -> None:
        """Enter a function body."""
        self.scope_depth += 1

    def leave_FunctionDef(self, original_node: cst.FunctionDef, updated_node: cst.FunctionDef) -> cst.FunctionDef:
        """Leave a function body, keeping the node as is."""
        self.scope_depth -= 1
        return updated_node

    def visit_ClassDef(self, node: cst.ClassDef) -> None:
        """Enter a class body."""
        self.scope_depth += 1

    def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.ClassDef:
        """Leave a class body, keeping the node as is."""
        self.scope_depth -= 1
        return updated_node

    def visit_If(self, node: cst.If) -> None:
        """Enter an ``if`` statement."""
        self.if_else_depth += 1

    def leave_If(self, original_node: cst.If, updated_node: cst.If) -> cst.If:
        """Leave an ``if`` statement, keeping the node as is."""
        self.if_else_depth -= 1
        return updated_node

    def visit_Else(self, node: cst.Else) -> None:
        """Do nothing; the enclosing ``if`` already accounts for the nesting."""
        return None

    def leave_Assign(self, original_node: cst.Assign, updated_node: cst.Assign) -> cst.CSTNode:
        """Swap an un-nested assignment for its replacement when one is available."""
        is_nested = self.scope_depth > 0 or self.if_else_depth > 0
        if not is_nested:
            for assign_target in original_node.targets:
                lhs = assign_target.target
                # The first plain-name target with a replacement decides the outcome.
                if isinstance(lhs, cst.Name) and lhs.value in self.new_assignments:
                    self.processed_assignments.add(lhs.value)
                    return self.new_assignments[lhs.value]
        return updated_node

    def _find_insertion_index(self, updated_node: cst.Module) -> int:
        """Return the body index just past the last import seen before any class/function definition."""
        import_nodes = (cst.Import, cst.ImportFrom)
        position = 0
        for idx, stmt in enumerate(updated_node.body):
            # Imports may legally appear anywhere, even at the bottom of a file.
            # Stopping at the first definition keeps a stray late import from
            # pushing the insertion point below real code.
            if isinstance(stmt, (cst.ClassDef, cst.FunctionDef)):
                break
            if isinstance(stmt, cst.SimpleStatementLine):
                counts_as_import = any(isinstance(small, import_nodes) for small in stmt.body)
            elif isinstance(stmt, cst.If):
                # A conditional block counts only when every line in it consists solely of imports.
                counts_as_import = all(
                    isinstance(line, cst.SimpleStatementLine)
                    and all(isinstance(small, import_nodes) for small in line.body)
                    for line in stmt.body.body
                )
            else:
                counts_as_import = False
            if counts_as_import:
                position = idx + 1
        return position

    def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> cst.Module:
        """Insert the assignments that had no counterpart in the module."""
        pending = [
            self.new_assignments[name]
            for name in self.new_assignment_order
            if name not in self.processed_assignments and name in self.new_assignments
        ]
        body = list(updated_node.body)
        if pending:
            start = self._find_insertion_index(updated_node)
            inserted = [
                cst.SimpleStatementLine([assignment], leading_lines=[cst.EmptyLine()]) for assignment in pending
            ]
            body[start:start] = inserted
            # Separate the inserted run from whatever follows it with an empty line.
            follower_index = start + len(inserted)
            if follower_index < len(body):
                follower = body[follower_index]
                already_separated = any(isinstance(line, cst.EmptyLine) for line in follower.leading_lines)
                if not already_separated:
                    body[follower_index] = follower.with_changes(
                        leading_lines=[cst.EmptyLine(), *follower.leading_lines]
                    )
        return updated_node.with_changes(body=body)
class GlobalStatementCollector(cst.CSTVisitor):
    """Gather simple statement lines that live outside function and class bodies.

    A line is kept when at least one of its statements is something other than
    an import or a plain assignment. Function and class bodies are not traversed.
    """

    def __init__(self) -> None:
        super().__init__()
        self.global_statements = []
        self.in_function_or_class = False

    def visit_ClassDef(self, node: cst.ClassDef) -> bool:
        """Flag that a class is being processed and skip its body."""
        self.in_function_or_class = True
        return False

    def leave_ClassDef(self, original_node: cst.ClassDef) -> None:
        """Clear the flag once the class has been passed."""
        self.in_function_or_class = False

    def visit_FunctionDef(self, node: cst.FunctionDef) -> bool:
        """Flag that a function is being processed and skip its body."""
        self.in_function_or_class = True
        return False

    def leave_FunctionDef(self, original_node: cst.FunctionDef) -> None:
        """Clear the flag once the function has been passed."""
        self.in_function_or_class = False

    def visit_SimpleStatementLine(self, node: cst.SimpleStatementLine) -> None:
        """Keep ``node`` unless it consists solely of imports and assignments."""
        if self.in_function_or_class:
            return
        ignored_kinds = (cst.Import, cst.ImportFrom, cst.Assign)
        if any(not isinstance(small, ignored_kinds) for small in node.body):
            self.global_statements.append(node)
class LastImportFinder(cst.CSTVisitor):
    """Locate the last import among the simple statement lines of a module.

    Simple statement lines are numbered from 1 in visiting order;
    ``last_import_line`` holds the number of the last one containing an import,
    or 0 when no import was seen.
    """

    def __init__(self) -> None:
        super().__init__()
        self.last_import_line = 0
        self.current_line = 0

    def visit_SimpleStatementLine(self, node: cst.SimpleStatementLine) -> None:
        """Advance the counter and remember it when the line holds an import."""
        self.current_line += 1
        if any(isinstance(small, (cst.Import, cst.ImportFrom)) for small in node.body):
            self.last_import_line = self.current_line
class DottedImportCollector(cst.CSTVisitor):
    """Collect top-level imports of a module in normalized dotted form.

    Imports directly in the module body are collected, as are imports directly
    inside the body of an ``if`` or ``try`` statement that is not nested in a
    function or class (for example ``if TYPE_CHECKING:``). Relative
    ``from . import x`` statements and star imports are skipped.

    Examples
    --------
    import os                                                                  ==> "os"
    import os.path                                                             ==> "os.path"
    import dbt.adapters.factory                                                ==> "dbt.adapters.factory"
    import numpy as np                                                         ==> "numpy.np"
    from pathlib import Path                                                   ==> "pathlib.Path"
    from recce.adapter.base import BaseAdapter                                 ==> "recce.adapter.base.BaseAdapter"
    from typing import Any, List, Optional                                     ==> "typing.Any", "typing.List", "typing.Optional"
    from recce.util.lineage import ( build_column_key, filter_dependency_maps) ==> "recce.util.lineage.build_column_key", "recce.util.lineage.filter_dependency_maps"

    """

    def __init__(self) -> None:
        # Initialise the libcst base classes, as the other visitors in this module do.
        super().__init__()
        self.imports: set[str] = set()
        self.depth = 0  # number of enclosing function/class definitions; 0 means top level

    def get_full_dotted_name(self, expr: cst.BaseExpression) -> str:
        """Return ``a.b.c`` for a Name/Attribute chain, or ``""`` for any other expression."""
        if isinstance(expr, cst.Name):
            return expr.value
        if isinstance(expr, cst.Attribute):
            return f"{self.get_full_dotted_name(expr.value)}.{expr.attr.value}"
        return ""

    def _collect_imports_from_block(self, block: cst.IndentedBlock) -> None:
        """Add every import found directly in ``block.body`` to ``self.imports``."""
        for statement in block.body:
            if not isinstance(statement, cst.SimpleStatementLine):
                continue
            for child in statement.body:
                if isinstance(child, cst.Import):
                    for alias in child.names:
                        module = self.get_full_dotted_name(alias.name)
                        if alias.asname is None:
                            # Without an alias the dotted module path is the entry. Reading
                            # `alias.name.value` here would yield a CST node (not a string)
                            # for dotted names such as `import os.path`.
                            self.imports.add(module)
                            continue
                        asname = alias.asname.name.value
                        self.imports.add(module if module == asname else f"{module}.{asname}")
                elif isinstance(child, cst.ImportFrom):
                    # `from . import x` has no module; `from m import *` has no explicit names.
                    if child.module is None or isinstance(child.names, cst.ImportStar):
                        continue
                    module = self.get_full_dotted_name(child.module)
                    for alias in child.names:
                        if isinstance(alias, cst.ImportAlias):
                            name = alias.name.value
                            asname = alias.asname.name.value if alias.asname else name
                            self.imports.add(f"{module}.{asname}")

    def visit_Module(self, node: cst.Module) -> None:
        """Reset the nesting depth and collect the imports of the module body."""
        self.depth = 0
        self._collect_imports_from_block(node)

    def visit_FunctionDef(self, node: cst.FunctionDef) -> None:
        """Enter a function definition."""
        self.depth += 1

    def leave_FunctionDef(self, node: cst.FunctionDef) -> None:
        """Leave a function definition."""
        self.depth -= 1

    def visit_ClassDef(self, node: cst.ClassDef) -> None:
        """Enter a class definition."""
        self.depth += 1

    def leave_ClassDef(self, node: cst.ClassDef) -> None:
        """Leave a class definition."""
        self.depth -= 1

    def visit_If(self, node: cst.If) -> None:
        """Collect imports from the body of an ``if`` outside any function or class."""
        if self.depth == 0:
            self._collect_imports_from_block(node.body)

    def visit_Try(self, node: cst.Try) -> None:
        """Collect imports from the body of a ``try`` outside any function or class."""
        if self.depth == 0:
            self._collect_imports_from_block(node.body)
class ImportInserter(cst.CSTTransformer):
    """Transformer that inserts global statements after the last import.

    ``last_import_line`` is the 1-based ordinal of the simple statement line that
    holds the last import (simple statement lines are counted as they are left);
    0 means the module has no import, in which case the statements are placed at
    the very beginning of the module.
    """

    def __init__(self, global_statements: list[cst.SimpleStatementLine], last_import_line: int) -> None:
        super().__init__()
        self.global_statements = global_statements
        self.last_import_line = last_import_line
        self.current_line = 0  # simple statement lines left so far
        self.inserted = False

    def leave_SimpleStatementLine(
        self, original_node: cst.SimpleStatementLine, updated_node: cst.SimpleStatementLine
    ) -> cst.BaseStatement | cst.FlattenSentinel[cst.BaseStatement]:
        """Splice the global statements in right after the last import line."""
        self.current_line += 1

        if self.current_line == self.last_import_line and not self.inserted:
            self.inserted = True
            # FlattenSentinel is libcst's supported way of replacing one statement
            # with several; wrapping them in a nested Module would leave Module
            # nodes inside a statement body.
            return cst.FlattenSentinel([updated_node, *self.global_statements])

        return updated_node

    def leave_Module(self, original_node: cst.Module, updated_node: cst.Module) -> cst.Module:
        """Prepend the global statements when the module has no import to anchor on."""
        if self.last_import_line == 0 and not self.inserted:
            return updated_node.with_changes(body=[*self.global_statements, *updated_node.body])
        return updated_node
def extract_global_statements(source_code: str) -> tuple[cst.Module, list[cst.SimpleStatementLine]]:
    """Parse ``source_code`` and return the module together with its collected global statements."""
    parsed = cst.parse_module(source_code)
    statement_collector = GlobalStatementCollector()
    parsed.visit(statement_collector)
    return parsed, statement_collector.global_statements
def find_last_import_line(target_code: str) -> int:
    """Return the position of the last import as reported by ``LastImportFinder`` (0 if there is none)."""
    import_finder = LastImportFinder()
    cst.parse_module(target_code).visit(import_finder)
    return import_finder.last_import_line
class FutureAliasedImportTransformer(cst.CSTTransformer):
    """Drop aliased names (``x as y``) from ``from __future__ import ...`` statements.

    Un-aliased names are kept. When every imported name is aliased, the whole
    import statement is removed. Imports from other modules are left untouched.
    """

    def leave_ImportFrom(
        self, original_node: cst.ImportFrom, updated_node: cst.ImportFrom
    ) -> cst.BaseSmallStatement | cst.FlattenSentinel[cst.BaseSmallStatement] | cst.RemovalSentinel:
        """Return the import without its aliased names, or remove it if nothing is left."""
        import libcst.matchers as m

        module = updated_node.module
        is_future_import = bool(
            module
            and module.value == "__future__"
            and all(m.matches(name, m.ImportAlias()) for name in updated_node.names)
        )
        if not is_future_import:
            return updated_node

        kept = [name for name in updated_node.names if name.asname is None]
        if not kept:
            return cst.RemoveFromParent()
        if len(kept) != len(updated_node.names):
            # A name was dropped, so the last survivor may still carry the comma that
            # separated it from a removed name. An explicit trailing comma is not valid
            # on an un-parenthesized import; let libcst choose the comma instead.
            kept[-1] = kept[-1].with_changes(comma=cst.MaybeSentinel.DEFAULT)
        return updated_node.with_changes(names=kept)
def delete___future___aliased_imports(module_code: str) -> str:
    """Return ``module_code`` after running ``FutureAliasedImportTransformer`` over it."""
    parsed = cst.parse_module(module_code)
    cleaned = parsed.visit(FutureAliasedImportTransformer())
    return cleaned.code
def add_global_assignments(src_module_code: str, dst_module_code: str) -> str:
    """Bring global statements and global assignments of the source module into the destination.

    First, global statements of the source that have no identical counterpart in
    the destination are inserted after the destination's last import. Then the
    source's global assignments replace same-named destination assignments, and
    the remaining ones are added as new statements.

    Returns the resulting destination code.
    """
    src_module, src_statements = extract_global_statements(src_module_code)
    dst_module, dst_statements = extract_global_statements(dst_module_code)

    def already_present(candidate: cst.SimpleStatementLine) -> bool:
        # Same object or structurally equal to a statement the destination already has.
        return any(candidate is existing or candidate.deep_equals(existing) for existing in dst_statements)

    missing_statements = [stmt for stmt in src_statements if not already_present(stmt)]

    if missing_statements:
        inserter = ImportInserter(missing_statements, find_last_import_line(dst_module_code))
        result_code = dst_module.visit(inserter).code
        # Re-parse so the assignment pass below works on the module as it now reads.
        working_module = cst.parse_module(result_code)
    else:
        # Nothing was inserted: the destination is unchanged and its parsed form can be reused.
        result_code = dst_module_code
        working_module = dst_module

    assignment_collector = GlobalAssignmentCollector()
    src_module.visit(assignment_collector)
    if not assignment_collector.assignments:
        # No assignments to replace or add.
        return result_code

    rewriter = GlobalAssignmentTransformer(assignment_collector.assignments, assignment_collector.assignment_order)
    return working_module.visit(rewriter).code
def resolve_star_import(module_name: str, project_root: Path) -> set[str]:
    """Return the names that ``from module_name import *`` would bring in.

    The module is looked up under ``project_root`` as ``<path>.py`` or
    ``<path>/__init__.py``. If the file assigns a list or tuple to ``__all__``,
    its string-literal elements are returned. Otherwise the result is every
    top-level function, class, assigned name and imported name that does not
    start with an underscore.

    Resolution is best effort: when the module file cannot be found, or anything
    goes wrong while reading or parsing it, a warning is logged and an empty set
    is returned.
    """
    try:
        module_path = module_name.replace(".", "/")
        possible_paths = [project_root / f"{module_path}.py", project_root / f"{module_path}/__init__.py"]
        module_file = next((path for path in possible_paths if path.exists()), None)

        if module_file is None:
            logger.warning(f"Could not find module file for {module_name}, skipping star import resolution")
            return set()

        with module_file.open(encoding="utf8") as f:
            module_code = f.read()

        tree = ast.parse(module_code)

        all_names = None
        for node in ast.walk(tree):
            if (
                isinstance(node, ast.Assign)
                and len(node.targets) == 1
                and isinstance(node.targets[0], ast.Name)
                and node.targets[0].id == "__all__"
            ):
                if isinstance(node.value, (ast.List, ast.Tuple)):
                    # String literals are ast.Constant nodes. Elements that are not string
                    # literals (e.g. `*other.__all__`) are skipped; probing the legacy
                    # `ast.Str` class for them fails on Python 3.12+, where it was removed.
                    all_names = [
                        elt.value
                        for elt in node.value.elts
                        if isinstance(elt, ast.Constant) and isinstance(elt.value, str)
                    ]
                    break

        if all_names is not None:
            return set(all_names)

        # No usable __all__: fall back to the public top-level names.
        public_names = set()
        for node in tree.body:
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                if not node.name.startswith("_"):
                    public_names.add(node.name)
            elif isinstance(node, ast.Assign):
                for target in node.targets:
                    if isinstance(target, ast.Name) and not target.id.startswith("_"):
                        public_names.add(target.id)
            elif isinstance(node, ast.AnnAssign):
                if isinstance(node.target, ast.Name) and not node.target.id.startswith("_"):
                    public_names.add(node.target.id)
            elif isinstance(node, ast.Import) or (
                isinstance(node, ast.ImportFrom) and not any(alias.name == "*" for alias in node.names)
            ):
                for alias in node.names:
                    # `import a.b` binds only the top-level name `a`.
                    name = alias.asname or alias.name.split(".", 1)[0]
                    if not name.startswith("_"):
                        public_names.add(name)

        return public_names  # noqa: TRY300

    except Exception as e:
        logger.warning(f"Error resolving star import for {module_name}: {e}")
        return set()
def add_needed_imports_from_module(
    src_module_code: str,
    dst_module_code: str,
    src_path: Path,
    dst_path: Path,
    project_root: Path,
    helper_functions: list[FunctionSource] | None = None,
    helper_functions_fqn: set[str] | None = None,
) -> str:
    """Add all needed and used source module code imports to the destination module code, and return it.

    Imports are gathered from the source module and scheduled for addition to
    the destination unless the destination already has them, they refer to a
    helper function, or they come from the destination module itself. Each
    considered import is also scheduled for removal-if-unused, so only imports
    the destination code actually uses survive.

    Args:
        src_module_code: Code whose imports are gathered.
        dst_module_code: Code that receives the imports.
        src_path: Path of the source module, used to derive its module/package name.
        dst_path: Path of the destination module, used to derive its module/package name.
        project_root: Root against which module names and star imports are resolved.
        helper_functions: Helper functions whose fully qualified names must not be imported.
        helper_functions_fqn: Precomputed fully qualified helper names; when empty or
            missing, they are derived from ``helper_functions``.

    Returns:
        The destination code with imports updated. ``dst_module_code`` is returned
        unchanged when parsing either module or applying the import changes fails.

    """
    src_module_code = delete___future___aliased_imports(src_module_code)
    if not helper_functions_fqn:
        helper_functions_fqn = {f.fully_qualified_name for f in (helper_functions or [])}

    src_module_and_package: ModuleNameAndPackage = calculate_module_and_package(project_root, src_path)
    dst_module_and_package: ModuleNameAndPackage = calculate_module_and_package(project_root, dst_path)

    # This context describes, and is used to transform, the destination module,
    # so it carries the destination's file name along with its module/package names.
    dst_context: CodemodContext = CodemodContext(
        filename=dst_path.name,
        full_module_name=dst_module_and_package.name,
        full_package_name=dst_module_and_package.package,
    )
    gatherer: GatherImportsVisitor = GatherImportsVisitor(
        CodemodContext(
            filename=src_path.name,
            full_module_name=src_module_and_package.name,
            full_package_name=src_module_and_package.package,
        )
    )
    try:
        cst.parse_module(src_module_code).visit(gatherer)
    except Exception as e:
        logger.error(f"Error parsing source module code: {e}")
        return dst_module_code

    dotted_import_collector = DottedImportCollector()
    try:
        parsed_dst_module = cst.parse_module(dst_module_code)
        parsed_dst_module.visit(dotted_import_collector)
    except cst.ParserSyntaxError as e:
        logger.exception(f"Syntax error in destination module code: {e}")
        return dst_module_code  # Return the original code if there's a syntax error

    try:
        # Plain `import mod` statements.
        for mod in gatherer.module_imports:
            # Skip __future__ imports as they cannot be imported directly
            # __future__ imports should only be imported with specific objects i.e from __future__ import annotations
            if mod == "__future__":
                continue
            if mod not in dotted_import_collector.imports:
                AddImportsVisitor.add_needed_import(dst_context, mod)
            RemoveImportsVisitor.remove_unused_import(dst_context, mod)

        # Objects imported under an alias are handled by the alias loop further down.
        aliased_objects = set()
        for mod, alias_pairs in gatherer.alias_mapping.items():
            for alias_pair in alias_pairs:
                if alias_pair[0] and alias_pair[1]:  # Both name and alias exist
                    aliased_objects.add(f"{mod}.{alias_pair[0]}")

        # `from mod import obj` statements.
        for mod, obj_seq in gatherer.object_mapping.items():
            for obj in obj_seq:
                if (
                    f"{mod}.{obj}" in helper_functions_fqn or dst_context.full_module_name == mod  # avoid circular deps
                ):
                    continue  # Skip adding imports for helper functions already in the context

                if f"{mod}.{obj}" in aliased_objects:
                    continue

                # Handle star imports by resolving them to actual symbol names
                if obj == "*":
                    resolved_symbols = resolve_star_import(mod, project_root)
                    logger.debug(f"Resolved star import from {mod}: {resolved_symbols}")

                    for symbol in resolved_symbols:
                        if (
                            f"{mod}.{symbol}" not in helper_functions_fqn
                            and f"{mod}.{symbol}" not in dotted_import_collector.imports
                        ):
                            AddImportsVisitor.add_needed_import(dst_context, mod, symbol)
                        RemoveImportsVisitor.remove_unused_import(dst_context, mod, symbol)
                else:
                    if f"{mod}.{obj}" not in dotted_import_collector.imports:
                        AddImportsVisitor.add_needed_import(dst_context, mod, obj)
                    RemoveImportsVisitor.remove_unused_import(dst_context, mod, obj)
    except Exception as e:
        logger.exception(f"Error adding imports to destination module code: {e}")
        return dst_module_code

    # `import mod as asname` statements.
    for mod, asname in gatherer.module_aliases.items():
        if not asname:
            continue
        if f"{mod}.{asname}" not in dotted_import_collector.imports:
            AddImportsVisitor.add_needed_import(dst_context, mod, asname=asname)
        RemoveImportsVisitor.remove_unused_import(dst_context, mod, asname=asname)

    # `from mod import obj as asname` statements.
    for mod, alias_pairs in gatherer.alias_mapping.items():
        for alias_pair in alias_pairs:
            if f"{mod}.{alias_pair[0]}" in helper_functions_fqn:
                continue

            if not alias_pair[0] or not alias_pair[1]:
                continue

            if f"{mod}.{alias_pair[1]}" not in dotted_import_collector.imports:
                AddImportsVisitor.add_needed_import(dst_context, mod, alias_pair[0], asname=alias_pair[1])
            RemoveImportsVisitor.remove_unused_import(dst_context, mod, alias_pair[0], asname=alias_pair[1])

    try:
        # Apply the scheduled additions first, then prune whatever ended up unused.
        add_imports_visitor = AddImportsVisitor(dst_context)
        transformed_module = add_imports_visitor.transform_module(parsed_dst_module)
        transformed_module = RemoveImportsVisitor(dst_context).transform_module(transformed_module)
        return transformed_module.code.lstrip("\n")
    except Exception as e:
        logger.exception(f"Error adding imports to destination module code: {e}")
        return dst_module_code
def get_code(functions_to_optimize: list[FunctionToOptimize]) -> tuple[str | None, set[tuple[str, str]]]:
    """Return the code for a function or methods in a Python module.

    functions_to_optimize is either a singleton FunctionToOptimize instance, which represents either a function at the
    module level or a method of a class at the module level, or it represents a list of methods of the same class.

    For methods, the returned code starts with a class skeleton (the class header, the class docstring if there is
    one, and every dunder method other than the target) followed by the target methods. The second element of the
    result holds a ``(class_name, method_name)`` pair for each dunder method included in the skeleton.

    Returns ``(None, set())`` when the input is empty or unsupported (nested more than one level, parent that is not
    a class, methods of different classes), when the file cannot be parsed, or when no target is found.
    """
    if (
        not functions_to_optimize
        or (functions_to_optimize[0].parents and functions_to_optimize[0].parents[0].type != "ClassDef")
        or (
            len(functions_to_optimize[0].parents) > 1
            or ((len(functions_to_optimize) > 1) and len({fn.parents[0] for fn in functions_to_optimize}) != 1)
        )
    ):
        return None, set()

    file_path: Path = functions_to_optimize[0].file_path
    # (first line, last line) ranges, 1-based and inclusive, that make up the class skeleton.
    class_skeleton: set[tuple[int, int | None]] = set()
    contextual_dunder_methods: set[tuple[str, str]] = set()
    target_code: str = ""

    def find_target(node_list: list[ast.stmt], name_parts: tuple[str, str] | tuple[str]) -> ast.AST | None:
        """Find the node named by ``name_parts`` in ``node_list``, recording the class skeleton on the way."""
        target: ast.FunctionDef | ast.AsyncFunctionDef | ast.ClassDef | ast.Assign | ast.AnnAssign | None = None
        node: ast.stmt
        for node in node_list:
            if (
                # The many mypy issues will be fixed once this code moves to the backend,
                # using Type Guards as we move to 3.10+.
                # We will cover the Type Alias case on the backend since it's a 3.12 feature.
                isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)) and node.name == name_parts[0]
            ):
                target = node
                break
            # The next two cases cover type aliases in pre-3.12 syntax, where only single assignment is allowed.
            if (
                isinstance(node, ast.Assign)
                and len(node.targets) == 1
                and isinstance(node.targets[0], ast.Name)
                and node.targets[0].id == name_parts[0]
            ) or (isinstance(node, ast.AnnAssign) and hasattr(node.target, "id") and node.target.id == name_parts[0]):
                # An assignment is not accepted as a target once a class skeleton has been recorded.
                if class_skeleton:
                    break
                target = node
                break

        if target is None or len(name_parts) == 1:
            return target

        if not isinstance(target, ast.ClassDef):
            return None
        # Class header: from the `class` line up to the line before the first body statement.
        class_skeleton.add((target.lineno, target.body[0].lineno - 1))
        cbody = target.body
        first_stmt = cbody[0]
        # A docstring is an expression *statement* (ast.Expr) wrapping a string constant.
        if (
            isinstance(first_stmt, ast.Expr)
            and isinstance(first_stmt.value, ast.Constant)
            and isinstance(first_stmt.value.value, str)
        ):
            class_skeleton.add((first_stmt.lineno, first_stmt.end_lineno))
            cbody = cbody[1:]
        cnode: ast.stmt
        for cnode in cbody:
            # Collect all dunder methods.
            cnode_name: str
            if (
                isinstance(cnode, (ast.FunctionDef, ast.AsyncFunctionDef))
                and len(cnode_name := cnode.name) > 4
                and cnode_name != name_parts[1]
                and cnode_name.isascii()
                and cnode_name.startswith("__")
                and cnode_name.endswith("__")
            ):
                contextual_dunder_methods.add((target.name, cnode_name))
                class_skeleton.add((cnode.lineno, cnode.end_lineno))

        return find_target(target.body, name_parts[1:])

    with file_path.open(encoding="utf8") as file:
        source_code: str = file.read()
    try:
        module_node: ast.Module = ast.parse(source_code)
    except SyntaxError:
        logger.exception("get_code - Syntax error while parsing code")
        return None, set()
    # Get the source code lines for the target node
    lines: list[str] = source_code.splitlines(keepends=True)
    if len(functions_to_optimize[0].parents) == 1:
        if (
            functions_to_optimize[0].parents[0].type == "ClassDef"
        ):  # All functions_to_optimize functions are methods of the same class.
            qualified_name_parts_list: list[tuple[str, str] | tuple[str]] = [
                (fto.parents[0].name, fto.function_name) for fto in functions_to_optimize
            ]
        else:
            logger.error(f"Error: get_code does not support inner functions: {functions_to_optimize[0].parents}")
            return None, set()
    elif len(functions_to_optimize[0].parents) == 0:
        qualified_name_parts_list = [(functions_to_optimize[0].function_name,)]
    else:
        logger.error(
            "Error: get_code does not support more than one level of nesting for now. "
            f"Parents: {functions_to_optimize[0].parents}"
        )
        return None, set()

    for qualified_name_parts in qualified_name_parts_list:
        target_node: ast.AST | None = find_target(module_node.body, qualified_name_parts)
        if target_node is None:
            continue
        if (
            isinstance(target_node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
            and target_node.decorator_list
        ):
            # Start at the first decorator so decorators are part of the extracted code.
            target_code += "".join(lines[target_node.decorator_list[0].lineno - 1 : target_node.end_lineno])
        else:
            target_code += "".join(lines[target_node.lineno - 1 : target_node.end_lineno])

    if not target_code:
        return None, set()

    class_list: list[tuple[int, int | None]] = sorted(class_skeleton)
    class_code = "".join(["".join(lines[s_lineno - 1 : e_lineno]) for (s_lineno, e_lineno) in class_list])
    return class_code + target_code, contextual_dunder_methods
def extract_code(functions_to_optimize: list[FunctionToOptimize]) -> tuple[str | None, set[tuple[str, str]]]:
    """Return the result of ``get_code`` only if the extracted code compiles.

    ``(None, set())`` is returned when nothing was extracted or when the
    extracted code has a syntax error (which is logged).
    """
    extracted, dunder_methods = get_code(functions_to_optimize)
    if extracted is None:
        return None, set()
    try:
        # Compile only to validate the syntax; the code object itself is discarded.
        compile(extracted, "edited_code", "exec")
    except SyntaxError as e:
        logger.exception(f"extract_code - Syntax error in extracted optimization candidate code: {e}")
        return None, set()
    else:
        return extracted, dunder_methods
def find_preexisting_objects(source_code: str) -> set[tuple[str, tuple[FunctionParent, ...]]]:
    """List the functions, classes and class methods defined in ``source_code``.

    Each entry is ``(name, parents)``: ``parents`` is empty for module-level
    functions and classes, and holds a single ``FunctionParent`` naming the class
    for methods defined directly in a module-level class. An empty set is
    returned (and the error logged) when the code cannot be parsed.
    """
    function_kinds = (ast.FunctionDef, ast.AsyncFunctionDef)
    found: set[tuple[str, tuple[FunctionParent, ...]]] = set()
    try:
        tree: ast.Module = ast.parse(source_code)
    except SyntaxError:
        logger.exception("find_preexisting_objects - Syntax error while parsing code")
        return found

    for top_level in tree.body:
        if not isinstance(top_level, (*function_kinds, ast.ClassDef)):
            continue
        found.add((top_level.name, ()))
        if isinstance(top_level, ast.ClassDef):
            # Only methods defined directly in the class body are recorded.
            found.update(
                (member.name, (FunctionParent(top_level.name, "ClassDef"),))
                for member in top_level.body
                if isinstance(member, function_kinds)
            )
    return found