|
14 | 14 | from pydantic import BaseModel, ConfigDict, Field, field_validator |
15 | 15 | from typing import Optional, List, Tuple, Union, Dict |
16 | 16 | from datetime import datetime, timezone |
| 17 | +from collections import defaultdict |
17 | 18 | from pathlib import Path |
18 | 19 | import traceback |
19 | 20 | import asyncio |
@@ -419,6 +420,7 @@ def _get_changed_files(self) -> Tuple[List[Path], bool]: |
419 | 420 | """ |
420 | 421 | file_deletion_detected = False |
421 | 422 | files = self._find_code_files() # Dict[Path, datetime] |
| 423 | + print("found code files") |
422 | 424 |
|
423 | 425 | changed_files = [] |
424 | 426 |
|
@@ -535,6 +537,97 @@ def _is_file_content_valid(filepath :Path)->bool: |
535 | 537 |
|
536 | 538 | return True |
537 | 539 |
|
@staticmethod
def _is_subdirectory(identifier: Union[str, Path]) -> bool:
    """
    Check if an identifier represents a module/subdirectory.

    Only string identifiers can denote a module: a trailing '/' marks the
    identifier as a directory. Path objects, and any other non-string value,
    are never treated as modules.

    Args:
        identifier: A string or Path object to check

    Returns:
        True if the identifier is a string ending with '/', False otherwise
    """
    # The isinstance guard keeps Path (and unexpected values such as None)
    # from reaching str.endswith, which they do not provide.
    return isinstance(identifier, str) and identifier.endswith("/")
| 557 | + |
def get_module_identifiers(self, module_ids: List[str]) -> Dict[str, List[str]]:
    """
    Get all file identifiers that belong to specified modules.

    Each entry of ``self.files`` is assigned to at most one of the requested
    modules. When requested modules are nested (e.g. ``pkg/`` and
    ``pkg/sub/``), the file goes to the most specific (deepest) module, so
    the grouping is the same on every run.

    Args:
        module_ids: List of module identifier strings (directories),
            interpreted relative to ``self.rootpath``

    Returns:
        Dictionary mapping module names (the last path component of each
        module directory) to lists of file paths, expressed relative to
        ``self.rootpath``, within each module. Modules without any matching
        file are absent from the result.
    """
    # Deduplicate, then order deepest-first with a lexical tie-break.
    # Iterating a bare set here would make the assignment of files in nested
    # modules depend on hash ordering.
    module_paths = sorted(
        {self.rootpath / module_id for module_id in module_ids},
        key=lambda module_path: (-len(module_path.parts), str(module_path)),
    )

    # NOTE(review): results are keyed by the directory's base name, so two
    # requested modules sharing a base name (a/utils/ and b/utils/) are merged
    # under one key -- confirm this is intended.
    modules_identifiers: Dict[str, List[str]] = defaultdict(list)
    # Assumes self.files yields Path objects located under self.rootpath --
    # TODO confirm against where self.files is populated.
    for filepath in self.files:
        for module_path in module_paths:
            if filepath.is_relative_to(module_path):
                modules_identifiers[module_path.name].append(
                    str(filepath.relative_to(self.rootpath))
                )
                # A file is reported under a single module only.
                break

    # Log the results
    logger.info(f"Found {len(modules_identifiers)} modules")
    for module_name, identifiers in modules_identifiers.items():
        logger.info(f"Module '{module_name}' contains {len(identifiers)} identifiers")

    return modules_identifiers
| 587 | + |
def inject_identifiers_from_modules(self, unique_ids: List[str]) -> List[str]:
    """
    Expand module identifiers into their constituent file identifiers.

    Takes a list of identifiers that may include module directories, finds all files
    within those modules, and replaces the module identifiers with individual file paths.
    The input list is not modified.

    Args:
        unique_ids: List of identifiers, may include both files and modules (ending with '/')

    Returns:
        New list with module identifiers replaced by their constituent file
        identifiers. Non-module identifiers keep their original relative
        order and come first; duplicates are dropped, keeping the first
        occurrence.
    """
    module_ids = [
        unique_id for unique_id in unique_ids if self._is_subdirectory(unique_id)
    ]
    if not module_ids:
        # Nothing to expand: skip the scan over every known file.
        return list(dict.fromkeys(unique_ids))

    identifiers_per_module = self.get_module_identifiers(module_ids=module_ids)

    # Set membership keeps the filter linear in the number of identifiers.
    module_id_set = set(module_ids)
    expanded = [
        unique_id for unique_id in unique_ids if unique_id not in module_id_set
    ]
    for identifiers in identifiers_per_module.values():
        expanded.extend(identifiers)

    # A file requested explicitly may also live inside a requested module;
    # dict.fromkeys removes such repeats while preserving order.
    return list(dict.fromkeys(expanded))
| 614 | + |
def precheck(self, unique_ids: List[str]) -> Dict[Path, str]:
    """
    Resolve identifiers to files, expanding modules first.

    Module identifiers are first replaced by the files they contain via
    ``inject_identifiers_from_modules``; the resulting list is then handed to
    ``_precheck_id_is_file``, whose result is returned unchanged.

    Args:
        unique_ids: List of file or module identifiers to precheck

    Returns:
        Dictionary mapping validated file paths to their identifier strings
    """
    return self._precheck_id_is_file(
        self.inject_identifiers_from_modules(unique_ids)
    )
| 630 | + |
538 | 631 | def _precheck_id_is_file(self, unique_ids : List[str])->Dict[Path, str]: |
539 | 632 | """ |
540 | 633 | Preload file contents for the given IDs if they correspond to known files. |
@@ -587,7 +680,7 @@ def get( |
587 | 680 | f"Formats: string={as_string}, list={as_string_list}" |
588 | 681 | ) |
589 | 682 |
|
590 | | - requested_files = self._precheck_id_is_file(code_identifiers) |
| 683 | + requested_files = self.precheck(code_identifiers) |
591 | 684 | return self.codebase.get( |
592 | 685 | unique_id=code_identifiers, |
593 | 686 | degree=context_depth, |
|
0 commit comments