|
21 | 21 | import os |
22 | 22 |
|
23 | 23 | class CodeTide(BaseModel): |
24 | | - """Root model representing a complete codebase""" |
| 24 | + """Root model representing a complete codebase with tools for parsing, tracking, and managing code files.""" |
| 25 | + |
25 | 26 | rootpath :Union[str, Path] |
26 | 27 | codebase :CodeBase = Field(default_factory=CodeBase) |
27 | 28 | files :Dict[Path, datetime]= Field(default_factory=dict) |
@@ -97,6 +98,15 @@ def serialize(self, |
97 | 98 | include_codebase_cached_elements: bool = False, |
98 | 99 | include_cached_ids: bool = False, |
99 | 100 | store_in_project_root: bool=True): |
| 101 | + """ |
| 102 | + Serialize the CodeTide object to a file. |
| 103 | +
|
| 104 | + Args: |
| 105 | + filepath: Output path for the serialized object. |
| 106 | + include_codebase_cached_elements: Whether to include codebase cache. |
| 107 | + include_cached_ids: Whether to save list of unique file IDs. |
| 108 | + store_in_project_root: Store file relative to project root if True. |
| 109 | + """ |
100 | 110 |
|
101 | 111 | if store_in_project_root: |
102 | 112 | filepath = Path(self.rootpath) / filepath |
@@ -132,6 +142,16 @@ def serialize(self, |
132 | 142 |
|
133 | 143 | @classmethod |
134 | 144 | def deserialize(cls, filepath :Optional[Union[str, Path]]=DEFAULT_SERIALIZATION_PATH, rootpath :Optional[Union[str, Path]] = None)->"CodeTide": |
| 145 | + """ |
| 146 | + Load a CodeTide instance from a serialized file. |
| 147 | +
|
| 148 | + Args: |
| 149 | + filepath: Path to the serialized CodeTide JSON. |
| 150 | + rootpath: Project root directory (used for relative paths). |
| 151 | +
|
| 152 | + Returns: |
| 153 | + Deserialized CodeTide instance. |
| 154 | + """ |
135 | 155 | if rootpath is not None: |
136 | 156 | filepath = Path(rootpath) / filepath |
137 | 157 |
|
@@ -219,7 +239,16 @@ async def _process_single_file( |
219 | 239 | filepath: Path, |
220 | 240 | parser: BaseParser |
221 | 241 | ) -> Optional[CodeFileModel]: |
222 | | - """Process a single file with error handling.""" |
| 242 | + """ |
| 243 | + Asynchronously process a single file using the given parser. |
| 244 | +
|
| 245 | + Args: |
| 246 | + filepath: Path to the file. |
| 247 | + parser: Parser object corresponding to the file's language. |
| 248 | +
|
| 249 | + Returns: |
| 250 | + Parsed CodeFileModel or None on failure. |
| 251 | + """ |
223 | 252 | try: |
224 | 253 | logger.debug(f"Processing file: {filepath}") |
225 | 254 | return await parser.parse_file(filepath, self.rootpath) |
@@ -331,8 +360,10 @@ def _resolve_files_dependencies(self): |
331 | 360 |
|
332 | 361 | def _get_changed_files(self) -> Tuple[List[Path], bool]: |
333 | 362 | """ |
334 | | - TODO consider if it is worth storing singular timestamp for latest fetch and then just use |
335 | | - pygit2 to changed files based on commit history + current repo status |
| 363 | + Detect which files have been added, modified, or deleted since the last scan. |
| 364 | +
|
| 365 | + Returns: |
| 366 | + Tuple containing list of changed file paths and deletion flag. |
336 | 367 | """ |
337 | 368 | file_deletion_detected = False |
338 | 369 | files = self._find_code_files() # Dict[Path, datetime] |
@@ -361,6 +392,14 @@ async def check_for_updates(self, |
361 | 392 | serialize :bool=False, |
362 | 393 | max_concurrent_tasks: int = DEFAULT_MAX_CONCURRENT_TASKS, |
363 | 394 | batch_size: int = DEFAULT_BATCH_SIZE, **kwargs): |
| 395 | + """ |
| 396 | + Update the codebase by detecting and reprocessing changed files. |
| 397 | +
|
| 398 | + Args: |
| 399 | + serialize: Whether to serialize after updates. |
| 400 | + max_concurrent_tasks: Max concurrent parser tasks. |
| 401 | + batch_size: Batch size for async file processing. |
| 402 | + """ |
364 | 403 |
|
365 | 404 | changed_files, deletion_detected = self._get_changed_files() |
366 | 405 | if deletion_detected: |
@@ -432,12 +471,33 @@ async def check_for_updates(self, |
432 | 471 | ) |
433 | 472 |
|
434 | 473 | def _precheck_id_is_file(self, unique_ids : List[str])->Dict[Path, str]: |
| 474 | + """ |
| 475 | + Preload file contents for the given IDs if they correspond to known files. |
| 476 | +
|
| 477 | + Args: |
| 478 | + unique_ids: Candidate unique IDs, interpreted as file paths relative to the project root. |
| 479 | +
|
| 480 | + Returns: |
| 481 | + Dictionary mapping each ID that corresponds to a tracked file to that file's content. |
| 482 | + """ |
435 | 483 | return { |
436 | 484 | unique_id: readFile(self.rootpath / unique_id) for unique_id in unique_ids |
437 | 485 | if self.rootpath / unique_id in self.files |
438 | 486 | } |
439 | 487 |
|
440 | 488 | def get(self, unique_id :Union[str, List[str]], degree :int=1, as_string :bool=True, as_list_str :bool=False)->Union[CodeContextStructure, str, List[str]]: |
| 489 | + """ |
| 490 | + Retrieve context around code by unique ID(s). |
| 491 | +
|
| 492 | + Args: |
| 493 | + unique_id: Single or list of unique IDs for code entities. |
| 494 | + degree: Depth of context to fetch. |
| 495 | + as_string: Whether to return as a single string. |
| 496 | + as_list_str: Whether to return as list of strings. |
| 497 | +
|
| 498 | + Returns: |
| 499 | + Code context in the requested format. |
| 500 | + """ |
441 | 501 | if isinstance(unique_id, str): |
442 | 502 | unique_id = [unique_id] |
443 | 503 |
|
|
0 commit comments