11"""CocoIndex app for indexing codebases."""
22
3- import asyncio
4-
53import cocoindex .asyncio as coco_aio
64from cocoindex .connectors import localfs , sqlite
75from cocoindex .ops .text import RecursiveSplitter , detect_code_language
6361splitter = RecursiveSplitter ()
6462
6563
66- @coco_aio .function
67- async def process_chunk (
68- file_path : str ,
69- chunk : Chunk ,
70- language : str ,
71- id_gen : IdGenerator ,
72- table : sqlite .TableTarget ,
73- ) -> None :
74- """Process a single chunk: embed and store."""
75- id , chunk_embedding = await asyncio .gather (
76- id_gen .next_id (chunk .text ),
77- embedder .embed (chunk .text ),
78- )
79- table .declare_row (
80- row = CodeChunk ( # type: ignore[arg-type]
81- id = id ,
82- file_path = file_path ,
83- language = language ,
84- content = chunk .text ,
85- start_line = chunk .start .line ,
86- end_line = chunk .end .line ,
87- embedding = chunk_embedding ,
88- )
89- )
90-
91-
9264@coco_aio .function (memo = True )
9365async def process_file (
9466 file : localfs .File ,
95- table : sqlite .TableTarget ,
67+ table : sqlite .TableTarget [ CodeChunk ] ,
9668) -> None :
9769 """Process a single file: chunk, embed, and store."""
9870 # Read file content
@@ -118,12 +90,23 @@ async def process_file(
11890 )
11991
12092 id_gen = IdGenerator ()
121- await asyncio .gather (
122- * (
123- process_chunk (str (file .file_path .path ), chunk , language , id_gen , table )
124- for chunk in chunks
93+
94+ async def process (
95+ chunk : Chunk ,
96+ ) -> None :
97+ table .declare_row (
98+ row = CodeChunk (
99+ id = await id_gen .next_id (chunk .text ),
100+ file_path = str (file .file_path .path ),
101+ language = language ,
102+ content = chunk .text ,
103+ start_line = chunk .start .line ,
104+ end_line = chunk .end .line ,
105+ embedding = await embedder .embed (chunk .text ),
106+ )
125107 )
126- )
108+
109+ await coco_aio .map (process , chunks )
127110
128111
129112@coco_aio .function
@@ -132,15 +115,13 @@ async def app_main() -> None:
132115 db = coco_aio .use_context (SQLITE_DB )
133116
134117 # Declare the table target for storing embeddings
135- table = await coco_aio .mount_run (
136- coco_aio .component_subpath ("setup" , "table" ),
137- db .declare_table_target ,
118+ table = await db .mount_table_target (
138119 table_name = "code_chunks" ,
139120 table_schema = await sqlite .TableSchema .from_class (
140121 CodeChunk ,
141122 primary_key = ["id" ],
142123 ),
143- ). result ()
124+ )
144125
145126 # Walk source directory
146127 files = localfs .walk_dir (
@@ -153,13 +134,8 @@ async def app_main() -> None:
153134 )
154135
155136 # Process each file
156- for f in files :
157- coco_aio .mount (
158- coco_aio .component_subpath ("process" , str (f .file_path .path )),
159- process_file ,
160- f ,
161- table ,
162- )
137+ with coco_aio .component_subpath (coco_aio .Symbol ("process_file" )):
138+ await coco_aio .mount_each (process_file , files .items (), table )
163139
164140
165141# Create the app
0 commit comments