Skip to content

Commit 3b0a7f8

Browse files
committed
feat: index mode, fix patterns
1 parent f59c185 commit 3b0a7f8

2 files changed

Lines changed: 89 additions & 38 deletions

File tree

src/cocoindex_code/indexer.py

Lines changed: 36 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -13,44 +13,45 @@
1313

1414
# File patterns for supported languages
1515
INCLUDED_PATTERNS = [
16-
"*.py", # Python
17-
"*.pyi", # Python stubs
18-
"*.js", # JavaScript
19-
"*.jsx", # JavaScript React
20-
"*.ts", # TypeScript
21-
"*.tsx", # TypeScript React
22-
"*.mjs", # JavaScript ES modules
23-
"*.cjs", # JavaScript CommonJS
24-
"*.rs", # Rust
25-
"*.go", # Go
26-
"*.java", # Java
27-
"*.c", # C
28-
"*.h", # C/C++ headers
29-
"*.cpp", # C++
30-
"*.hpp", # C++ headers
31-
"*.cc", # C++
32-
"*.cxx", # C++
33-
"*.hxx", # C++ headers
34-
"*.hh", # C++ headers
35-
"*.cs", # C#
36-
"*.sql", # SQL
37-
"*.sh", # Shell
38-
"*.bash", # Bash
39-
"*.zsh", # Zsh
40-
"*.md", # Markdown
41-
"*.mdx", # MDX
42-
"*.txt", # Plain text
43-
"*.rst", # reStructuredText
16+
"**/*.py", # Python
17+
"**/*.pyi", # Python stubs
18+
"**/*.js", # JavaScript
19+
"**/*.jsx", # JavaScript React
20+
"**/*.ts", # TypeScript
21+
"**/*.tsx", # TypeScript React
22+
"**/*.mjs", # JavaScript ES modules
23+
"**/*.cjs", # JavaScript CommonJS
24+
"**/*.rs", # Rust
25+
"**/*.go", # Go
26+
"**/*.java", # Java
27+
"**/*.c", # C
28+
"**/*.h", # C/C++ headers
29+
"**/*.cpp", # C++
30+
"**/*.hpp", # C++ headers
31+
"**/*.cc", # C++
32+
"**/*.cxx", # C++
33+
"**/*.hxx", # C++ headers
34+
"**/*.hh", # C++ headers
35+
"**/*.cs", # C#
36+
"**/*.sql", # SQL
37+
"**/*.sh", # Shell
38+
"**/*.bash", # Bash
39+
"**/*.zsh", # Zsh
40+
"**/*.md", # Markdown
41+
"**/*.mdx", # MDX
42+
"**/*.txt", # Plain text
43+
"**/*.rst", # reStructuredText
4444
]
4545

4646
EXCLUDED_PATTERNS = [
47-
".*", # Hidden directories
48-
"__pycache__", # Python cache
49-
"node_modules", # Node.js dependencies
50-
"target", # Rust/Maven build output
51-
"dist", # Distribution directories
52-
"vendor/*.*/*", # Go vendor directory (domain-based paths)
53-
".cocoindex_code", # Our own index directory
47+
"**/.*", # Hidden directories
48+
"**/__pycache__", # Python cache
49+
"**/node_modules", # Node.js dependencies
50+
"**/target", # Rust/Maven build output
51+
"**/build/assets", # Build assets directories
52+
"**/dist", # Distribution directories
53+
"**/vendor/*.*/*", # Go vendor directory (domain-based paths)
54+
"**/.cocoindex_code", # Our own index directory
5455
]
5556

5657
# Chunking configuration

src/cocoindex_code/server.py

Lines changed: 53 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
11
"""MCP server for codebase indexing and querying."""
22

3+
import argparse
34
import asyncio
5+
import sqlite3
46

57
from mcp.server.fastmcp import FastMCP
68
from pydantic import BaseModel, Field
79

810
from .indexer import app as indexer_app
911
from .query import query_codebase
12+
from .shared import config
1013

1114
# Initialize MCP server
1215
mcp = FastMCP(
@@ -142,16 +145,63 @@ async def search(
142145
)
143146

144147

145-
async def _async_main() -> None:
148+
async def _async_serve() -> None:
146149
"""Async entry point for the MCP server."""
147150
# Refresh index in background so startup isn't blocked
148151
asyncio.create_task(_refresh_index())
149152
await mcp.run_stdio_async()
150153

151154

155+
async def _async_index() -> None:
156+
"""Async entry point for the index command."""
157+
await indexer_app.update(report_to_stdout=True)
158+
_print_index_stats()
159+
160+
161+
def _print_index_stats() -> None:
162+
"""Print index statistics from the database."""
163+
db_path = config.target_sqlite_db_path
164+
if not db_path.exists():
165+
print("No index database found.")
166+
return
167+
168+
conn = sqlite3.connect(str(db_path))
169+
try:
170+
total_chunks = conn.execute("SELECT COUNT(*) FROM code_chunks").fetchone()[0]
171+
total_files = conn.execute("SELECT COUNT(DISTINCT file_path) FROM code_chunks").fetchone()[
172+
0
173+
]
174+
langs = conn.execute(
175+
"SELECT language, COUNT(*) as cnt FROM code_chunks GROUP BY language ORDER BY cnt DESC"
176+
).fetchall()
177+
178+
print("\nIndex stats:")
179+
print(f" Chunks: {total_chunks}")
180+
print(f" Files: {total_files}")
181+
if langs:
182+
print(" Languages:")
183+
for lang, count in langs:
184+
print(f" {lang}: {count} chunks")
185+
finally:
186+
conn.close()
187+
188+
152189
def main() -> None:
153-
"""Entry point for the MCP server."""
154-
asyncio.run(_async_main())
190+
"""Entry point for the cocoindex-code CLI."""
191+
parser = argparse.ArgumentParser(
192+
prog="cocoindex-code",
193+
description="MCP server for codebase indexing and querying.",
194+
)
195+
subparsers = parser.add_subparsers(dest="command")
196+
subparsers.add_parser("serve", help="Run the MCP server (default)")
197+
subparsers.add_parser("index", help="Build/refresh the index and report stats")
198+
199+
args = parser.parse_args()
200+
201+
if args.command == "index":
202+
asyncio.run(_async_index())
203+
else:
204+
asyncio.run(_async_serve())
155205

156206

157207
if __name__ == "__main__":

0 commit comments

Comments
 (0)