Skip to content

Commit 00b73b0

Browse files
phernandez and claude authored
feat: Optimize directory operations for 10-100x performance improvement (#350)
Signed-off-by: phernandez <paul@basicmachines.co>
Co-authored-by: Claude <noreply@anthropic.com>
1 parent a09066e commit 00b73b0

8 files changed

Lines changed: 621 additions & 7 deletions

File tree

src/basic_memory/api/routers/directory_router.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,27 @@ async def get_directory_tree(
3131
return tree
3232

3333

34+
@router.get("/structure", response_model=DirectoryNode)
35+
async def get_directory_structure(
36+
directory_service: DirectoryServiceDep,
37+
project_id: ProjectIdDep,
38+
):
39+
"""Get folder structure for navigation (no files).
40+
41+
Optimized endpoint for folder tree navigation. Returns only directory nodes
42+
without file metadata. For full tree with files, use /directory/tree.
43+
44+
Args:
45+
directory_service: Service for directory operations
46+
project_id: ID of the current project
47+
48+
Returns:
49+
DirectoryNode tree containing only folders (type="directory")
50+
"""
51+
structure = await directory_service.get_directory_structure()
52+
return structure
53+
54+
3455
@router.get("/list", response_model=List[DirectoryNode])
3556
async def list_directory(
3657
directory_service: DirectoryServiceDep,

src/basic_memory/repository/entity_repository.py

Lines changed: 60 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -176,6 +176,66 @@ async def upsert_entity(self, entity: Entity) -> Entity:
176176
entity = await self._handle_permalink_conflict(entity, session)
177177
return entity
178178

179+
async def get_distinct_directories(self) -> List[str]:
180+
"""Extract unique directory paths from file_path column.
181+
182+
Optimized method for getting directory structure without loading full entities
183+
or relationships. Returns a sorted list of unique directory paths.
184+
185+
Returns:
186+
List of unique directory paths (e.g., ["notes", "notes/meetings", "specs"])
187+
"""
188+
# Query only file_path column, no entity objects or relationships
189+
query = select(Entity.file_path).distinct()
190+
query = self._add_project_filter(query)
191+
192+
# Execute with use_query_options=False to skip eager loading
193+
result = await self.execute_query(query, use_query_options=False)
194+
file_paths = [row for row in result.scalars().all()]
195+
196+
# Parse file paths to extract unique directories
197+
directories = set()
198+
for file_path in file_paths:
199+
parts = [p for p in file_path.split("/") if p]
200+
# Add all parent directories (exclude filename which is the last part)
201+
for i in range(len(parts) - 1):
202+
dir_path = "/".join(parts[: i + 1])
203+
directories.add(dir_path)
204+
205+
return sorted(directories)
206+
207+
async def find_by_directory_prefix(self, directory_prefix: str) -> Sequence[Entity]:
208+
"""Find entities whose file_path starts with the given directory prefix.
209+
210+
Optimized method for listing directory contents without loading all entities.
211+
Uses SQL LIKE pattern matching to filter entities by directory path.
212+
213+
Args:
214+
directory_prefix: Directory path prefix (e.g., "docs", "docs/guides")
215+
Empty string returns all entities (root directory)
216+
217+
Returns:
218+
Sequence of entities in the specified directory and subdirectories
219+
"""
220+
# Build SQL LIKE pattern
221+
if directory_prefix == "" or directory_prefix == "/":
222+
# Root directory - return all entities
223+
return await self.find_all()
224+
225+
# Remove leading/trailing slashes for consistency
226+
directory_prefix = directory_prefix.strip("/")
227+
228+
# Query entities with file_path starting with prefix
229+
# Pattern matches "prefix/" to ensure we get files IN the directory,
230+
# not just files whose names start with the prefix
231+
pattern = f"{directory_prefix}/%"
232+
233+
query = self.select().where(Entity.file_path.like(pattern))
234+
235+
# Skip eager loading - we only need basic entity fields for directory trees
236+
result = await self.execute_query(query, use_query_options=False)
237+
return list(result.scalars().all())
238+
179239
async def _handle_permalink_conflict(self, entity: Entity, session: AsyncSession) -> Entity:
180240
"""Handle permalink conflicts by generating a unique permalink."""
181241
base_permalink = entity.permalink

src/basic_memory/repository/repository.py

Lines changed: 16 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -152,12 +152,25 @@ def select(self, *entities: Any) -> Select:
152152
# Add project filter if applicable
153153
return self._add_project_filter(query)
154154

155-
async def find_all(self, skip: int = 0, limit: Optional[int] = None) -> Sequence[T]:
156-
"""Fetch records from the database with pagination."""
155+
async def find_all(
156+
self, skip: int = 0, limit: Optional[int] = None, use_load_options: bool = True
157+
) -> Sequence[T]:
158+
"""Fetch records from the database with pagination.
159+
160+
Args:
161+
skip: Number of records to skip
162+
limit: Maximum number of records to return
163+
use_load_options: Whether to apply eager loading options (default: True)
164+
"""
157165
logger.debug(f"Finding all {self.Model.__name__} (skip={skip}, limit={limit})")
158166

159167
async with db.scoped_session(self.session_maker) as session:
160-
query = select(self.Model).offset(skip).options(*self.get_load_options())
168+
query = select(self.Model).offset(skip)
169+
170+
# Only apply load options if requested
171+
if use_load_options:
172+
query = query.options(*self.get_load_options())
173+
161174
# Add project filter if applicable
162175
query = self._add_project_filter(query)
163176

src/basic_memory/services/directory_service.py

Lines changed: 124 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
import fnmatch
44
import logging
55
import os
6-
from typing import Dict, List, Optional
6+
from typing import Dict, List, Optional, Sequence
77

8+
from basic_memory.models import Entity
89
from basic_memory.repository import EntityRepository
910
from basic_memory.schemas.directory import DirectoryNode
1011

@@ -89,6 +90,49 @@ async def get_directory_tree(self) -> DirectoryNode:
8990
# Return the root node with its children
9091
return root_node
9192

93+
async def get_directory_structure(self) -> DirectoryNode:
94+
"""Build a hierarchical directory structure without file details.
95+
96+
Optimized method for folder navigation that only returns directory nodes,
97+
no file metadata. Much faster than get_directory_tree() for large knowledge bases.
98+
99+
Returns:
100+
DirectoryNode tree containing only folders (type="directory")
101+
"""
102+
# Get unique directories without loading entities
103+
directories = await self.entity_repository.get_distinct_directories()
104+
105+
# Create a root directory node
106+
root_node = DirectoryNode(name="Root", directory_path="/", type="directory")
107+
108+
# Map to store directory nodes by path for easy lookup
109+
dir_map: Dict[str, DirectoryNode] = {"/": root_node}
110+
111+
# Build tree with just folders
112+
for dir_path in directories:
113+
parts = [p for p in dir_path.split("/") if p]
114+
current_path = "/"
115+
116+
for i, part in enumerate(parts):
117+
parent_path = current_path
118+
# Build the directory path
119+
current_path = (
120+
f"{current_path}{part}" if current_path == "/" else f"{current_path}/{part}"
121+
)
122+
123+
# Create directory node if it doesn't exist
124+
if current_path not in dir_map:
125+
dir_node = DirectoryNode(
126+
name=part, directory_path=current_path, type="directory"
127+
)
128+
dir_map[current_path] = dir_node
129+
130+
# Add to parent's children
131+
if parent_path in dir_map:
132+
dir_map[parent_path].children.append(dir_node)
133+
134+
return root_node
135+
92136
async def list_directory(
93137
self,
94138
dir_name: str = "/",
@@ -118,8 +162,13 @@ async def list_directory(
118162
if dir_name != "/" and dir_name.endswith("/"):
119163
dir_name = dir_name.rstrip("/")
120164

121-
# Get the full directory tree
122-
root_tree = await self.get_directory_tree()
165+
# Optimize: Query only entities in the target directory
166+
# instead of loading the entire tree
167+
dir_prefix = dir_name.lstrip("/")
168+
entity_rows = await self.entity_repository.find_by_directory_prefix(dir_prefix)
169+
170+
# Build a partial tree from only the relevant entities
171+
root_tree = self._build_directory_tree_from_entities(entity_rows, dir_name)
123172

124173
# Find the target directory node
125174
target_node = self._find_directory_node(root_tree, dir_name)
@@ -132,6 +181,78 @@ async def list_directory(
132181

133182
return result
134183

184+
def _build_directory_tree_from_entities(
185+
self, entity_rows: Sequence[Entity], root_path: str
186+
) -> DirectoryNode:
187+
"""Build a directory tree from a subset of entities.
188+
189+
Args:
190+
entity_rows: Sequence of entity objects to build tree from
191+
root_path: Root directory path for the tree
192+
193+
Returns:
194+
DirectoryNode representing the tree root
195+
"""
196+
# Create a root directory node
197+
root_node = DirectoryNode(name="Root", directory_path=root_path, type="directory")
198+
199+
# Map to store directory nodes by path for easy lookup
200+
dir_map: Dict[str, DirectoryNode] = {root_path: root_node}
201+
202+
# First pass: create all directory nodes
203+
for file in entity_rows:
204+
# Process directory path components
205+
parts = [p for p in file.file_path.split("/") if p]
206+
207+
# Create directory structure
208+
current_path = "/"
209+
for i, part in enumerate(parts[:-1]): # Skip the filename
210+
parent_path = current_path
211+
# Build the directory path
212+
current_path = (
213+
f"{current_path}{part}" if current_path == "/" else f"{current_path}/{part}"
214+
)
215+
216+
# Create directory node if it doesn't exist
217+
if current_path not in dir_map:
218+
dir_node = DirectoryNode(
219+
name=part, directory_path=current_path, type="directory"
220+
)
221+
dir_map[current_path] = dir_node
222+
223+
# Add to parent's children
224+
if parent_path in dir_map:
225+
dir_map[parent_path].children.append(dir_node)
226+
227+
# Second pass: add file nodes to their parent directories
228+
for file in entity_rows:
229+
file_name = os.path.basename(file.file_path)
230+
parent_dir = os.path.dirname(file.file_path)
231+
directory_path = "/" if parent_dir == "" else f"/{parent_dir}"
232+
233+
# Create file node
234+
file_node = DirectoryNode(
235+
name=file_name,
236+
file_path=file.file_path,
237+
directory_path=f"/{file.file_path}",
238+
type="file",
239+
title=file.title,
240+
permalink=file.permalink,
241+
entity_id=file.id,
242+
entity_type=file.entity_type,
243+
content_type=file.content_type,
244+
updated_at=file.updated_at,
245+
)
246+
247+
# Add to parent directory's children
248+
if directory_path in dir_map:
249+
dir_map[directory_path].children.append(file_node)
250+
elif root_path in dir_map:
251+
# Fallback to root if parent not found
252+
dir_map[root_path].children.append(file_node)
253+
254+
return root_node
255+
135256
def _find_directory_node(
136257
self, root: DirectoryNode, target_path: str
137258
) -> Optional[DirectoryNode]:

src/basic_memory/services/project_service.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,13 @@ def current_project(self) -> str:
8080
return os.environ.get("BASIC_MEMORY_PROJECT", self.config_manager.default_project)
8181

8282
async def list_projects(self) -> Sequence[Project]:
83-
return await self.repository.find_all()
83+
"""List all projects without loading entity relationships.
84+
85+
Returns only basic project fields (name, path, etc.) without
86+
eager loading the entities relationship which could load thousands
87+
of entities for large knowledge bases.
88+
"""
89+
return await self.repository.find_all(use_load_options=False)
8490

8591
async def get_project(self, name: str) -> Optional[Project]:
8692
"""Get the file path for a project by name or permalink."""

0 commit comments

Comments
 (0)