6060]
6161
6262
63- class BlockRetriever (BaseRetriever ):
63+ class BlockRetriever (BlockRetrieverFilesystemSupport , BlockRetrieverPromptCacheSupport , BaseRetriever ):
6464
6565 def __init__ (
6666 self ,
@@ -120,22 +120,14 @@ def __init__(
120120 self .max_tokens_per_block ,
121121 self .min_tokens_per_block ,
122122 )
123- self ._filesystem_support = BlockRetrieverFilesystemSupport (self )
124- self ._prompt_cache_support = BlockRetrieverPromptCacheSupport (self )
125123 self ._plan_cache : dict [str , BlockTreePlan ] = {}
126124 self ._precomputed_tree_id : str = ""
127-
128- def __getattr__ (self , name : str ):
129- if name .startswith ("__" ):
130- raise AttributeError (name )
131-
132- for support_name in ("_filesystem_support" , "_prompt_cache_support" ):
133- support = self .__dict__ .get (support_name )
134- if support is None :
135- continue
136- if hasattr (type (support ), name ):
137- return getattr (support , name )
138- raise AttributeError (f"{ type (self ).__name__ !s} object has no attribute { name !r} " )
125+ self ._fs_node_cache : dict [tuple [str , str ], dict [str , Any ]] = {}
126+ self ._fs_attrs_cache : dict [tuple [str , str ], dict [str , Any ]] = {}
127+ self ._fs_path_cache : dict [tuple [str , str ], str ] = {}
128+ self ._fs_is_dir_cache : dict [tuple [str , str ], bool ] = {}
129+ self ._fs_children_cache : dict [tuple [str , str ], list [dict [str , Any ]]] = {}
130+ self ._fs_block_render_cache : dict [tuple [str , ...], tuple [str , int ]] = {}
139131
140132 def retrieve (
141133 self ,
@@ -164,6 +156,7 @@ def _retrieve_fs(
164156 return self ._empty_result ()
165157
166158 if tree_id != self ._precomputed_tree_id :
159+ self ._clear_fs_lookup_cache ()
167160 self .token_counter .clear_cache ()
168161 self .token_counter .precompute_tree_tokens (self .storage , tree_id )
169162 self ._precomputed_tree_id = tree_id
@@ -783,18 +776,18 @@ def _process_block(
783776 def _update_frontier (self , node_ids , tree_id , beam_size ):
784777 next_frontier = []
785778 for node_id in node_ids :
786- node = self .storage . get_node ( tree_id , node_id )
787- attrs = {}
788- if node and node . attrs_json :
789- try :
790- attrs = json . loads ( node . attrs_json )
791- except json . JSONDecodeError :
792- attrs = {}
793-
794- frontier_path = attrs . get ( "rel_path" , "" ) if self . mode == "filesystem" else ( node . path if node else "" )
779+ if self .mode == "filesystem" :
780+ node = self . _get_cached_fs_node_dict ( tree_id , node_id )
781+ attrs = self . _get_cached_fs_attrs ( tree_id , node_id , node = node )
782+ frontier_path = attrs . get ( "rel_path" , "" )
783+ title = attrs . get ( "title" , "" )
784+ else :
785+ node = self . storage . get_node ( tree_id , node_id )
786+ frontier_path = node . path if node else ""
787+ title = ""
795788 next_frontier .append ({
796789 "node_id" : node_id ,
797- "title" : attrs . get ( " title" , "" ) ,
790+ "title" : title ,
798791 "path" : frontier_path ,
799792 })
800793 if beam_size and len (next_frontier ) >= beam_size :
@@ -808,7 +801,13 @@ def _update_beams(self, node_ids, tree_id, beam_size):
def _frontier_has_children(self, tree_id: str, frontier: list[dict[str, str]]) -> bool:
    """Report whether any frontier entry still has child nodes.

    Args:
        tree_id: Identifier of the tree the frontier entries belong to.
        frontier: Frontier entries; only the ``"node_id"`` key is read.

    Returns:
        True as soon as one entry with a non-empty ``node_id`` has
        children, otherwise False (including for an empty frontier).
    """
    for frontier_node in frontier:
        node_id = frontier_node.get("node_id", "")
        if not node_id:
            # Entries without an id cannot be looked up at all.
            continue
        if self.mode == "filesystem":
            # Filesystem mode uses the cached child lookup and never
            # falls through to the direct storage query below.
            has_children = self._fs_node_has_children(tree_id, node_id)
        else:
            has_children = bool(self.storage.get_children(tree_id, node_id))
        if has_children:
            return True
    return False
814813
@@ -831,14 +830,14 @@ def _override_done_if_dirs(self, result: BlockResult, tree_id: str, beams: list[
831830 return self ._override_done_if_frontier_dirs (result , tree_id , beams )
832831
def _is_fs_directory_id(self, tree_id: str, node_id: str) -> bool:
    """Return whether the node's attrs mark it as a directory.

    The answer is memoised in ``self._fs_is_dir_cache`` under
    ``(tree_id, node_id)``.

    Args:
        tree_id: Identifier of the tree containing the node.
        node_id: Identifier of the node to check.

    Returns:
        The truthiness of the ``"is_dir"`` attribute; False when the
        attribute is absent.
    """
    key = (tree_id, node_id)
    cached = self._fs_is_dir_cache.get(key)
    if cached is not None:
        return cached

    attrs = self._get_cached_fs_attrs(tree_id, node_id)
    is_dir = bool(attrs.get("is_dir", False))
    self._fs_is_dir_cache[key] = is_dir
    return is_dir
842841
843842 # ---- allowed node filtering (dynamic, but content stays fixed) ----
844843
@@ -876,19 +875,120 @@ def _get_node_paths(self, tree_id: str, node_ids: list[str]) -> dict[str, str]:
876875
877876 cursor = self .storage .conn .cursor ()
878877 path_map : dict [str , str ] = {}
878+ missing : list [str ] = []
879+ seen_missing : set [str ] = set ()
880+
881+ for node_id in node_ids :
882+ key = (tree_id , node_id )
883+ cached_path = self ._fs_path_cache .get (key )
884+ if cached_path is not None :
885+ path_map [node_id ] = cached_path
886+ elif node_id not in seen_missing :
887+ seen_missing .add (node_id )
888+ missing .append (node_id )
889+
890+ if not missing :
891+ return {node_id : path_map [node_id ] for node_id in node_ids if node_id in path_map }
892+
879893 chunk_size = 500
880894
881- for i in range (0 , len (node_ids ), chunk_size ):
882- chunk = node_ids [i :i + chunk_size ]
895+ for i in range (0 , len (missing ), chunk_size ):
896+ chunk = missing [i :i + chunk_size ]
883897 placeholders = "," .join ("?" for _ in chunk )
884898 cursor .execute (
885899 f"SELECT node_id, path FROM nodes WHERE tree_id = ? AND node_id IN ({ placeholders } )" ,
886900 (tree_id , * chunk ),
887901 )
888902 for row in cursor .fetchall ():
889- path_map [row ["node_id" ]] = row ["path" ]
903+ node_id = row ["node_id" ]
904+ path = row ["path" ]
905+ self ._fs_path_cache [(tree_id , node_id )] = path
906+ path_map [node_id ] = path
907+
908+ return {node_id : path_map [node_id ] for node_id in node_ids if node_id in path_map }
909+
@staticmethod
def _parse_fs_attrs(attrs_value: Any) -> dict[str, Any]:
    """Normalise a node's attrs payload into a dict.

    Args:
        attrs_value: An already-parsed dict, a JSON document as ``str``,
            ``bytes`` or ``bytearray``, or anything else (e.g. ``None``).

    Returns:
        The dict itself when one is passed in (same object, not a copy);
        the decoded JSON object for JSON text; ``{}`` for empty input,
        undecodable input, JSON that is not an object, or any other type.
    """
    if isinstance(attrs_value, dict):
        return attrs_value
    if isinstance(attrs_value, (str, bytes, bytearray)) and attrs_value:
        try:
            parsed = json.loads(attrs_value)
        except ValueError:
            # json.JSONDecodeError and the UnicodeDecodeError raised for
            # badly encoded bytes are both ValueError subclasses.
            return {}
        return parsed if isinstance(parsed, dict) else {}
    return {}
921+
def _remember_fs_node(self, tree_id: str, node: dict[str, Any]) -> dict[str, Any]:
    """Record a node dict in the filesystem lookup caches.

    Args:
        tree_id: Identifier of the tree the node belongs to.
        node: Node fields; ``"node_id"``, ``"path"`` and ``"attrs"`` /
            ``"attrs_json"`` are the keys read here.

    Returns:
        ``node`` unchanged when it has no ``node_id``; otherwise the dict
        that was stored, which is a merge with any previously cached copy.
    """
    node_id = node.get("node_id")
    if not node_id:
        return node

    key = (tree_id, node_id)
    existing = self._fs_node_cache.get(key)
    if existing:
        # Incoming fields win; keys present only on the cached copy
        # (such as "entity") survive the merge.
        node = {**existing, **node}

    self._fs_node_cache[key] = node
    path = node.get("path")
    if isinstance(path, str):
        self._fs_path_cache[key] = path

    # A parsed "attrs" entry takes precedence over the raw "attrs_json".
    attrs = self._parse_fs_attrs(node.get("attrs") if "attrs" in node else node.get("attrs_json"))
    self._fs_attrs_cache[key] = attrs
    self._fs_is_dir_cache[key] = bool(attrs.get("is_dir", False))
    return node
944+
945+ def _get_cached_fs_node_dict (self , tree_id : str , node_id : str ) -> dict [str , Any ] | None :
946+ key = (tree_id , node_id )
947+ cached = self ._fs_node_cache .get (key )
948+ if cached is not None :
949+ return cached
890950
891- return path_map
951+ node = self .storage .get_node (tree_id , node_id )
952+ if not node :
953+ return None
954+ return self ._remember_fs_node (tree_id , node .to_dict ())
955+
956+ def _get_cached_fs_attrs (
957+ self ,
958+ tree_id : str ,
959+ node_id : str ,
960+ node : dict [str , Any ] | None = None ,
961+ ) -> dict [str , Any ]:
962+ key = (tree_id , node_id )
963+ cached = self ._fs_attrs_cache .get (key )
964+ if cached is not None :
965+ return cached
966+
967+ node = node or self ._get_cached_fs_node_dict (tree_id , node_id )
968+ if not node :
969+ attrs : dict [str , Any ] = {}
970+ else :
971+ attrs = self ._parse_fs_attrs (node .get ("attrs" ) if "attrs" in node else node .get ("attrs_json" ))
972+ self ._remember_fs_node (tree_id , node )
973+
974+ self ._fs_attrs_cache [key ] = attrs
975+ self ._fs_is_dir_cache [key ] = bool (attrs .get ("is_dir" , False ))
976+ return attrs
977+
def _fs_node_has_children(self, tree_id: str, node_id: str) -> bool:
    """Return whether a filesystem node has at least one direct child.

    Uses the entry in ``self._fs_children_cache`` when one exists and
    otherwise asks ``_get_direct_children_nodes``.

    Args:
        tree_id: Identifier of the tree containing the node.
        node_id: Identifier of the node to inspect.

    Returns:
        True when the child list is non-empty, False otherwise.
    """
    children = self._fs_children_cache.get((tree_id, node_id))
    if children is None:
        # A cached empty list is a valid answer, so only None means "unknown".
        children = self._get_direct_children_nodes(tree_id, node_id)
    return bool(children)
984+
def _clear_fs_lookup_cache(self) -> None:
    """Empty every filesystem lookup cache in place.

    The dict objects themselves are kept, so any reference held
    elsewhere observes the cleared state.
    """
    self._fs_node_cache.clear()
    self._fs_attrs_cache.clear()
    self._fs_path_cache.clear()
    self._fs_is_dir_cache.clear()
    self._fs_children_cache.clear()
    self._fs_block_render_cache.clear()
892992
893993 # ---- DB helpers ----
894994
@@ -922,12 +1022,14 @@ def _empty_result(self):
9221022
def clear_cache(self):
    """Drop all cached plans and all filesystem lookup caches."""
    self._plan_cache.clear()
    self._clear_fs_lookup_cache()
9251026
def clear_plan_cache(self, tree_id=None):
    """Drop cached plans for one tree, or for all trees.

    Args:
        tree_id: Tree whose plan should be evicted. Any falsy value
            (the default ``None``, or an empty string) clears every plan.
    """
    if tree_id:
        self._plan_cache.pop(tree_id, None)
    else:
        self._plan_cache.clear()
    # NOTE(review): the filesystem lookup caches are reset on both paths.
    # The indentation of this call was not recoverable from the source
    # view; clearing unconditionally is the safe reading because a wider
    # reset can never leave stale entries behind — confirm against the
    # repository.
    self._clear_fs_lookup_cache()
9311033
def get_cache_stats(self):
    """Return cache statistics as a dict.

    Returns:
        A dict with the single key ``"plan_cache_size"``: the number of
        entries currently held in the plan cache.
    """
    return {"plan_cache_size": len(self._plan_cache)}
0 commit comments