5353
5454_ingest_semaphore = asyncio .Semaphore (5 )
5555_latency_samples : dict [str , deque [float ]] = defaultdict (lambda : deque (maxlen = 200 ))
56+ _latency_lock = threading .Lock ()
5657
5758router = APIRouter (
5859 prefix = "/v1/memory" ,
@@ -112,7 +113,8 @@ def _error(request: Request, detail: str, code: int, elapsed_ms: float = 0) -> J
112113
113114
114115def _record_latency (mode : str , elapsed_ms : float ) -> None :
115- _latency_samples [mode ].append (elapsed_ms )
116+ with _latency_lock :
117+ _latency_samples [mode ].append (elapsed_ms )
116118
117119
118120def _percentile (sorted_values : List [float ], percentile : float ) -> float :
@@ -123,8 +125,11 @@ def _percentile(sorted_values: List[float], percentile: float) -> float:
123125
124126
125127def _latency_stats () -> Dict [str , Dict [str , float ]]:
128+ with _latency_lock :
129+ snapshot = {mode : list (samples ) for mode , samples in _latency_samples .items ()}
130+
126131 stats : Dict [str , Dict [str , float ]] = {}
127- for mode , samples in _latency_samples .items ():
132+ for mode , samples in snapshot .items ():
128133 values = sorted (samples )
129134 stats [mode ] = {
130135 "count" : len (values ),
@@ -135,11 +140,14 @@ def _latency_stats() -> Dict[str, Dict[str, float]]:
135140 return stats
136141
137142
138- async def _timed (mode : str , func , * args , ** kwargs ):
143+ async def _timed (mode : str , func , * args , threaded : bool = False , ** kwargs ):
139144 start = time .perf_counter ()
140- result = func (* args , ** kwargs )
141- if hasattr (result , "__await__" ):
142- result = await result
145+ if threaded :
146+ result = await asyncio .to_thread (func , * args , ** kwargs )
147+ else :
148+ result = func (* args , ** kwargs )
149+ if hasattr (result , "__await__" ):
150+ result = await result
143151 elapsed_ms = round ((time .perf_counter () - start ) * 1000 , 2 )
144152 _record_latency (mode , elapsed_ms )
145153 return result , elapsed_ms
@@ -727,27 +735,39 @@ async def search_memory(req: SearchRequest, request: Request, user: dict = Depen
727735 all_results : List [SourceRecord ] = []
728736 latency_ms : Dict [str , float ] = {}
729737 plan = pipeline .raw_retrieval_plan (req .domains , answer = req .answer )
738+ raw_tasks = []
730739
731740 if "profile" in plan :
732- results , elapsed = await _timed ("profile" , _search_profile , pipeline , user_id )
733- latency_ms ["profile" ] = elapsed
734- all_results .extend (results )
741+ raw_tasks .append ((
742+ "profile" ,
743+ _timed ("profile" , _search_profile , pipeline , user_id , threaded = True ),
744+ ))
735745 if "temporal" in plan :
736- results , elapsed = await _timed ("temporal" , _search_temporal , pipeline , req .query , user_id , req .top_k )
737- latency_ms ["temporal" ] = elapsed
738- all_results .extend (results )
746+ raw_tasks .append ((
747+ "temporal" ,
748+ _timed ("temporal" , _search_temporal , pipeline , req .query , user_id , req .top_k , threaded = True ),
749+ ))
739750 if "summary" in plan :
740- results , elapsed = await _timed ("summary" , _search_summary , pipeline , req .query , user_id , req .top_k )
741- latency_ms ["summary" ] = elapsed
742- all_results .extend (results )
751+ raw_tasks .append ((
752+ "summary" ,
753+ _timed ("summary" , _search_summary , pipeline , req .query , user_id , req .top_k ),
754+ ))
743755 if "snippet" in plan :
744- results , elapsed = await _timed ("snippet" , _search_snippet , pipeline , req .query , user_id , req .top_k )
745- latency_ms ["snippet" ] = elapsed
746- all_results .extend (results )
756+ raw_tasks .append ((
757+ "snippet" ,
758+ _timed ("snippet" , _search_snippet , pipeline , req .query , user_id , req .top_k ),
759+ ))
747760 if "code" in plan :
748- results , elapsed = await _timed ("code" , _search_code , pipeline , req .query , user_id , req .top_k )
749- latency_ms ["code" ] = elapsed
750- all_results .extend (results )
761+ raw_tasks .append ((
762+ "code" ,
763+ _timed ("code" , _search_code , pipeline , req .query , user_id , req .top_k ),
764+ ))
765+
766+ if raw_tasks :
767+ raw_results = await asyncio .gather (* (task for _ , task in raw_tasks ))
768+ for (domain , _ ), (results , elapsed ) in zip (raw_tasks , raw_results ):
769+ latency_ms [domain ] = elapsed
770+ all_results .extend (results )
751771
752772 all_results .sort (key = lambda record : record .score , reverse = True )
753773
0 commit comments