Skip to content

Commit f01cac6

Browse files
committed
Optimize caching logic to support limited result queries by extracting rows from full cached results
1 parent f695ca5 commit f01cac6

1 file changed

Lines changed: 32 additions & 3 deletions

File tree

src/vfbquery/solr_result_cache.py

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from typing import Dict, Any, Optional, List
1919
import logging
2020
from dataclasses import dataclass, asdict
21+
import pandas as pd
2122
from vfbquery.term_info_queries import NumpyEncoder
2223

2324
logger = logging.getLogger(__name__)
@@ -633,9 +634,37 @@ def wrapper(*args, **kwargs):
633634
cache.clear_cache_entry(query_type, cache_term_id)
634635

635636
# Try cache first (will be empty if force_refresh was True)
636-
# Only use cache if we're getting complete results (no limit applied)
637-
if not force_refresh and should_cache:
638-
cached_result = cache.get_cached_result(query_type, cache_term_id, **kwargs)
637+
# OPTIMIZATION: If requesting limited results, check if full results are cached
638+
# If yes, we can extract the limited rows from the cached full results
639+
if not force_refresh:
640+
# First try to get cached result matching the exact query (including limit)
641+
if should_cache:
642+
cached_result = cache.get_cached_result(query_type, cache_term_id, **kwargs)
643+
else:
644+
# For limited queries, try to get full cached results instead
645+
full_kwargs = kwargs.copy()
646+
full_kwargs['limit'] = -1 # Get full results
647+
cached_result = cache.get_cached_result(query_type, cache_term_id, **full_kwargs)
648+
649+
# If we got full cached results, extract the limited portion
650+
if cached_result is not None and limit > 0:
651+
logger.debug(f"Extracting first {limit} rows from cached full results for {term_id}")
652+
653+
# Extract limited rows based on result type
654+
if isinstance(cached_result, dict) and 'rows' in cached_result:
655+
cached_result = {
656+
'headers': cached_result.get('headers', {}),
657+
'rows': cached_result['rows'][:limit],
658+
'count': cached_result.get('count', len(cached_result.get('rows', [])))
659+
}
660+
elif isinstance(cached_result, pd.DataFrame):
661+
# Keep the full count but limit the rows
662+
original_count = len(cached_result)
663+
cached_result = cached_result.head(limit)
664+
# Add count attribute if possible
665+
if hasattr(cached_result, '_metadata'):
666+
cached_result._metadata['count'] = original_count
667+
639668
if cached_result is not None:
640669
# Validate that cached result has essential fields for term_info
641670
if query_type == 'term_info':

0 commit comments

Comments
 (0)