|
18 | 18 | from typing import Dict, Any, Optional, List |
19 | 19 | import logging |
20 | 20 | from dataclasses import dataclass, asdict |
| 21 | +import pandas as pd |
21 | 22 | from vfbquery.term_info_queries import NumpyEncoder |
22 | 23 |
|
23 | 24 | logger = logging.getLogger(__name__) |
@@ -633,9 +634,37 @@ def wrapper(*args, **kwargs): |
633 | 634 | cache.clear_cache_entry(query_type, cache_term_id) |
634 | 635 |
|
635 | 636 | # Try cache first (will be empty if force_refresh was True) |
636 | | - # Only use cache if we're getting complete results (no limit applied) |
637 | | - if not force_refresh and should_cache: |
638 | | - cached_result = cache.get_cached_result(query_type, cache_term_id, **kwargs) |
| 637 | + # OPTIMIZATION: If requesting limited results, check if full results are cached |
| 638 | + # If yes, we can extract the limited rows from the cached full results |
| 639 | + if not force_refresh: |
| 640 | + # First try to get cached result matching the exact query (including limit) |
| 641 | + if should_cache: |
| 642 | + cached_result = cache.get_cached_result(query_type, cache_term_id, **kwargs) |
| 643 | + else: |
| 644 | + # For limited queries, try to get full cached results instead |
| 645 | + full_kwargs = kwargs.copy() |
| 646 | + full_kwargs['limit'] = -1 # Get full results |
| 647 | + cached_result = cache.get_cached_result(query_type, cache_term_id, **full_kwargs) |
| 648 | + |
| 649 | + # If we got full cached results, extract the limited portion |
| 650 | + if cached_result is not None and limit > 0: |
| 651 | + logger.debug(f"Extracting first {limit} rows from cached full results for {term_id}") |
| 652 | + |
| 653 | + # Extract limited rows based on result type |
| 654 | + if isinstance(cached_result, dict) and 'rows' in cached_result: |
| 655 | + cached_result = { |
| 656 | + 'headers': cached_result.get('headers', {}), |
| 657 | + 'rows': cached_result['rows'][:limit], |
| 658 | + 'count': cached_result.get('count', len(cached_result.get('rows', []))) |
| 659 | + } |
| 660 | + elif isinstance(cached_result, pd.DataFrame): |
| 661 | + # Keep the full count but limit the rows |
| 662 | + original_count = len(cached_result) |
| 663 | + cached_result = cached_result.head(limit) |
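| 664 | + # Attach the pre-truncation row count. NOTE(review): pandas defines DataFrame._metadata as a list of attribute names, so the hasattr check below is always true and the string-key assignment looks like it raises TypeError; DataFrame.attrs['count'] may be what was intended - confirm |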
| 665 | + if hasattr(cached_result, '_metadata'): |
| 666 | + cached_result._metadata['count'] = original_count |
| 667 | + |
639 | 668 | if cached_result is not None: |
640 | 669 | # Validate that cached result has essential fields for term_info |
641 | 670 | if query_type == 'term_info': |
|
0 commit comments