VirtualFlyBrain
diff --git a/debug_cache_result.py b/debug_cache_result.py
Lines changed: 113 additions & 0 deletions
diff --git a/debug_cache_storage.py b/debug_cache_storage.py
Lines changed: 66 additions & 0 deletions
diff --git a/src/vfbquery/solr_result_cache.py b/src/vfbquery/solr_result_cache.py
Lines changed: 65 additions & 30 deletions
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+
+"""Debug the exact cache_result implementation"""
+
+import sys
+import os
+sys.path.insert(0, 'src')
+import json
+import requests
+
+def debug_cache_result():
+    """Replay a cache write against the live VFB Solr core, printing each step.
+
+    Checks that the term document exists, writes a small test payload into
+    ``vfb_query_term_info_str`` with an atomic ``set`` update, then reads the
+    field back.  NOTE: this overwrites that field on the real document.
+
+    Returns:
+        bool: True if the field is present when read back, False otherwise.
+    """
+    cache_url = "https://solr.virtualflybrain.org/solr/vfb_json"
+    term_id = "FBbt_00003686"
+    field_name = "vfb_query_term_info_str"
+
+    test_result = {
+        "label": "Kenyon cell",
+        "cached": True,
+        "test_data": "debug test"
+    }
+
+    print(f"=== Debugging cache_result for {term_id} ===")
+
+    # Step 1: Create cache metadata (simplified version)
+    print("1. Creating cache metadata...")
+    cached_data = {
+        "result": test_result,
+        "cached_at": "2025-09-09T19:45:00+01:00",
+        "expires_at": "2025-12-08T19:45:00+01:00"
+    }
+    print(f"   Cached data: {json.dumps(cached_data)[:100]}...")
+
+    # Step 2: Check that the target document exists before touching it
+    print("2. Checking if document exists...")
+    existing_response = requests.get(f"{cache_url}/select", params={
+        "q": f"id:{term_id}",
+        "wt": "json",
+        "fl": "id"
+    }, timeout=5)
+    print(f"   Response status: {existing_response.status_code}")
+
+    if existing_response.status_code != 200:
+        print(f"   ERROR: Cannot access document {term_id} for caching")
+        return False
+
+    existing_docs = existing_response.json().get("response", {}).get("docs", [])
+    print(f"   Found {len(existing_docs)} documents")
+
+    if not existing_docs:
+        print(f"   ERROR: Document {term_id} does not exist - cannot add cache field")
+        return False
+    print(f"   ✓ Document exists: {existing_docs[0].get('id')}")
+
+    # Step 3: Atomic update - {"set": ...} replaces only this one field
+    print("3. Performing atomic update...")
+    update_doc = {
+        "id": term_id,
+        field_name: {"set": json.dumps(cached_data)}
+    }
+    print(f"   Update document: {json.dumps(update_doc)[:150]}...")
+
+    response = requests.post(
+        f"{cache_url}/update",
+        data=json.dumps([update_doc]),
+        headers={"Content-Type": "application/json"},
+        params={"commit": "true"},
+        timeout=10
+    )
+    print(f"   Update response status: {response.status_code}")
+    print(f"   Update response: {response.text[:200]}...")
+
+    if response.status_code != 200:
+        print(f"   ERROR: Update failed: {response.text}")
+        return False
+    print("   ✓ Cache update successful")
+
+    # Step 4: Verify the update worked
+    print("4. Verifying update...")
+    verify_response = requests.get(f"{cache_url}/select", params={
+        "q": f"id:{term_id}",
+        "fl": f"id,{field_name}",
+        "wt": "json"
+    }, timeout=5)
+
+    if verify_response.status_code != 200:
+        print(f"   ERROR: Cannot verify update: {verify_response.status_code}")
+        return False
+
+    verify_docs = verify_response.json().get("response", {}).get("docs", [])
+    if not verify_docs or field_name not in verify_docs[0]:
+        print(f"   ✗ Field {field_name} not found after update")
+        return False
+
+    print(f"   ✓ Field {field_name} successfully added")
+    # Multi-valued Solr fields come back as a list, single-valued ones as a
+    # plain string; [0] on a plain string would keep only its first character.
+    stored = verify_docs[0][field_name]
+    cached_value = stored[0] if isinstance(stored, list) else stored
+    print(f"   Cached value: {cached_value[:100]}...")
+    return True
+
+if __name__ == "__main__":
+    success = debug_cache_result()
+    print(f"\nFinal result: {'SUCCESS' if success else 'FAILED'}")
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+
+"""Debug what's actually stored and retrieved from cache"""
+
+import sys
+import os
+sys.path.insert(0, 'src')
+import json
+import requests
+
+def debug_cache_storage():
+    """Print the raw cache field stored for one term and how it decodes.
+
+    Reads ``vfb_query_term_info_str`` for FBbt_00003686 from the VFB Solr
+    core and reports, step by step, the raw value, its JSON-decoded form,
+    and the ``result``/``label`` entries when present.  Returns nothing.
+    """
+    cache_url = "https://solr.virtualflybrain.org/solr/vfb_json"
+    term_id = "FBbt_00003686"
+    field_name = "vfb_query_term_info_str"
+
+    print(f"=== Debugging cache storage for {term_id} ===")
+
+    # Ask Solr for just the id and the cache field of this term
+    query = {"q": f"id:{term_id}", "fl": f"id,{field_name}", "wt": "json"}
+    response = requests.get(f"{cache_url}/select", params=query, timeout=5)
+    if response.status_code != 200:
+        print(f"Request failed: {response.status_code}")
+        return
+
+    docs = response.json().get("response", {}).get("docs", [])
+    if not docs or field_name not in docs[0]:
+        print(f"Field {field_name} not found in document")
+        return
+
+    cached_field = docs[0][field_name]
+    print(f"Raw cached field: {type(cached_field)} = {cached_field}")
+
+    # The field may come back wrapped in a list; unwrap its first entry
+    cached_value = cached_field[0] if isinstance(cached_field, list) else cached_field
+    print(f"Cached value: {type(cached_value)} = {cached_value[:200]}...")
+
+    try:
+        parsed_data = json.loads(cached_value)
+    except json.JSONDecodeError as e:
+        print(f"JSON parsing failed: {e}")
+        return
+
+    print(f"Parsed data type: {type(parsed_data)}")
+    is_mapping = isinstance(parsed_data, dict)
+    print(f"Parsed data keys: {list(parsed_data.keys()) if is_mapping else 'Not a dict'}")
+
+    # Nothing further is reported unless the payload carries a "result" entry
+    if not (is_mapping and "result" in parsed_data):
+        return
+
+    result = parsed_data["result"]
+    print(f"Result type: {type(result)}")
+    print(f"Result: {result}")
+    if isinstance(result, dict) and "label" in result:
+        print(f"Label: {result['label']}")
+    else:
+        print(f"Result is not a dict or has no label: {result}")
+
+if __name__ == "__main__":
+    debug_cache_storage()
@@ -59,18 +59,11 @@ def __init__(self,
         self.max_result_size_mb = max_result_size_mb
         self.max_result_size_bytes = max_result_size_mb * 1024 * 1024
 
-    def _generate_field_name(self, query_type: str, **params) -> str:
-        """Generate SOLR field name for VFBquery results"""
-        if not params:
-            # Simple case - no parameters
-            return f"vfb_query_{query_type}"
-        else:
-            # Complex case - include parameter hash
-            param_str = json.dumps(sorted(params.items()), sort_keys=True)
-            param_hash = hashlib.md5(param_str.encode()).hexdigest()[:8]
-            return f"vfb_query_{query_type}_{param_hash}"
+    def _get_cache_field_name(self, query_type):
+        """Return the Solr field name that caches results for *query_type*: ``vfb_query_<query_type>_ss``."""
+        return f"vfb_query_{query_type}_ss"  # NOTE(review): the stats/cleanup queries search ``vfb_query_*_str`` fields - confirm which suffix is intended
 
-    def _create_cache_metadata(self, result: Any) -> Dict[str, Any]:
+    def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
         """Create metadata for cached result with 3-month expiration"""
         serialized_result = json.dumps(result, cls=NumpyEncoder)
         result_size = len(serialized_result.encode('utf-8'))
@@ -84,7 +77,7 @@ def _create_cache_metadata(self, result: Any) -> Dict[str, Any]:
         expires_at = now + timedelta(hours=self.ttl_hours)  # 2160 hours = 90 days = 3 months
 
         return {
-            "result": serialized_result,
+            "result": result,  # Store original object, not serialized string
             "cached_at": now.isoformat(),
             "expires_at": expires_at.isoformat(),
             "result_size": result_size,
@@ -105,7 +98,7 @@ def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional
         Returns:
             Cached result or None if not found/expired
         """
-        field_name = self._generate_field_name(query_type, **params)
+        field_name = self._get_cache_field_name(query_type)
 
         try:
             # Query existing vfb_json document for cached VFBquery result
@@ -155,8 +148,16 @@ def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional
             # Increment hit count asynchronously
             self._increment_field_hit_count(term_id, field_name, cached_data.get("hit_count", 0))
 
-            # Deserialize and return result
-            result = json.loads(cached_data["result"])
+            # Return cached result 
+            result = cached_data["result"]
+            # If result is a string, parse it as JSON
+            if isinstance(result, str):
+                try:
+                    result = json.loads(result)
+                except json.JSONDecodeError:
+                    logger.warning(f"Failed to parse cached result for {term_id}")
+                    return None
+            
             logger.info(f"Cache hit for {query_type}({term_id})")
             return result
 
@@ -181,24 +182,58 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) ->
             logger.debug("Empty result, not caching")
             return False
 
-        field_name = self._generate_field_name(query_type, **params)
+        field_name = self._get_cache_field_name(query_type)
 
         try:
             # Create cached metadata and result
             cached_data = self._create_cache_metadata(result)
             if not cached_data:
                 return False  # Result too large or other issue
 
-            # Update existing SOLR document with new field using atomic update
-            # This preserves all existing fields in the document
-            update_doc = {
-                "id": term_id,
-                field_name: {"set": json.dumps(cached_data)}
-            }
+            # First, get the existing document to ensure it exists
+            existing_response = requests.get(f"{self.cache_url}/select", params={
+                "q": f"id:{term_id}",
+                "wt": "json",
+                "fl": "id"
+            }, timeout=5)
+            
+            if existing_response.status_code != 200:
+                logger.error(f"Cannot access document {term_id} for caching")
+                return False
+            
+            existing_data = existing_response.json()
+            existing_docs = existing_data.get("response", {}).get("docs", [])
+            
+            if not existing_docs:
+                logger.warning(f"Document {term_id} does not exist - cannot add cache field")
+                return False
+            
+            # Fetch complete existing document to preserve all fields
+            complete_doc_response = requests.get(f"{self.cache_url}/select", params={
+                "q": f"id:{term_id}",
+                "wt": "json",
+                "rows": "1"
+            }, timeout=5)
+            
+            if complete_doc_response.status_code != 200:
+                logger.error(f"Cannot fetch complete document {term_id}")
+                return False
+                
+            complete_data = complete_doc_response.json()
+            complete_docs = complete_data.get("response", {}).get("docs", [])
+            
+            if not complete_docs:
+                logger.error(f"Document {term_id} not found for complete fetch")
+                return False
 
+            # Get the existing document and add our cache field
+            existing_doc = complete_docs[0].copy()
+            existing_doc[field_name] = json.dumps(cached_data)  # Add cache field
+            
+            # Replace entire document (like VFB indexer does)
             response = requests.post(
-                f"{self.cache_url}/update/json/docs",
-                json=[update_doc],
+                f"{self.cache_url}/update",
+                data=json.dumps([existing_doc]),
                 headers={"Content-Type": "application/json"},
                 params={"commit": "true"},  # Immediate commit for availability
                 timeout=10
@@ -208,7 +243,7 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) ->
                 logger.info(f"Cached {field_name} for {term_id}, size: {cached_data['result_size']/1024:.1f}KB")
                 return True
             else:
-                logger.error(f"Failed to cache result: HTTP {response.status_code}")
+                logger.error(f"Failed to cache result: HTTP {response.status_code} - {response.text}")
                 return False
 
         except Exception as e:
@@ -278,7 +313,7 @@ def get_cache_age(self, query_type: str, term_id: str, **params) -> Optional[Dic
         Returns:
             Dictionary with cache age info or None if not cached
         """
-        field_name = self._generate_field_name(query_type, **params)
+        field_name = self._get_cache_field_name(query_type)
 
         try:
             response = requests.get(f"{self.cache_url}/select", params={
@@ -334,7 +369,7 @@ def cleanup_expired_entries(self) -> int:
 
             # Search for documents that have VFBquery cache fields
             response = requests.get(f"{self.cache_url}/select", params={
-                "q": "vfb_query_term_info:[* TO *] OR vfb_query_anatomy:[* TO *] OR vfb_query_neuron:[* TO *]",
+                "q": "vfb_query_term_info_str:[* TO *] OR vfb_query_anatomy_str:[* TO *] OR vfb_query_neuron_str:[* TO *]",
                 "fl": "id,vfb_query_*",  # Get ID and all VFBquery fields
                 "rows": "1000",  # Process in batches
                 "wt": "json"
@@ -407,7 +442,7 @@ def get_cache_stats(self) -> Dict[str, Any]:
             # Get documents with VFBquery cache fields
             # Use a specific field search since wildcards may not work in all SOLR versions
             response = requests.get(f"{self.cache_url}/select", params={
-                "q": "vfb_query_term_info:[* TO *] OR vfb_query_anatomy:[* TO *] OR vfb_query_neuron:[* TO *]",
+                "q": "vfb_query_term_info_str:[* TO *] OR vfb_query_anatomy_str:[* TO *] OR vfb_query_neuron_str:[* TO *]",
                 "fl": "id,vfb_query_*",  # Get ID and all VFBquery fields
                 "rows": "1000",  # Process in batches 
                 "wt": "json"
@@ -432,8 +467,8 @@ def get_cache_stats(self) -> Dict[str, Any]:
                         if field_name.startswith("vfb_query_"):
                             total_fields += 1
 
-                            # Extract query type from field name
-                            query_type = field_name.replace("vfb_query_", "").split("_")[0]
+                            # Extract query type from field name (remove vfb_query_ prefix and _str suffix)
+                            query_type = field_name.replace("vfb_query_", "").replace("_str", "")
                             field_stats[query_type] = field_stats.get(query_type, 0) + 1
 
                             try: