Skip to content

Commit e713714

Browse files
committed
fixed write
1 parent 9552df4 commit e713714

11 files changed

Lines changed: 925 additions & 30 deletions

debug_cache_result.py

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#!/usr/bin/env python3
2+
3+
"""Debug the exact cache_result implementation"""
4+
5+
import sys
6+
import os
7+
sys.path.insert(0, 'src')
8+
import json
9+
import requests
10+
11+
def debug_cache_result():
    """Replay the steps of ``cache_result`` against SOLR, printing each stage.

    Builds a small test payload, checks that the target document exists,
    writes the payload into the cache field with an atomic ``set`` update
    (committed immediately), then reads the field back to confirm it is there.

    NOTE: this is not read-only -- step 3 overwrites the cache field of the
    document ``term_id`` in the index at ``cache_url`` with test data.

    Returns:
        bool: True when the cache field is present on the document after the
        update, False when any step reports a failure.

    Raises:
        requests.RequestException: propagated from the HTTP calls; it is not
        caught here.
    """
    cache_url = "https://solr.virtualflybrain.org/solr/vfb_json"
    term_id = "FBbt_00003686"
    field_name = "vfb_query_term_info_str"

    test_result = {
        "label": "Kenyon cell",
        "cached": True,
        "test_data": "debug test",
    }

    print(f"=== Debugging cache_result for {term_id} ===")

    # Step 1: Create cache metadata (simplified version, fixed timestamps)
    print("1. Creating cache metadata...")
    cached_data = {
        "result": test_result,
        "cached_at": "2025-09-09T19:45:00+01:00",
        "expires_at": "2025-12-08T19:45:00+01:00",
    }
    print(f" Cached data: {json.dumps(cached_data)[:100]}...")

    # Step 2: Check if document exists (exact same logic)
    print("2. Checking if document exists...")
    existing_response = requests.get(
        f"{cache_url}/select",
        params={"q": f"id:{term_id}", "wt": "json", "fl": "id"},
        timeout=5,
    )
    print(f" Response status: {existing_response.status_code}")

    if existing_response.status_code != 200:
        print(f" ERROR: Cannot access document {term_id} for caching")
        return False

    existing_docs = existing_response.json().get("response", {}).get("docs", [])
    print(f" Found {len(existing_docs)} documents")

    if not existing_docs:
        print(f" ERROR: Document {term_id} does not exist - cannot add cache field")
        return False

    print(f" ✓ Document exists: {existing_docs[0].get('id')}")

    # Step 3: Perform atomic update
    print("3. Performing atomic update...")
    update_doc = {
        "id": term_id,
        # The cache entry is stored as a JSON string inside the field.
        field_name: {"set": json.dumps(cached_data)},
    }
    print(f" Update document: {json.dumps(update_doc)[:150]}...")

    response = requests.post(
        f"{cache_url}/update",
        data=json.dumps([update_doc]),
        headers={"Content-Type": "application/json"},
        params={"commit": "true"},
        timeout=10,
    )
    print(f" Update response status: {response.status_code}")
    print(f" Update response: {response.text[:200]}...")

    if response.status_code != 200:
        print(f" ERROR: Update failed: {response.text}")
        return False

    print(" ✓ Cache update successful")

    # Step 4: Verify the update worked
    print("4. Verifying update...")
    verify_response = requests.get(
        f"{cache_url}/select",
        params={"q": f"id:{term_id}", "fl": f"id,{field_name}", "wt": "json"},
        timeout=5,
    )

    if verify_response.status_code != 200:
        print(f" ERROR: Cannot verify update: {verify_response.status_code}")
        return False

    verify_docs = verify_response.json().get("response", {}).get("docs", [])
    if not verify_docs or field_name not in verify_docs[0]:
        print(f" ✗ Field {field_name} not found after update")
        return False

    print(f" ✓ Field {field_name} successfully added")
    stored = verify_docs[0][field_name]
    # The field may come back as a list or as a bare string; blindly taking
    # [0] of a string would show only its first character.
    if isinstance(stored, list):
        cached_value = stored[0] if stored else ""
    else:
        cached_value = stored
    print(f" Cached value: {str(cached_value)[:100]}...")
    return True
110+
111+
# Script entry point: run the debug walk-through and report the outcome.
if __name__ == "__main__":
    outcome = "SUCCESS" if debug_cache_result() else "FAILED"
    print(f"\nFinal result: {outcome}")

debug_cache_storage.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
#!/usr/bin/env python3
2+
3+
"""Debug what's actually stored and retrieved from cache"""
4+
5+
import sys
6+
import os
7+
sys.path.insert(0, 'src')
8+
import json
9+
import requests
10+
11+
def debug_cache_storage():
    """Print what SOLR currently holds in the cache field of one document.

    Fetches the document for a fixed term id, shows the raw field value,
    then tries to decode it as JSON and reports the decoded structure.
    Returns nothing; all findings are written to stdout.
    """
    solr_core = "https://solr.virtualflybrain.org/solr/vfb_json"
    doc_id = "FBbt_00003686"
    cache_field = "vfb_query_term_info_str"

    print(f"=== Debugging cache storage for {doc_id} ===")

    # Check what's actually stored
    query = {"q": f"id:{doc_id}", "fl": f"id,{cache_field}", "wt": "json"}
    reply = requests.get(f"{solr_core}/select", params=query, timeout=5)

    if reply.status_code != 200:
        print(f"Request failed: {reply.status_code}")
        return

    hits = reply.json().get("response", {}).get("docs", [])
    if not hits or cache_field not in hits[0]:
        print(f"Field {cache_field} not found in document")
        return

    raw = hits[0][cache_field]
    print(f"Raw cached field: {type(raw)} = {raw}")

    # Unwrap the first element when the field is returned as a list.
    text = raw[0] if isinstance(raw, list) else raw
    print(f"Cached value: {type(text)} = {text[:200]}...")

    try:
        # Try to parse as JSON
        decoded = json.loads(text)
    except json.JSONDecodeError as err:
        print(f"JSON parsing failed: {err}")
        return

    print(f"Parsed data type: {type(decoded)}")
    if isinstance(decoded, dict):
        key_summary = list(decoded.keys())
    else:
        key_summary = 'Not a dict'
    print(f"Parsed data keys: {key_summary}")

    # Nothing more to report unless the decoded entry carries a "result".
    if not (isinstance(decoded, dict) and "result" in decoded):
        return

    payload = decoded["result"]
    print(f"Result type: {type(payload)}")
    print(f"Result: {payload}")

    if isinstance(payload, dict) and "label" in payload:
        print(f"Label: {payload['label']}")
    else:
        print(f"Result is not a dict or has no label: {payload}")
64+
65+
# Script entry point: inspect the stored cache field when run directly.
if __name__ == "__main__":
    debug_cache_storage()

src/vfbquery/solr_result_cache.py

Lines changed: 65 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,11 @@ def __init__(self,
5959
self.max_result_size_mb = max_result_size_mb
6060
self.max_result_size_bytes = max_result_size_mb * 1024 * 1024
6161

62-
def _generate_field_name(self, query_type: str, **params) -> str:
63-
"""Generate SOLR field name for VFBquery results"""
64-
if not params:
65-
# Simple case - no parameters
66-
return f"vfb_query_{query_type}"
67-
else:
68-
# Complex case - include parameter hash
69-
param_str = json.dumps(sorted(params.items()), sort_keys=True)
70-
param_hash = hashlib.md5(param_str.encode()).hexdigest()[:8]
71-
return f"vfb_query_{query_type}_{param_hash}"
62+
def _get_cache_field_name(self, query_type):
63+
"""Get the field name for a specific query type"""
64+
return f"vfb_query_{query_type}_ss"
7265

73-
def _create_cache_metadata(self, result: Any) -> Dict[str, Any]:
66+
def _create_cache_metadata(self, result: Any) -> Optional[Dict[str, Any]]:
7467
"""Create metadata for cached result with 3-month expiration"""
7568
serialized_result = json.dumps(result, cls=NumpyEncoder)
7669
result_size = len(serialized_result.encode('utf-8'))
@@ -84,7 +77,7 @@ def _create_cache_metadata(self, result: Any) -> Dict[str, Any]:
8477
expires_at = now + timedelta(hours=self.ttl_hours) # 2160 hours = 90 days = 3 months
8578

8679
return {
87-
"result": serialized_result,
80+
"result": result, # Store original object, not serialized string
8881
"cached_at": now.isoformat(),
8982
"expires_at": expires_at.isoformat(),
9083
"result_size": result_size,
@@ -105,7 +98,7 @@ def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional
10598
Returns:
10699
Cached result or None if not found/expired
107100
"""
108-
field_name = self._generate_field_name(query_type, **params)
101+
field_name = self._get_cache_field_name(query_type)
109102

110103
try:
111104
# Query existing vfb_json document for cached VFBquery result
@@ -155,8 +148,16 @@ def get_cached_result(self, query_type: str, term_id: str, **params) -> Optional
155148
# Increment hit count asynchronously
156149
self._increment_field_hit_count(term_id, field_name, cached_data.get("hit_count", 0))
157150

158-
# Deserialize and return result
159-
result = json.loads(cached_data["result"])
151+
# Return cached result
152+
result = cached_data["result"]
153+
# If result is a string, parse it as JSON
154+
if isinstance(result, str):
155+
try:
156+
result = json.loads(result)
157+
except json.JSONDecodeError:
158+
logger.warning(f"Failed to parse cached result for {term_id}")
159+
return None
160+
160161
logger.info(f"Cache hit for {query_type}({term_id})")
161162
return result
162163

@@ -181,24 +182,58 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) ->
181182
logger.debug("Empty result, not caching")
182183
return False
183184

184-
field_name = self._generate_field_name(query_type, **params)
185+
field_name = self._get_cache_field_name(query_type)
185186

186187
try:
187188
# Create cached metadata and result
188189
cached_data = self._create_cache_metadata(result)
189190
if not cached_data:
190191
return False # Result too large or other issue
191192

192-
# Update existing SOLR document with new field using atomic update
193-
# This preserves all existing fields in the document
194-
update_doc = {
195-
"id": term_id,
196-
field_name: {"set": json.dumps(cached_data)}
197-
}
193+
# First, get the existing document to ensure it exists
194+
existing_response = requests.get(f"{self.cache_url}/select", params={
195+
"q": f"id:{term_id}",
196+
"wt": "json",
197+
"fl": "id"
198+
}, timeout=5)
199+
200+
if existing_response.status_code != 200:
201+
logger.error(f"Cannot access document {term_id} for caching")
202+
return False
203+
204+
existing_data = existing_response.json()
205+
existing_docs = existing_data.get("response", {}).get("docs", [])
206+
207+
if not existing_docs:
208+
logger.warning(f"Document {term_id} does not exist - cannot add cache field")
209+
return False
210+
211+
# Fetch complete existing document to preserve all fields
212+
complete_doc_response = requests.get(f"{self.cache_url}/select", params={
213+
"q": f"id:{term_id}",
214+
"wt": "json",
215+
"rows": "1"
216+
}, timeout=5)
217+
218+
if complete_doc_response.status_code != 200:
219+
logger.error(f"Cannot fetch complete document {term_id}")
220+
return False
221+
222+
complete_data = complete_doc_response.json()
223+
complete_docs = complete_data.get("response", {}).get("docs", [])
224+
225+
if not complete_docs:
226+
logger.error(f"Document {term_id} not found for complete fetch")
227+
return False
198228

229+
# Get the existing document and add our cache field
230+
existing_doc = complete_docs[0].copy()
231+
existing_doc[field_name] = json.dumps(cached_data) # Add cache field
232+
233+
# Replace entire document (like VFB indexer does)
199234
response = requests.post(
200-
f"{self.cache_url}/update/json/docs",
201-
json=[update_doc],
235+
f"{self.cache_url}/update",
236+
data=json.dumps([existing_doc]),
202237
headers={"Content-Type": "application/json"},
203238
params={"commit": "true"}, # Immediate commit for availability
204239
timeout=10
@@ -208,7 +243,7 @@ def cache_result(self, query_type: str, term_id: str, result: Any, **params) ->
208243
logger.info(f"Cached {field_name} for {term_id}, size: {cached_data['result_size']/1024:.1f}KB")
209244
return True
210245
else:
211-
logger.error(f"Failed to cache result: HTTP {response.status_code}")
246+
logger.error(f"Failed to cache result: HTTP {response.status_code} - {response.text}")
212247
return False
213248

214249
except Exception as e:
@@ -278,7 +313,7 @@ def get_cache_age(self, query_type: str, term_id: str, **params) -> Optional[Dic
278313
Returns:
279314
Dictionary with cache age info or None if not cached
280315
"""
281-
field_name = self._generate_field_name(query_type, **params)
316+
field_name = self._get_cache_field_name(query_type)
282317

283318
try:
284319
response = requests.get(f"{self.cache_url}/select", params={
@@ -334,7 +369,7 @@ def cleanup_expired_entries(self) -> int:
334369

335370
# Search for documents that have VFBquery cache fields
336371
response = requests.get(f"{self.cache_url}/select", params={
337-
"q": "vfb_query_term_info:[* TO *] OR vfb_query_anatomy:[* TO *] OR vfb_query_neuron:[* TO *]",
372+
"q": "vfb_query_term_info_str:[* TO *] OR vfb_query_anatomy_str:[* TO *] OR vfb_query_neuron_str:[* TO *]",
338373
"fl": "id,vfb_query_*", # Get ID and all VFBquery fields
339374
"rows": "1000", # Process in batches
340375
"wt": "json"
@@ -407,7 +442,7 @@ def get_cache_stats(self) -> Dict[str, Any]:
407442
# Get documents with VFBquery cache fields
408443
# Use a specific field search since wildcards may not work in all SOLR versions
409444
response = requests.get(f"{self.cache_url}/select", params={
410-
"q": "vfb_query_term_info:[* TO *] OR vfb_query_anatomy:[* TO *] OR vfb_query_neuron:[* TO *]",
445+
"q": "vfb_query_term_info_str:[* TO *] OR vfb_query_anatomy_str:[* TO *] OR vfb_query_neuron_str:[* TO *]",
411446
"fl": "id,vfb_query_*", # Get ID and all VFBquery fields
412447
"rows": "1000", # Process in batches
413448
"wt": "json"
@@ -432,8 +467,8 @@ def get_cache_stats(self) -> Dict[str, Any]:
432467
if field_name.startswith("vfb_query_"):
433468
total_fields += 1
434469

435-
# Extract query type from field name
436-
query_type = field_name.replace("vfb_query_", "").split("_")[0]
470+
# Extract query type from field name (remove vfb_query_ prefix and _str suffix)
471+
query_type = field_name.replace("vfb_query_", "").replace("_str", "")
437472
field_stats[query_type] = field_stats.get(query_type, 0) + 1
438473

439474
try:

0 commit comments

Comments
 (0)