Skip to content

Commit 28c8a09

Browse files
committed
testing/debug tools
1 parent e4c211a commit 28c8a09

File tree

3 files changed

+67
-6
lines changed

3 files changed

+67
-6
lines changed

main_chat/tests/inspect_vector_db.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
1111
sys.path.insert(0, str(_PROJECT_ROOT))
1212

13-
import config
1413
from main_chat.rag_pipeline.rag_retrieval import load_vectordb
1514

1615

@@ -56,6 +55,17 @@ def inspect_vectordb():
5655
for dtype, count in sorted(doc_types.items()):
5756
print(f" - {dtype}: {count}")
5857

58+
print("\n" + "=" * 80)
59+
print("CLIENT_UPLOAD METADATA CHECK")
60+
print("=" * 80)
61+
62+
for meta in metadatas:
63+
if meta.get("doc_type") == "client_upload" and meta.get("chunk_id") == 0:
64+
print(f"\nCLIENT_UPLOAD document:")
65+
print(f" Source: {meta.get('source')}")
66+
print(f" Full metadata: {meta}")
67+
print(f" Has folder_category? {meta.get('folder_category')}")
68+
5969
# Show ALL files/sources grouped by document type
6070
print("\n" + "=" * 80)
6171
print("📁 ALL FILES IN VECTOR DATABASE (by type)")
@@ -88,12 +98,12 @@ def inspect_vectordb():
8898
print("Testing Policy Retrieval")
8999
print("=" * 80)
90100

91-
test_query = "anti-displacement"
92-
print(f"\nTest query: '{test_query}'")
93-
94101
# Try retrieving policies
95102
from main_chat.rag_pipeline.rag_retrieval import retrieve_policies
96103

104+
test_query = "anti-displacement"
105+
print(f"\nTest query: '{test_query}'")
106+
97107
result = retrieve_policies(test_query, k=5)
98108
chunks = result.get("chunks", [])
99109
metadata = result.get("metadata", [])
@@ -105,10 +115,11 @@ def inspect_vectordb():
105115
for i, (chunk, meta) in enumerate(zip(chunks[:2], metadata[:2]), 1):
106116
print(f"\n Result {i}:")
107117
print(f" Source: {meta.get('source', 'unknown')}")
108-
print(f" Type: {meta.get('doc_type', 'unknown')}")
118+
print(f" Doc Type: {meta.get('doc_type', 'unknown')}")
119+
print(f" Folder Category: {meta.get('folder_category', 'none')}")
109120
print(f" Preview: {chunk[:200]}...")
110121
else:
111-
print("✗ No chunks retrieved! This is the problem.")
122+
print("✗ No chunks retrieved!")
112123
# Try a more specific source filter
113124
print("\nTrying with specific source filter...")
114125
result2 = retrieve_policies(test_query, k=5, source="Boston Anti-Displacement Plan Analysis.txt")

main_chat/tests/test_metadata.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
from pathlib import Path
2+
import sys
3+
4+
# test_metadata.py
5+
6+
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
7+
sys.path.insert(0, str(_PROJECT_ROOT))
8+
# check_metadata.py
9+
from main_chat.rag_pipeline.rag_retrieval import load_vectordb
10+
11+
vectordb = load_vectordb()
12+
all_docs = vectordb.get(limit=10000)
13+
14+
# Find CLIENT_UPLOAD docs and show their metadata
15+
print("=" * 80)
16+
print("CLIENT_UPLOAD DOCUMENTS - ACTUAL METADATA")
17+
print("=" * 80)
18+
19+
for i, meta in enumerate(all_docs["metadatas"]):
20+
doc_type = meta.get("doc_type", "")
21+
22+
# Check all possible variations
23+
if "CLIENT" in str(doc_type).upper() or "UPLOAD" in str(doc_type).upper():
24+
print(f"\nDocument {i+1}:")
25+
print(f" doc_type: '{doc_type}' (type: {type(doc_type).__name__})")
26+
print(f" folder_category: '{meta.get('folder_category')}' (exists: {('folder_category' in meta)})")
27+
print(f" source: '{meta.get('source')}'")
28+
print(f" All keys: {list(meta.keys())}")
29+
30+
# Stop once the enumerate index reaches 2 (i counts every metadata entry, not only the matches printed)
31+
if i >= 2:
32+
break

main_chat/tests/test_retrieval.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from pathlib import Path
2+
import sys
3+
4+
# test_retrieval.py
5+
6+
_PROJECT_ROOT = Path(__file__).resolve().parents[2]
7+
sys.path.insert(0, str(_PROJECT_ROOT))
8+
9+
from main_chat.rag_pipeline.rag_retrieval import retrieve_policies
10+
11+
# Test general policy query
12+
result = retrieve_policies("housing policy", k=10)
13+
print(f"Found {len(result['chunks'])} chunks")
14+
15+
for i, meta in enumerate(result["metadata"][:5], 1):
16+
print(f"\n{i}. {meta.get('source')}")
17+
print(f" Folder: {meta.get('folder_category')}")
18+
print(f" Preview: {result['chunks'][i-1][:150]}...")

0 commit comments

Comments
 (0)