Skip to content

Commit 39440b7

Browse files
Merge branch 'master' into lgs/benchmarking-baselines
2 parents 38307e7 + be765dc commit 39440b7

36 files changed

Lines changed: 786 additions & 395 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,5 @@ benchmarks/results/*
1212
temp/*
1313
*.log
1414
bin/*
15-
.venv/
15+
.venv/
16+
.env

benchmarks/_plotter_combined.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -963,4 +963,4 @@ def __plot_delta_accuracy(
963963

964964
filename = results_dir + f"/delta_accuracy_{timestamp}.pdf"
965965
plt.savefig(filename, format="pdf", bbox_inches="tight")
966-
plt.close()
966+
plt.close()

benchmarks/_plotter_helper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,4 +330,4 @@ def compute_avg_latency_score(latency_list: pd.DataFrame) -> float:
330330
Returns:
331331
avg_latency: float - Average Latency 0.xx
332332
"""
333-
return latency_list.mean()
333+
return latency_list.mean()

benchmarks/_plotter_individual.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,4 +341,4 @@ def __plot_avg_latency_cache_hit_rate_cache_miss_rate(
341341
filename = benchmark.output_folder_path + f"/statistics_{benchmark.timestamp}.json"
342342

343343
with open(filename, "w") as f:
344-
json.dump(statistics, f, indent=4)
344+
json.dump(statistics, f, indent=4)

benchmarks/benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -582,4 +582,4 @@ def main():
582582

583583

584584
if __name__ == "__main__":
585-
main()
585+
main()

poetry.lock

Lines changed: 77 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ dependencies = [
2222
"accelerate (>=1.6.0,<2.0.0)",
2323
"typing-extensions (>=4.13.2,<5.0.0)",
2424
"torchvision (>=0.22.0,<0.23.0)",
25+
"torchvision (>=0.22.0,<0.23.0)",
26+
"statsmodels (>=0.14.4,<0.15.0)",
2527
]
2628

2729

@@ -44,6 +46,7 @@ ruff = "^0.11.6"
4446
mypy = "^1.15.0"
4547
pre-commit = "^4.2.0"
4648
pytest = "^8.0.0"
49+
python-dotenv = "^1.1.0"
4750

4851

4952
[tool.ruff]

tests/ReadMe.md

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,27 +6,9 @@ The unit tests are supposed to solely test the logic of an individual module strate
66
## Integration Tests
77
The integration tests are supposed to test the combination and interaction of all module strategies.
88

9-
## Run All Tests
9+
### Run Integration Tests
10+
Set `OPENAI_API_KEY` in `.env`, then run:
1011

11-
```bash
12-
pip install -e .
13-
```
14-
15-
```bash
16-
export OPENAI_API_KEY="your_api_key_here"
17-
```
18-
19-
```bash
20-
python3 runner.py
21-
```
22-
23-
## Run Individual Tests
24-
25-
```bash
26-
pytest unit/VectorDBStrategy/test.py
27-
```
28-
29-
With print terminal output enabled
30-
```bash
31-
pytest -vs unit/VectorDBStrategy/test.py
12+
```bash
13+
poetry run pytest tests/integration
3214
```

tests/integration/test_1.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,4 @@ class TestVectorQIntegration(unittest.TestCase):
5858
if __name__ == "__main__":
5959
# unittest.main()
6060
pass
61+
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
import unittest
2+
3+
from dotenv import load_dotenv
4+
5+
from vectorq import (
6+
DynamicThresholdPolicy,
7+
HNSWLibVectorDB,
8+
InMemoryEmbeddingMetadataStorage,
9+
LangChainEmbeddingEngine,
10+
OpenAIInferenceEngine,
11+
StringComparisonSimilarityEvaluator,
12+
VectorQ,
13+
VectorQConfig,
14+
)
15+
16+
load_dotenv()
17+
18+
19+
def create_default_config_and_policy():
20+
config = VectorQConfig(
21+
inference_engine=OpenAIInferenceEngine(
22+
model_name="gpt-4.1-nano-2025-04-14",
23+
temperature=0.0,
24+
),
25+
embedding_engine=LangChainEmbeddingEngine(
26+
model_name="sentence-transformers/all-mpnet-base-v2"
27+
),
28+
vector_db=HNSWLibVectorDB(),
29+
embedding_metadata_storage=InMemoryEmbeddingMetadataStorage(),
30+
system_prompt="Please answer in a single word with the first letter capitalized. Example: London",
31+
)
32+
policy = DynamicThresholdPolicy(
33+
delta=0.05,
34+
is_global=False,
35+
similarity_evaluator=StringComparisonSimilarityEvaluator(),
36+
)
37+
return config, policy
38+
39+
40+
class TestVectorQDynamicThreshold(unittest.TestCase):
41+
def test_basic_functionality(self):
42+
"""Test that the cache correctly identifies hits and misses."""
43+
config, policy = create_default_config_and_policy()
44+
vectorq = VectorQ(config, policy)
45+
46+
# First request should be a miss
47+
cache_hit, response, _ = vectorq.infer_with_cache_info(
48+
prompt="What is the capital of France?"
49+
)
50+
self.assertFalse(cache_hit, "First request should be a cache miss")
51+
self.assertTrue(len(response) > 0, "Response should not be empty")
52+
53+
# The 2nd to 5th requests should be misses because the policy is still adjusting the threshold
54+
cache_hit, response, _ = vectorq.infer_with_cache_info(
55+
prompt="What's France's capital city?"
56+
)
57+
self.assertFalse(cache_hit, "Second request should be a cache miss")
58+
self.assertTrue(len(response) > 0, "Response should not be empty")
59+
cache_hit, response, _ = vectorq.infer_with_cache_info(
60+
prompt="France's capital city is called what?"
61+
)
62+
self.assertFalse(cache_hit, "Third request should be a cache miss")
63+
self.assertTrue(len(response) > 0, "Response should not be empty")
64+
cache_hit, response, _ = vectorq.infer_with_cache_info(
65+
prompt="Tell me the capital city of France"
66+
)
67+
cache_hit, response, _ = vectorq.infer_with_cache_info(
68+
prompt="Which city is the capital of France?"
69+
)
70+
71+
# After several tries with the Bayesian policy, we should now get a hit
72+
cache_hit, response, _ = vectorq.infer_with_cache_info(
73+
prompt="The capital of France is?"
74+
)
75+
self.assertTrue(cache_hit, "Similar request should now be a cache hit")
76+
self.assertTrue(len(response) > 0, "Response should not be empty")
77+
78+
cache_hit, response, _ = vectorq.infer_with_cache_info(
79+
prompt="Can you tell me what the capital of France is?"
80+
)
81+
self.assertTrue(cache_hit, "Similar request should now be a cache hit")
82+
self.assertTrue(len(response) > 0, "Response should not be empty")
83+
84+
def test_high_delta(self):
85+
# TODO: Implement this
86+
self.assertTrue(True)
87+
88+
def test_low_delta(self):
89+
# TODO: Implement this
90+
self.assertTrue(True)
91+
92+
93+
if __name__ == "__main__":
94+
unittest.main()

0 commit comments

Comments
 (0)