|
| 1 | +""" |
| 2 | +quick_demo.py — Run the full MemoryLens evaluation pipeline with NO API key. |
| 3 | +
|
| 4 | +Uses only local embeddings (sentence-transformers) and content-based metrics. |
| 5 | +All evaluation is deterministic and reproducible. |
| 6 | +
|
| 7 | +Usage: |
| 8 | + python quick_demo.py |
| 9 | + python quick_demo.py --turns 50 |
| 10 | +""" |
| 11 | + |
| 12 | +import os |
| 13 | +import sys |
| 14 | +import argparse |
| 15 | + |
# Set before anything imports `transformers`, so the library is asked to skip
# its TensorFlow backend (presumably to avoid a slow/fragile TF import — the
# demo itself only needs sentence-transformers).
os.environ["TRANSFORMERS_NO_TF"] = "1"
os.environ["USE_TF"] = "0"

# Make the sibling packages (simulator/, memory/, evaluation/) importable no
# matter where the script is launched from. abspath() matters: when __file__
# is relative, dirname() can be "" or a relative path, and that sys.path entry
# would then silently track the current working directory.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
| 19 | + |
| 20 | + |
def _positive_int(text: str) -> int:
    """Parse an argparse option value as an integer that is at least 1.

    Raises:
        argparse.ArgumentTypeError: if *text* is not an integer or is < 1.
            argparse reports this as a usage error and exits with status 2.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be >= 1, got {value}")
    return value


def _print_table(title, checkpoints, table, fmt_cell) -> None:
    """Print one per-backend metric table with a column for every checkpoint.

    Args:
        title: Heading printed above the table.
        checkpoints: Checkpoint turn numbers, in column order.
        table: Mapping ``backend name -> {checkpoint: value}``.
        fmt_cell: Callable turning one value into its printed cell text.
            Checkpoints with no recorded value are rendered from ``0``.
    """
    print(f"\n {title}")
    print(f" {'Backend':<12} " + " ".join(f"T={c:<4}" for c in checkpoints))
    print(" " + "-" * 52)
    for name, by_checkpoint in table.items():
        vals = " ".join(fmt_cell(by_checkpoint.get(c, 0)) for c in checkpoints)
        print(f" {name:<12} {vals}")


def main() -> None:
    """Run the demo: replay a simulated conversation and score each memory backend.

    Command-line options:
        --turns N   Number of conversation turns to simulate (positive
                    integer, default 100).
        --quiet     Run the evaluation without printing anything.

    Every event is fed to the naive, RAG and cascading backends. At each
    checkpoint (10, 25, 50, 75, 100, limited to those <= ``--turns``; or just
    ``--turns`` itself when it is below 10) recall, tokens per query, temporal
    drift, noise ratio and cascade efficiency are recorded, then printed as
    text tables unless ``--quiet`` is given.
    """
    parser = argparse.ArgumentParser(description="MemoryLens quick demo (no API key needed)")
    # Validated here so a bad value fails fast, before the heavy imports below.
    parser.add_argument("--turns", type=_positive_int, default=100)
    parser.add_argument("--quiet", action="store_true")
    args = parser.parse_args()

    checkpoints = [t for t in [10, 25, 50, 75, 100] if t <= args.turns]
    if not checkpoints:
        checkpoints = [args.turns]

    # Project imports are deferred to call time so that argument errors and
    # --help do not pay for loading the embedding stack.
    from simulator.facts import BENCHMARK_FACTS
    from simulator.conversation import generate_conversation
    from memory.naive import NaiveMemory
    from memory.rag import RAGMemory
    from memory.cascading import CascadingTemporalMemory
    from evaluation.metrics import (
        recall_at_t, temporal_drift_score, memory_noise_ratio,
        precision_at_k, cascade_efficiency,
    )

    if not args.quiet:
        print("=" * 60)
        print(" MemoryLens — Quick Demo (no API key required)")
        print("=" * 60)
        print(f" Turns: {args.turns} Checkpoints: {checkpoints}")
        print(f" Facts: {len(BENCHMARK_FACTS)}")
        print()
        print(" Loading sentence-transformer model...")

    facts = BENCHMARK_FACTS
    events = generate_conversation(facts, args.turns)

    backends = {
        "naive": NaiveMemory(max_context_tokens=1200),
        "rag": RAGMemory(),
        "cascading": CascadingTemporalMemory(),
    }

    # Per-backend results, keyed by checkpoint number.
    recall_table: dict = {n: {} for n in backends}
    tokens_table: dict = {n: {} for n in backends}
    drift_table: dict = {n: {} for n in backends}
    noise_table: dict = {n: {} for n in backends}
    eff_table: dict = {"cascading": {}}

    checkpoint_set = set(checkpoints)
    # Distinct fact values stated so far, in first-seen order; handed to the
    # noise metric at each checkpoint.
    known_values: list = []

    for ev in events:
        turn = ev["turn"]
        ack = "Got it." if ev["is_fact"] else "Sure."
        for mem in backends.values():
            mem.add_message("user", ev["content"], turn)
            mem.add_message("assistant", ack, turn)

        if ev["is_fact"]:
            for f in facts:
                if f.key == ev["fact_key"]:
                    val = f.current_value(turn)
                    if val not in known_values:
                        known_values.append(val)

        # Checkpoints are compared against turn + 1, i.e. event turns are
        # presumably 0-based — confirm against generate_conversation.
        cp = turn + 1
        if cp not in checkpoint_set:
            continue

        active = [f for f in facts if f.injected_at <= turn]

        for name, mem in backends.items():
            recalls = [recall_at_t(mem, f, turn) for f in active]
            if recalls:
                recall_table[name][cp] = sum(r["recalled"] for r in recalls) / len(recalls)
                tokens_table[name][cp] = int(sum(r["tokens"] for r in recalls) / len(recalls))
            else:
                # No fact has been injected yet (possible with a small
                # --turns): record zeros instead of dividing by zero.
                recall_table[name][cp] = 0.0
                tokens_table[name][cp] = 0

            drift_facts = [f for f in active if f.updated_at and f.updated_at <= turn]
            if drift_facts:
                drifts = [temporal_drift_score(mem, f, turn)["drift"] for f in drift_facts]
                drift_table[name][cp] = sum(drifts) / len(drifts)
            else:
                drift_table[name][cp] = 0.0

            noise_table[name][cp] = memory_noise_ratio(
                mem, "best sorting algorithm?", known_values, turn
            )

        # Cascade efficiency. With no active facts there is nothing to
        # compare, so the entry is left unset and the report shows 1.00x.
        if active:
            eff_table["cascading"][cp] = cascade_efficiency(
                backends["cascading"], backends["naive"], active, turn
            )

    if args.quiet:
        return

    _print_table("RECALL@T", checkpoints, recall_table, lambda v: f"{v*100:5.1f}%")
    _print_table("TOKENS / QUERY", checkpoints, tokens_table, lambda v: f"{v:6d}")
    _print_table("TEMPORAL DRIFT", checkpoints, drift_table, lambda v: f"{v*100:5.1f}%")

    print("\n CASCADE EFFICIENCY (cascading recall-per-token vs naive)")
    vals = " ".join(f"{eff_table['cascading'].get(c, 1.0):5.2f}x" for c in checkpoints)
    print(f" {'cascading':<12} {vals}")

    # Business impact, projected from the last checkpoint's numbers.
    qpm = 100_000  # queries per month, matching the heading printed below
    cost_inr = 83 / 1_000_000  # presumably INR per token — TODO confirm the pricing basis
    final_cp = checkpoints[-1]
    print("\n BUSINESS IMPACT @ 100K queries/month")
    print(f" {'Backend':<12} {'Tokens/Q':>9} {'Monthly(INR)':>13} {'Recall':>8}")
    print(" " + "-" * 52)
    for name in backends:
        tok = tokens_table[name].get(final_cp, 0)
        cost = tok * qpm * cost_inr
        rec = recall_table[name].get(final_cp, 0)
        print(f" {name:<12} {tok:>9,} INR{cost:>9,.0f} {rec:>7.1%}")

    print()
    print(" >> Run 'streamlit run dashboard.py' to see full visualisation")
    print("=" * 60)
| 151 | + |
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments