|
1 | 1 | #!/usr/bin/env python3 |
2 | | -"""Synthesize learnings from JSONL archive with time-weighted compression.""" |
| 2 | +"""Synthesize learnings and failures from JSONL archive with time-weighted compression.""" |
3 | 3 |
|
4 | 4 | import json |
5 | 5 | import os |
6 | | -from datetime import datetime, timedelta |
| 6 | +from datetime import datetime, timezone |
7 | 7 |
|
# File locations: JSONL archives live under memory/; the synthesized
# Markdown digest is written next to them.
REPO_PATH = "."
LEARNINGS_FILE = f"{REPO_PATH}/memory/learnings.jsonl"
FAILURES_FILE = f"{REPO_PATH}/memory/failures.jsonl"
ACTIVE_FILE = f"{REPO_PATH}/memory/ACTIVE_LEARNINGS.md"
11 | 12 |
|
12 | 13 |
|
def now_utc():
    """Return the current moment as a timezone-aware UTC datetime."""
    return datetime.now(tz=timezone.utc)
| 16 | + |
| 17 | + |
def weight_by_age(days_old):
    """Time-weighted compression factor (recent=100%, old=summarized)."""
    # Ascending age thresholds paired with the detail factor to keep.
    for limit, factor in ((1, 1.0), (7, 0.7), (30, 0.3)):
        if days_old <= limit:
            return factor
    return 0.1
23 | 28 |
|
24 | 29 |
|
def parse_ts(ts_str):
    """Parse an ISO-8601 timestamp string, handling Z suffix and missing tz.

    Args:
        ts_str: ISO-8601 string, possibly ending in "Z", possibly naive.

    Returns:
        A timezone-aware datetime (naive inputs are assumed UTC), or None
        when ts_str is empty/None or not a valid ISO-8601 timestamp.
    """
    if not ts_str:
        return None
    # Only a *trailing* "Z" is the UTC designator; a blanket replace() would
    # also rewrite any other "Z" in a malformed string before parsing.
    if ts_str.endswith("Z"):
        ts_str = ts_str[:-1] + "+00:00"
    try:
        dt = datetime.fromisoformat(ts_str)
    except ValueError:
        return None
    # Normalize naive timestamps to UTC so age math never mixes naive/aware.
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
| 42 | + |
| 43 | + |
def load_jsonl(path):
    """Load all entries from a JSONL file, silently skipping corrupt lines."""
    records = []
    if not os.path.exists(path):
        return records
    try:
        with open(path) as fh:
            for raw in fh:
                raw = raw.strip()
                if not raw:
                    continue  # skip blank lines
                try:
                    records.append(json.loads(raw))
                except json.JSONDecodeError:
                    continue  # corrupt line: drop it, keep the rest
    except OSError as exc:
        # Best-effort read: warn but return whatever was parsed so far.
        print(f"Warning: could not read {path}: {exc}")
    return records
| 61 | + |
| 62 | + |
def _bucket_learnings(learnings, now):
    """Split learnings into (recent, medium, old) lists of (entry, weight)."""
    recent, medium, old = [], [], []
    for entry in learnings:
        dt = parse_ts(entry.get("ts", ""))
        # Entries with missing/unparseable timestamps age out to the archive.
        days_old = (now - dt).days if dt else 999
        weight = weight_by_age(days_old)
        if days_old <= 1:
            recent.append((entry, weight))
        elif days_old <= 30:
            medium.append((entry, weight))
        else:
            old.append((entry, weight))
    return recent, medium, old


def _recent_failures(failures, now, window_days=14, keep=10):
    """Return failures within window_days, keeping the last `keep` in file order."""
    hits = []
    for entry in failures:
        dt = parse_ts(entry.get("ts", ""))
        days_old = (now - dt).days if dt else 999
        if days_old <= window_days:
            hits.append(entry)
    return hits[-keep:]


def synthesize_learnings(learnings, failures):
    """Synthesize learnings and failures into ACTIVE_LEARNINGS.md content.

    Args:
        learnings: dicts from learnings.jsonl (keys used: ts, title,
            context, takeaway, source).
        failures: dicts from failures.jsonl (keys used: ts, day, task,
            reason).

    Returns:
        Markdown text for ACTIVE_LEARNINGS.md.
    """
    # Nothing to synthesize: short-circuit before any date math.
    if not learnings and not failures:
        return "## Active Learnings\n\nNo learnings yet.\n"

    now = now_utc()
    recent, medium, old = _bucket_learnings(learnings, now)
    recent_failures = _recent_failures(failures, now)

    out = ["## Active Learnings\n\n"]
    out.append(f"*Last synthesized: {now.strftime('%Y-%m-%dT%H:%M:%SZ')}*\n\n")

    if recent:
        out.append("### Recent (Full Detail)\n\n")
        for entry, _ in recent[:5]:
            title = entry.get("title", "Untitled")
            detail = entry.get("context") or entry.get("takeaway") or ""
            out.append(f"- **{title}**: {detail[:200]}\n")
        out.append("\n")

    if medium:
        out.append("### Active Lessons (Condensed)\n\n")
        for entry, weight in medium[:10]:
            title = entry.get("title", "Lesson")
            content = entry.get("context") or entry.get("takeaway") or ""
            # Below full weight, truncate long lessons to ~100 chars.
            if weight < 1.0 and len(content) > 100:
                content = content[:100] + "..."
            out.append(f"- {title}: {content}\n")
        out.append("\n")

    if old:
        out.append("### Archived Insights\n\n")
        # Collapse old entries to titles grouped by their source theme.
        themes = {}
        for entry, _ in old:
            theme = entry.get("source", "General")
            themes.setdefault(theme, []).append(entry.get("title", "Lesson"))
        for theme, titles in themes.items():
            out.append(f"- **{theme}**: {', '.join(titles[:3])}\n")
        out.append("\n")

    if recent_failures:
        out.append("### Recent Failures (avoid repeating)\n\n")
        for entry in recent_failures:
            day = entry.get("day", "?")
            task = entry.get("task", "?")
            reason = entry.get("reason", "")
            line = f"- Day {day} — {task}"
            if reason:
                line += f": {reason[:120]}"
            out.append(line + "\n")
        out.append("\n")

    return "".join(out)
112 | 136 |
|
def main():
    """Rebuild ACTIVE_LEARNINGS.md from the JSONL archives."""
    learnings = load_jsonl(LEARNINGS_FILE)
    failures = load_jsonl(FAILURES_FILE)
    content = synthesize_learnings(learnings, failures)

    # Ensure memory/ exists; abspath so dirname is never an empty string.
    target_dir = os.path.dirname(os.path.abspath(ACTIVE_FILE))
    os.makedirs(target_dir, exist_ok=True)
    with open(ACTIVE_FILE, "w") as out:
        out.write(content)

    print(f"Synthesized {len(learnings)} learnings + {len(failures)} failures → {ACTIVE_FILE}")
124 | 148 |
|
125 | 149 |
|
126 | 150 | if __name__ == "__main__": |
|
0 commit comments