Skip to content

Commit a842dea

Browse files
Phase E: Cognitive Architecture (#67)
* feat: Phase E cognitive architecture - conviction trajectory, repetition detection, THINK/SAY * docs: mark episodic/semantic memory as omitted
1 parent 4502c81 commit a842dea

4 files changed

Lines changed: 190 additions & 6 deletions

File tree

docs/simulation-v2-architecture.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -813,9 +813,9 @@ Split into independent subsystems. Each assessed for actual value vs implementat
813813

814814
- **3.10d: Repetition detection + forced deepening.** If Jaccard similarity on word-level trigrams between consecutive reasonings > 70%, inject a prompt nudge: "You've been thinking the same thing for several days. Has anything actually changed? Are you starting to doubt your plan? Have you actually done anything about it?" Simple trigram comparison, no embeddings needed. Prevents the stale convergence we saw in the ASI run ("No change — save, learn AI, backup income" × 5 timesteps). Without this, agents converge to identical outputs and the sim produces meaningless duplicate reasoning.
815815

816-
**Tier 3: Build at high fidelity (medium effort, marginal value over full traces)**
816+
**OMITTED: Marginal value**
817817

818-
- **3.10e: Episodic vs semantic memory.** After N timesteps of consistent reasoning on a theme, engine extracts a belief statement and adds to persistent "beliefs" field. Shown as "Things I've come to believe:" separate from "What I thought recently." Extraction is rule-based: if the same keywords appear in 3+ consecutive reasonings, consolidate into a belief. Marginal value because the LLM already consolidates beliefs implicitly when reading its own full history — making it explicit is a nice-to-have.
818+
- **~~3.10e: Episodic vs semantic memory.~~** ~~After N timesteps of consistent reasoning on a theme, engine extracts a belief statement and adds to persistent "beliefs" field.~~ **Omitted.** The LLM already consolidates beliefs implicitly when reading its own full history. Making it explicit adds complexity for marginal gain. The full memory trace (uncapped, timestamped) provides sufficient context.
819819

820820
**CUT: Not building**
821821

@@ -832,7 +832,7 @@ Split into independent subsystems. Each assessed for actual value vs implementat
832832
| 3.10b conviction self-awareness | Yes | Yes | Yes |
833833
| 3.10c THINK vs SAY | No | No | Yes |
834834
| 3.10d repetition detection | No | No | Yes |
835-
| 3.10e episodic/semantic memory | No | Yes | Yes |
835+
| ~~3.10e episodic/semantic memory~~ | | | **Omitted** |
836836
| ~~3.10f attention weighting~~ ||| **Cut** |
837837
| 3.10g spontaneous recall | No | No | **Deferred** |
838838

@@ -1175,7 +1175,7 @@ Ship this alone. Every simulation immediately feels more human, and the accounta
11751175
- Conviction self-awareness (all tiers — deterministic)
11761176
- THINK vs SAY separation (high fidelity — schema change)
11771177
- Repetition detection + deepening nudge (high fidelity — string overlap check)
1178-
- Episodic/semantic memory consolidation (high fidelity — rule-based belief extraction)
1178+
- ~~Episodic/semantic memory consolidation~~ — **Omitted**
11791179

11801180
### Phase F: Fidelity Tiers + Results (~1.5 weeks)
11811181

extropy/simulation/reasoning.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,55 @@ def build_pass1_prompt(
267267
trajectory = "fairly steady"
268268
prompt_parts.append(f"\nI've been feeling {trajectory} since this started.")
269269

270+
# --- Conviction self-awareness (Phase E) ---
271+
if is_re_reasoning and len(context.memory_trace) >= 2:
272+
convictions = [
273+
m.conviction for m in context.memory_trace if m.conviction is not None
274+
]
275+
if len(convictions) >= 2:
276+
latest = convictions[-1]
277+
trend = latest - convictions[0]
278+
279+
# Firmness label based on latest conviction
280+
if latest >= 0.7:
281+
firmness = "firm about this"
282+
elif latest >= 0.5:
283+
firmness = "moderately certain"
284+
elif latest >= 0.3:
285+
firmness = "leaning but uncertain"
286+
else:
287+
firmness = "quite uncertain"
288+
289+
# Trend suffix
290+
if trend > 0.2:
291+
trend_text = " and getting more certain"
292+
elif trend < -0.2:
293+
trend_text = " but my certainty has been slipping"
294+
else:
295+
trend_text = ""
296+
297+
prompt_parts.append(f"I've been {firmness}{trend_text} since this started.")
298+
299+
# --- Repetition detection (Phase E) ---
300+
if is_re_reasoning and len(context.memory_trace) >= 2:
301+
last_two = context.memory_trace[-2:]
302+
prev_reasoning = last_two[0].raw_reasoning or ""
303+
curr_reasoning = last_two[1].raw_reasoning or ""
304+
305+
if prev_reasoning and curr_reasoning:
306+
from .text_utils import compute_trigram_jaccard
307+
308+
similarity = compute_trigram_jaccard(prev_reasoning, curr_reasoning)
309+
if similarity > 0.7:
310+
prompt_parts.extend(
311+
[
312+
"",
313+
"*You've been thinking the same things for a while now. "
314+
"Has anything actually changed? Are you starting to doubt yourself? "
315+
"Have you done anything about it, or just thought about it?*",
316+
]
317+
)
318+
270319
# --- Intent accountability ---
271320
if is_re_reasoning and context.prior_action_intent:
272321
prompt_parts.extend(
@@ -306,8 +355,29 @@ def build_pass1_prompt(
306355
]
307356
)
308357

309-
# --- Instructions ---
310-
if is_re_reasoning:
358+
# --- Instructions (with THINK vs SAY at high fidelity) ---
359+
if fidelity == "high":
360+
# Explicit THINK vs SAY separation (Phase E)
361+
prompt_parts.extend(
362+
[
363+
"",
364+
"## Your Honest Reaction",
365+
"",
366+
"There's often a gap between what you THINK and what you SAY.",
367+
"",
368+
"**Your internal monologue** — what you're actually thinking:",
369+
"- Be raw and honest. Fears, doubts, contradictions, anger — whatever is true.",
370+
"- This is just you, thinking to yourself.",
371+
"",
372+
"**Your public statement** — what you'd tell people:",
373+
"- This might differ from your private thoughts.",
374+
"- Consider who you're talking to and what image you want to project.",
375+
"",
376+
"Commit to both. Your reasoning should reflect the internal truth.",
377+
"Your public_statement should reflect what you'd actually say out loud.",
378+
]
379+
)
380+
elif is_re_reasoning:
311381
prompt_parts.extend(
312382
[
313383
"",

extropy/simulation/text_utils.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""Text utilities for cognitive architecture features."""
2+
3+
4+
def _word_trigrams(text: str) -> set[tuple[str, ...]]:
    """Return the distinct word-level trigrams of *text*.

    Words are lower-cased, split on whitespace, and stripped of leading and
    trailing ASCII punctuation so that "money." and "money" compare equal.
    Tokens consisting only of punctuation are dropped. Texts with fewer than
    three remaining words yield an empty set.
    """
    import string  # function-scope import keeps this block self-contained

    words = [
        word
        for token in text.lower().split()
        if (word := token.strip(string.punctuation))
    ]
    # range() is empty when len(words) < 3, so short texts need no special case.
    return {tuple(words[i : i + 3]) for i in range(len(words) - 2)}


def compute_trigram_jaccard(text1: str, text2: str) -> float:
    """Compute Jaccard similarity of word-level trigrams.

    Word trigrams (3-word sequences) are more semantically meaningful
    than character trigrams for detecting paraphrased repetition.
    Comparison is case-insensitive and ignores punctuation attached to the
    start or end of a word, so sentence-final periods or commas do not hide
    otherwise identical wording.

    Args:
        text1: First text
        text2: Second text

    Returns:
        Jaccard similarity in [0, 1]. >0.7 indicates repetitive content.
        Returns 0.0 when either text has fewer than three words (no
        trigrams can be formed), including when both texts are identical.
    """
    trigrams1 = _word_trigrams(text1)
    trigrams2 = _word_trigrams(text2)

    if not trigrams1 or not trigrams2:
        return 0.0

    # Both sets are non-empty here, so the union is non-empty as well.
    return len(trigrams1 & trigrams2) / len(trigrams1 | trigrams2)

tests/test_text_utils.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
"""Unit tests for text utilities."""
2+
3+
from extropy.simulation.text_utils import compute_trigram_jaccard
4+
5+
6+
class TestComputeTrigramJaccard:
    """Tests for word-level trigram Jaccard similarity.

    A similarity above 0.7 is the level the function's docstring describes as
    repetitive, so tests that model realistic reasoning also pin which side
    of that value they fall on.
    """

    def test_identical_texts_returns_1(self):
        """Identical texts should have similarity of 1.0."""
        text = (
            "I am very worried about my job security and what this means for my family"
        )
        assert compute_trigram_jaccard(text, text) == 1.0

    def test_completely_different_texts_returns_0(self):
        """Texts that share no trigram should have similarity of exactly 0."""
        text1 = "The quick brown fox jumps over the lazy dog"
        text2 = "A completely unrelated sentence with no overlap whatsoever here"
        assert compute_trigram_jaccard(text1, text2) == 0.0

    def test_similar_texts_high_similarity(self):
        """A single changed word should still give high similarity."""
        text1 = "I am worried about my job and what this means for my family"
        text2 = "I am worried about my job and what this means for our family"
        similarity = compute_trigram_jaccard(text1, text2)
        # Each text has 11 trigrams; the changed word breaks 2 of them on each
        # side, leaving 9 shared out of 13 distinct (~69%).
        assert abs(similarity - 9 / 13) < 1e-9

    def test_short_text_returns_0(self):
        """Texts with fewer than 3 words should return 0."""
        assert compute_trigram_jaccard("hello world", "hello world") == 0.0
        assert compute_trigram_jaccard("one", "two") == 0.0

    def test_empty_text_returns_0(self):
        """Empty texts should return 0."""
        assert compute_trigram_jaccard("", "") == 0.0
        assert compute_trigram_jaccard("hello there friend", "") == 0.0

    def test_case_insensitive(self):
        """Similarity should be case-insensitive."""
        text1 = "I Am Worried About My Job"
        text2 = "i am worried about my job"
        assert compute_trigram_jaccard(text1, text2) == 1.0

    def test_partial_overlap(self):
        """Texts with partial overlap should have intermediate similarity."""
        text1 = "I need to save money and cut expenses immediately"
        text2 = "I need to save money but also invest for the future"
        similarity = compute_trigram_jaccard(text1, text2)
        # Some overlap but not complete
        assert 0.2 < similarity < 0.8

    def test_repetitive_reasoning_detection(self):
        """Paraphrased reasoning overlaps clearly but stays below 0.7."""
        reasoning1 = (
            "I'm terrified about losing my job. Need to cut spending and save money. "
            "Maybe look for backup work. Lisa and I need to talk about the budget."
        )
        reasoning2 = (
            "Still terrified about losing my job. Need to cut spending and save money. "
            "Looking at gig apps for backup work. Lisa and I talked about the budget."
        )
        similarity = compute_trigram_jaccard(reasoning1, reasoning2)
        # These share themes but are paraphrased: 15 shared trigrams out of 35
        # distinct (~43%). That is well above unrelated text, yet NOT above the
        # 0.7 level that counts as repetitive.
        assert 0.3 < similarity < 0.7

    def test_different_reasoning_low_similarity(self):
        """Different reasoning should have low similarity."""
        reasoning1 = (
            "I'm terrified about losing my job. Need to cut spending and save money. "
            "Maybe look for backup work."
        )
        reasoning2 = (
            "Actually feeling more optimistic now. The retraining program looks promising. "
            "I signed up for the AI course and it's going well."
        )
        similarity = compute_trigram_jaccard(reasoning1, reasoning2)
        assert similarity < 0.3

0 commit comments

Comments
 (0)