Skip to content

Commit 36b411b

Browse files
committed
transformerless_lm: damp rhyme boost by F(3)=2
Reverted v63's 4 refinements (overcorrected). Single targeted revision: rhyme boost magnitude halved (log(phi)/F(3)). Anti-stagnation now overrides same-end-vowel cascades cleanly while preserving echo signal at lower amplitude.
1 parent 48fa059 commit 36b411b

1 file changed

Lines changed: 33 additions & 52 deletions

File tree

experiments/transformerless_lm/train_self_recursive.py

Lines changed: 33 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -587,46 +587,42 @@ class are mutually substitutable.
587587

588588

589589
def build_pronoun_mask(vocab: list) -> torch.Tensor:
590-
"""Identify pronoun-shape tokens: low rank + monosyllabic + no suffix
591-
+ starts-with-consonant. The starts-with-consonant filter excludes
592-
'this'/'that' demonstratives which were over-amplified by anaphora
593-
boost in v61/v62 ("this this this" cascade). Pure substrate:
594-
char-class arithmetic on the first character.
590+
"""Identify pronoun-shape tokens: low rank + monosyllabic + no suffix.
591+
Pure substrate (rank + syllable + morphology shape).
595592
"""
596593
V = len(vocab)
597594
mask = torch.zeros(V)
598595
for i, tok in enumerate(vocab):
599596
if not tok or len(tok) == 1:
600597
continue
601-
is_low_rank = i < 78
598+
is_low_rank = i < 78 # 65 chars + F(7)=13 most common words
602599
no_suffix = _token_morphology(tok) == 'root'
603600
is_monosyl = _approx_syllables(tok) == 1
604-
# Exclude demonstrative-shape: starts with 't' followed by 'h'.
605-
is_demonstrative = (len(tok) >= 2
606-
and tok[0] == 't' and tok[1] == 'h')
607-
if is_low_rank and no_suffix and is_monosyl and not is_demonstrative:
601+
if is_low_rank and no_suffix and is_monosyl:
608602
mask[i] = 1.0
609603
return mask
610604

611605

612606
def substrate_need_fill(open_needs: int, probs: torch.Tensor,
613607
vocab_size: int) -> torch.Tensor:
614-
"""Bracket-matching with F(7)=13 saturation cap to prevent runaway
615-
pressure on extended content runs.
608+
"""Bracket-matching: as open expectations accumulate, push toward
609+
closure. Pressure builds at Fibonacci thresholds.
616610
617611
open_needs increments after CONTENT tokens (rank > 78),
618612
decrements after function tokens (65 <= rank < 78),
619613
resets at punctuation/newline.
614+
615+
Boost magnitude scales by F(tier)/phi^(pi*tier) where tier is the
616+
largest Fibonacci index <= open_needs. Rank polarity (low-rank=
617+
closer) modulates which tokens get the boost.
620618
"""
621619
if open_needs <= 0 or vocab_size <= 1:
622620
return probs
623621
phi = _PHI_FOR_SAMPLING
624622
F = _FIB_NUMS_FOR_BIGRAM
625-
# F(7) = 13 hard cap on accumulated pressure.
626-
open_needs_eff = min(open_needs, F[7])
627623
pressure_tier = 0
628624
for k, f in enumerate(F):
629-
if open_needs_eff >= f:
625+
if open_needs >= f:
630626
pressure_tier = k
631627
boost_mag = F[pressure_tier] / (phi ** (math.pi * pressure_tier))
632628
ranks = torch.arange(vocab_size, dtype=probs.dtype,
@@ -684,21 +680,16 @@ def build_end_vowel_per_token(vocab: list) -> list:
684680

685681
def substrate_rhyme_resonance(recent_tokens: list, end_vowels: list,
686682
probs: torch.Tensor) -> torch.Tensor:
687-
"""Reward sound-echo with F(3)=2 saturation cap.
688-
689-
First echo boosts (pressure < F(3) -> exponent positive).
690-
Excess repetition penalizes (pressure >= F(3) -> exponent negative).
691-
Prevents 'light light light' cascade; preserves rhyme as a
692-
self-limiting substrate signal.
683+
"""Reward sound-echo: tokens whose final vowel matches recent
684+
tokens' final vowels. F(k) decay across last F(7)=13 tokens.
693685
694-
Pure substrate (Fibonacci-tier saturation, phi-bounded boost).
686+
Pure substrate (last-vowel-of-token + Fibonacci decay). No rhyme
687+
dictionary; the echo emerges from substrate sampling pressure.
695688
"""
696689
if not recent_tokens or not end_vowels:
697690
return probs
698691
phi = _PHI_FOR_SAMPLING
699692
phi_pi = phi ** math.pi
700-
F = _FIB_NUMS_FOR_BIGRAM
701-
sat = float(F[3]) # 2: saturation threshold
702693
V_ev = len(end_vowels)
703694
recent_pressure = {}
704695
for i, tid in enumerate(reversed(recent_tokens[-13:])):
@@ -707,17 +698,18 @@ def substrate_rhyme_resonance(recent_tokens: list, end_vowels: list,
707698
v = end_vowels[tid]
708699
if not v:
709700
continue
710-
kt = min(i, len(F) - 1)
711-
w = F[kt] / (phi_pi ** kt)
701+
kt = min(i, len(_FIB_NUMS_FOR_BIGRAM) - 1)
702+
w = _FIB_NUMS_FOR_BIGRAM[kt] / (phi_pi ** kt)
712703
recent_pressure[v] = recent_pressure.get(v, 0.0) + w
713704
if not recent_pressure:
714705
return probs
706+
# Per-token log-boost halved by F(3)=2 -- substrate-canonical
707+
# damping so anti-stagnation can override repeated same-vowel
708+
# cascades (v62 'light light light' problem).
715709
boost = torch.ones_like(probs)
716-
log_phi = math.log(phi)
710+
rhyme_scale = math.log(phi) / float(_FIB_NUMS_FOR_BIGRAM[3])
717711
for v, p in recent_pressure.items():
718-
# Echo boost below saturation; penalty above.
719-
delta = (sat - p) / (sat + abs(p) + 1e-8) # in [-1, +1]
720-
log_boost = log_phi * delta
712+
log_boost = rhyme_scale * p / (1.0 + p)
721713
bf = math.exp(log_boost)
722714
for i, ev in enumerate(end_vowels):
723715
if ev == v:
@@ -1096,17 +1088,14 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
10961088
for tid in seq[0].tolist():
10971089
if tid < len(vocab):
10981090
tok = vocab[tid]
1091+
syl_pos += _approx_syllables(tok)
10991092
if tok in ('.', '!', '?', '\n'):
1100-
# Sentence/line boundary: reset iambic + needs.
1101-
syl_pos = 0
11021093
open_needs = 0
11031094
cluster_len = 0
1104-
else:
1105-
syl_pos += _approx_syllables(tok)
1106-
if tid > content_thresh:
1107-
open_needs += 1
1108-
elif n_chars_local <= tid <= content_thresh:
1109-
open_needs = max(0, open_needs - 1)
1095+
elif tid > content_thresh:
1096+
open_needs += 1
1097+
elif n_chars_local <= tid <= content_thresh:
1098+
open_needs = max(0, open_needs - 1)
11101099
# Cluster tracking from trailing chars of token.
11111100
if tok:
11121101
for ch in tok:
@@ -1185,17 +1174,14 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
11851174
nid = int(next_tok[0, 0])
11861175
if nid < len(vocab):
11871176
tok = vocab[nid]
1177+
syl_pos += _approx_syllables(tok)
11881178
if tok in ('.', '!', '?', '\n'):
1189-
# Sentence boundary reset.
1190-
syl_pos = 0
11911179
open_needs = 0
11921180
cluster_len = 0
1193-
else:
1194-
syl_pos += _approx_syllables(tok)
1195-
if nid > content_thresh:
1196-
open_needs += 1
1197-
elif n_chars_local <= nid <= content_thresh:
1198-
open_needs = max(0, open_needs - 1)
1181+
elif nid > content_thresh:
1182+
open_needs += 1
1183+
elif n_chars_local <= nid <= content_thresh:
1184+
open_needs = max(0, open_needs - 1)
11991185
if tok:
12001186
for ch in tok:
12011187
if ch in _IAMBIC_VOWELS:
@@ -1272,16 +1258,11 @@ def _single_stage_refine(model, draft, vocab_size, scorer, mode: str,
12721258
int(new[0, t_draft - 1]), bigram_prior, pos_probs,
12731259
context_tokens=ctx_back, vocab=vocab)
12741260
# Iambic stress rhythm (period-2 weak/STRONG).
1275-
# Reset syl_pos at sentence boundaries.
12761261
if vocab is not None:
12771262
syl_pos = 0
12781263
for tid in new[0, :t_draft].tolist():
12791264
if tid < len(vocab):
1280-
tk = vocab[tid]
1281-
if tk in ('.', '!', '?', '\n'):
1282-
syl_pos = 0
1283-
else:
1284-
syl_pos += _approx_syllables(tk)
1265+
syl_pos += _approx_syllables(vocab[tid])
12851266
pos_probs = substrate_iambic_phase(
12861267
syl_pos, pos_probs, vocab_size_local)
12871268
# Symbolic substitution disabled (v60 results).

0 commit comments

Comments
 (0)