@@ -587,46 +587,42 @@ class are mutually substitutable.
587587
588588
589589def build_pronoun_mask (vocab : list ) -> torch .Tensor :
590- """Identify pronoun-shape tokens: low rank + monosyllabic + no suffix
591- + starts-with-consonant. The starts-with-consonant filter excludes
592- 'this'/'that' demonstratives which were over-amplified by anaphora
593- boost in v61/v62 ("this this this" cascade). Pure substrate:
594- char-class arithmetic on the first character.
590+ """Identify pronoun-shape tokens: low rank + monosyllabic + no suffix.
591+ Pure substrate (rank + syllable + morphology shape).
595592 """
596593 V = len (vocab )
597594 mask = torch .zeros (V )
598595 for i , tok in enumerate (vocab ):
599596 if not tok or len (tok ) == 1 :
600597 continue
601- is_low_rank = i < 78
598+ is_low_rank = i < 78 # 65 chars + F(7)=13 most common words
602599 no_suffix = _token_morphology (tok ) == 'root'
603600 is_monosyl = _approx_syllables (tok ) == 1
604- # Exclude demonstrative-shape: starts with 't' followed by 'h'.
605- is_demonstrative = (len (tok ) >= 2
606- and tok [0 ] == 't' and tok [1 ] == 'h' )
607- if is_low_rank and no_suffix and is_monosyl and not is_demonstrative :
601+ if is_low_rank and no_suffix and is_monosyl :
608602 mask [i ] = 1.0
609603 return mask
610604
611605
612606def substrate_need_fill (open_needs : int , probs : torch .Tensor ,
613607 vocab_size : int ) -> torch .Tensor :
614- """Bracket-matching with F(7)=13 saturation cap to prevent runaway
615- pressure on extended content runs .
608+ """Bracket-matching: as open expectations accumulate, push toward
609+ closure. Pressure builds at Fibonacci thresholds .
616610
617611 open_needs increments after CONTENT tokens (rank > 78),
618612 decrements after function tokens (65 <= rank < 78),
619613 resets at punctuation/newline.
614+
615+ Boost magnitude scales by F(tier)/phi^(pi*tier) where tier is the
616+ largest index k with F(k) <= open_needs. Rank polarity (low-rank=
617+ closer) modulates which tokens get the boost.
620618 """
621619 if open_needs <= 0 or vocab_size <= 1 :
622620 return probs
623621 phi = _PHI_FOR_SAMPLING
624622 F = _FIB_NUMS_FOR_BIGRAM
625- # F(7) = 13 hard cap on accumulated pressure.
626- open_needs_eff = min (open_needs , F [7 ])
627623 pressure_tier = 0
628624 for k , f in enumerate (F ):
629- if open_needs_eff >= f :
625+ if open_needs >= f :
630626 pressure_tier = k
631627 boost_mag = F [pressure_tier ] / (phi ** (math .pi * pressure_tier ))
632628 ranks = torch .arange (vocab_size , dtype = probs .dtype ,
@@ -684,21 +680,16 @@ def build_end_vowel_per_token(vocab: list) -> list:
684680
685681def substrate_rhyme_resonance (recent_tokens : list , end_vowels : list ,
686682 probs : torch .Tensor ) -> torch .Tensor :
687- """Reward sound-echo with F(3)=2 saturation cap.
688-
689- First echo boosts (pressure < F(3) -> exponent positive).
690- Excess repetition penalizes (pressure >= F(3) -> exponent negative).
691- Prevents 'light light light' cascade; preserves rhyme as a
692- self-limiting substrate signal.
683+ """Reward sound-echo: tokens whose final vowel matches recent
684+ tokens' final vowels. Weight F(k)/phi^(pi*k) for the k-th most recent of the last F(7)=13 tokens.
693685
694- Pure substrate (Fibonacci-tier saturation, phi-bounded boost).
686+ Pure substrate (last-vowel-of-token + Fibonacci decay). No rhyme
687+ dictionary; the echo emerges from substrate sampling pressure.
695688 """
696689 if not recent_tokens or not end_vowels :
697690 return probs
698691 phi = _PHI_FOR_SAMPLING
699692 phi_pi = phi ** math .pi
700- F = _FIB_NUMS_FOR_BIGRAM
701- sat = float (F [3 ]) # 2: saturation threshold
702693 V_ev = len (end_vowels )
703694 recent_pressure = {}
704695 for i , tid in enumerate (reversed (recent_tokens [- 13 :])):
@@ -707,17 +698,18 @@ def substrate_rhyme_resonance(recent_tokens: list, end_vowels: list,
707698 v = end_vowels [tid ]
708699 if not v :
709700 continue
710- kt = min (i , len (F ) - 1 )
711- w = F [kt ] / (phi_pi ** kt )
701+ kt = min (i , len (_FIB_NUMS_FOR_BIGRAM ) - 1 )
702+ w = _FIB_NUMS_FOR_BIGRAM [kt ] / (phi_pi ** kt )
712703 recent_pressure [v ] = recent_pressure .get (v , 0.0 ) + w
713704 if not recent_pressure :
714705 return probs
706+ # Per-token log-boost halved by F(3)=2 -- substrate-canonical
707+ # damping so anti-stagnation can override repeated same-vowel
708+ # cascades (v62 'light light light' problem).
715709 boost = torch .ones_like (probs )
716- log_phi = math .log (phi )
710+ rhyme_scale = math .log (phi ) / float ( _FIB_NUMS_FOR_BIGRAM [ 3 ] )
717711 for v , p in recent_pressure .items ():
718- # Echo boost below saturation; penalty above.
719- delta = (sat - p ) / (sat + abs (p ) + 1e-8 ) # in [-1, +1]
720- log_boost = log_phi * delta
712+ log_boost = rhyme_scale * p / (1.0 + p )
721713 bf = math .exp (log_boost )
722714 for i , ev in enumerate (end_vowels ):
723715 if ev == v :
@@ -1096,17 +1088,14 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
10961088 for tid in seq [0 ].tolist ():
10971089 if tid < len (vocab ):
10981090 tok = vocab [tid ]
1091+ syl_pos += _approx_syllables (tok )
10991092 if tok in ('.' , '!' , '?' , '\n ' ):
1100- # Sentence/line boundary: reset iambic + needs.
1101- syl_pos = 0
11021093 open_needs = 0
11031094 cluster_len = 0
1104- else :
1105- syl_pos += _approx_syllables (tok )
1106- if tid > content_thresh :
1107- open_needs += 1
1108- elif n_chars_local <= tid <= content_thresh :
1109- open_needs = max (0 , open_needs - 1 )
1095+ elif tid > content_thresh :
1096+ open_needs += 1
1097+ elif n_chars_local <= tid <= content_thresh :
1098+ open_needs = max (0 , open_needs - 1 )
11101099 # Cluster tracking from trailing chars of token.
11111100 if tok :
11121101 for ch in tok :
@@ -1185,17 +1174,14 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
11851174 nid = int (next_tok [0 , 0 ])
11861175 if nid < len (vocab ):
11871176 tok = vocab [nid ]
1177+ syl_pos += _approx_syllables (tok )
11881178 if tok in ('.' , '!' , '?' , '\n ' ):
1189- # Sentence boundary reset.
1190- syl_pos = 0
11911179 open_needs = 0
11921180 cluster_len = 0
1193- else :
1194- syl_pos += _approx_syllables (tok )
1195- if nid > content_thresh :
1196- open_needs += 1
1197- elif n_chars_local <= nid <= content_thresh :
1198- open_needs = max (0 , open_needs - 1 )
1181+ elif nid > content_thresh :
1182+ open_needs += 1
1183+ elif n_chars_local <= nid <= content_thresh :
1184+ open_needs = max (0 , open_needs - 1 )
11991185 if tok :
12001186 for ch in tok :
12011187 if ch in _IAMBIC_VOWELS :
@@ -1272,16 +1258,11 @@ def _single_stage_refine(model, draft, vocab_size, scorer, mode: str,
12721258 int (new [0 , t_draft - 1 ]), bigram_prior , pos_probs ,
12731259 context_tokens = ctx_back , vocab = vocab )
12741260 # Iambic stress rhythm (period-2 weak/STRONG).
1275- # Reset syl_pos at sentence boundaries.
12761261 if vocab is not None :
12771262 syl_pos = 0
12781263 for tid in new [0 , :t_draft ].tolist ():
12791264 if tid < len (vocab ):
1280- tk = vocab [tid ]
1281- if tk in ('.' , '!' , '?' , '\n ' ):
1282- syl_pos = 0
1283- else :
1284- syl_pos += _approx_syllables (tk )
1265+ syl_pos += _approx_syllables (vocab [tid ])
12851266 pos_probs = substrate_iambic_phase (
12861267 syl_pos , pos_probs , vocab_size_local )
12871268 # Symbolic substitution disabled (v60 results).
0 commit comments