@@ -496,6 +496,51 @@ def substrate_vocab_curriculum(probs: torch.Tensor,
496496 return out / s
497497
498498
499+ _IAMBIC_VOWELS = set ("aeiouAEIOU" )
500+
501+
502+ def _approx_syllables (tok_str : str ) -> int :
503+ """Approximate syllable count = number of vowel-clusters.
504+ Pure substrate (char-class arithmetic). Min 1 for non-empty tokens.
505+ """
506+ if not tok_str :
507+ return 0
508+ n = 0
509+ prev_v = False
510+ for ch in tok_str :
511+ v = ch in _IAMBIC_VOWELS
512+ if v and not prev_v :
513+ n += 1
514+ prev_v = v
515+ return max (1 , n )
516+
517+
def substrate_iambic_phase(syl_pos: int, probs: torch.Tensor,
                           vocab_size: int) -> torch.Tensor:
    """Apply a period-2 (weak/STRONG) rank-dependent bias to ``probs``.

    The parity of ``syl_pos`` selects the direction of the bias:

    * even ``syl_pos`` (weak position): low indices are boosted, high
      indices are damped;
    * odd ``syl_pos`` (strong position): the reverse.

    Index ``r`` gets polarity ``1 - 2*r/(vocab_size - 1)``, which runs
    linearly from +1 at index 0 to -1 at index ``vocab_size - 1``. The
    multiplicative boost is ``exp(log(_PHI_FOR_SAMPLING) * sign * polarity)``,
    so it always lies between ``1/_PHI_FOR_SAMPLING`` and
    ``_PHI_FOR_SAMPLING``.

    NOTE(review): the intended reading is that low indices are function
    words and high indices are content words; that depends on how the
    vocabulary is ordered, which is not visible here -- confirm.

    Args:
        syl_pos: Running syllable position; only its parity is used.
        probs: Probabilities whose last dimension has ``vocab_size``
            entries.
        vocab_size: Number of entries along the last dimension of
            ``probs``.

    Returns:
        The biased probabilities, renormalised along the last dimension.
        ``probs`` itself is returned unchanged when ``vocab_size <= 1``.
    """
    if vocab_size <= 1:
        # A polarity ramp needs at least two ranks (and would divide by 0).
        return probs
    sign = 1.0 if syl_pos % 2 == 0 else -1.0
    # Work in at least float32: float16 / bfloat16 cannot represent every
    # integer rank once the vocabulary exceeds 2048 / 256 entries, which
    # would turn the linear ramp into a staircase, and the 1e-8 guard below
    # would underflow to zero in float16.
    work_dtype = torch.promote_types(probs.dtype, torch.float32)
    ranks = torch.arange(vocab_size, dtype=work_dtype, device=probs.device)
    rank_pol = 1.0 - 2.0 * ranks / (vocab_size - 1)
    log_boost = math.log(_PHI_FOR_SAMPLING) * sign * rank_pol
    out = probs.to(work_dtype) * torch.exp(log_boost)
    # Normalise each distribution separately so a batched (..., V) input is
    # not normalised jointly; for 1-D input this equals the global sum.
    out = out / (out.sum(dim=-1, keepdim=True) + 1e-8)
    # Hand floating inputs back in their own dtype.
    return out.to(probs.dtype) if probs.is_floating_point() else out
542+
543+
499544def substrate_golden_phase (t_pos : int , probs : torch .Tensor ,
500545 vocab_size : int ) -> torch .Tensor :
501546 """Golden-angle phase: functional/content rhythm primitive.
@@ -776,6 +821,12 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
776821 model .eval ()
777822 with torch .no_grad ():
778823 seq = prompt .clone ()
824+ # Iambic syllable counter: sum syllables of prompt tokens.
825+ syl_pos = 0
826+ if vocab is not None :
827+ for tid in seq [0 ].tolist ():
828+ if tid < len (vocab ):
829+ syl_pos += _approx_syllables (vocab [tid ])
779830 for _ in range (n_new ):
780831 T = seq .shape [1 ]
781832 ctx = seq if T <= model .seq_len else seq [:, - model .seq_len :]
@@ -793,9 +844,9 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
793844 probs [0 ] = substrate_syntax_blend (
794845 int (seq [0 , - 1 ]), bigram_prior , probs [0 ],
795846 context_tokens = ctx_back , vocab = vocab )
796- # Golden-phase rhythm (functional/content alternation).
797- probs [0 ] = substrate_golden_phase (
798- seq . shape [ 1 ] , probs [0 ], vocab_size )
847+ # Iambic stress rhythm (period-2 weak/STRONG alternation).
848+ probs [0 ] = substrate_iambic_phase (
849+ syl_pos , probs [0 ], vocab_size )
799850 # Theme momentum (subject-matter coherence).
800851 if token_signatures is not None and seq .shape [1 ] >= 1 :
801852 recent_list = seq [0 , - 13 :].tolist ()
@@ -821,6 +872,11 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
821872 probs [0 ], active_vocab_size )
822873 next_tok = torch .multinomial (probs , num_samples = 1 )
823874 seq = torch .cat ([seq , next_tok ], dim = 1 )
875+ # Advance iambic syllable counter.
876+ if vocab is not None :
877+ nid = int (next_tok [0 , 0 ])
878+ if nid < len (vocab ):
879+ syl_pos += _approx_syllables (vocab [nid ])
824880 model .train ()
825881 return seq
826882
@@ -883,9 +939,14 @@ def _single_stage_refine(model, draft, vocab_size, scorer, mode: str,
883939 pos_probs = substrate_syntax_blend (
884940 int (new [0 , t_draft - 1 ]), bigram_prior , pos_probs ,
885941 context_tokens = ctx_back , vocab = vocab )
886- # Golden-phase rhythm (functional/content alternation).
887- pos_probs = substrate_golden_phase (
888- t_draft , pos_probs , vocab_size_local )
942+ # Iambic stress rhythm (period-2 weak/STRONG).
943+ if vocab is not None :
944+ syl_pos = 0
945+ for tid in new [0 , :t_draft ].tolist ():
946+ if tid < len (vocab ):
947+ syl_pos += _approx_syllables (vocab [tid ])
948+ pos_probs = substrate_iambic_phase (
949+ syl_pos , pos_probs , vocab_size_local )
889950 # Theme momentum (subject-matter coherence).
890951 if token_signatures is not None and t_draft >= 1 :
891952 recent_start = max (0 , t_draft - 13 )
0 commit comments