Skip to content

Commit 1aeee21

Browse files
committed
transformerless_lm: iambic stress primitive (period 2 = F(3))
A period-2 weak/STRONG alternation modulates rank polarity at the syllable scale. Syllables are approximated by vowel-cluster count (substrate-pure: char-class arithmetic). This replaces golden_phase in the generation paths -- iambic is the language-structural rhythm (Shakespeare's signature), whereas golden was the universal-irrational rhythm.
1 parent 2663c7f commit 1aeee21

1 file changed

Lines changed: 67 additions & 6 deletions

File tree

experiments/transformerless_lm/train_self_recursive.py

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,51 @@ def substrate_vocab_curriculum(probs: torch.Tensor,
496496
return out / s
497497

498498

499+
_IAMBIC_VOWELS = set("aeiouAEIOU")
500+
501+
502+
def _approx_syllables(tok_str: str) -> int:
503+
"""Approximate syllable count = number of vowel-clusters.
504+
Pure substrate (char-class arithmetic). Min 1 for non-empty tokens.
505+
"""
506+
if not tok_str:
507+
return 0
508+
n = 0
509+
prev_v = False
510+
for ch in tok_str:
511+
v = ch in _IAMBIC_VOWELS
512+
if v and not prev_v:
513+
n += 1
514+
prev_v = v
515+
return max(1, n)
516+
517+
518+
def substrate_iambic_phase(syl_pos: int, probs: torch.Tensor,
                           vocab_size: int) -> torch.Tensor:
    """Apply a period-2 (iambic) weak/STRONG bias to a token distribution.

    The parity of ``syl_pos`` selects the direction of a rank-dependent
    multiplicative boost:

        even syl_pos -> WEAK position   -> low indices are boosted
        odd  syl_pos -> STRONG position -> high indices are boosted

    Each index ``r`` gets polarity ``1 - 2*r/(V-1)``, which runs linearly
    from +1 at index 0 to -1 at index V-1. The multiplier is
    ``exp(log(_PHI_FOR_SAMPLING) * sign * polarity)``, so it stays within
    ``[1/_PHI_FOR_SAMPLING, _PHI_FOR_SAMPLING]``.

    NOTE(review): this treats the index into ``probs`` as a frequency rank
    (low = function words, high = content words), as the original
    documentation states -- confirm against how the vocabulary is ordered.

    Args:
        syl_pos: Running syllable position; only its parity is used.
        probs: Token probabilities. Assumed to be 1-D with ``vocab_size``
            entries so it broadcasts against the boost vector -- TODO
            confirm at call sites.
        vocab_size: Number of ranks V used to build the polarity ramp.

    Returns:
        ``probs`` unchanged when ``vocab_size <= 1``; otherwise the
        re-weighted distribution divided by its sum (plus 1e-8 to avoid
        division by zero).
    """
    if vocab_size <= 1:
        return probs

    on_weak_beat = syl_pos % 2 == 0
    direction = 1.0 if on_weak_beat else -1.0

    rank_idx = torch.arange(vocab_size, dtype=probs.dtype,
                            device=probs.device)
    # Linear ramp: +1 at index 0 down to -1 at index vocab_size - 1.
    polarity = 1.0 - 2.0 * rank_idx / (vocab_size - 1)

    # Work in log space so the multiplier is symmetric about 1.
    gain = torch.exp(math.log(_PHI_FOR_SAMPLING) * direction * polarity)

    reweighted = probs * gain
    return reweighted / (reweighted.sum() + 1e-8)
542+
543+
499544
def substrate_golden_phase(t_pos: int, probs: torch.Tensor,
500545
vocab_size: int) -> torch.Tensor:
501546
"""Golden-angle phase: functional/content rhythm primitive.
@@ -776,6 +821,12 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
776821
model.eval()
777822
with torch.no_grad():
778823
seq = prompt.clone()
824+
# Iambic syllable counter: sum syllables of prompt tokens.
825+
syl_pos = 0
826+
if vocab is not None:
827+
for tid in seq[0].tolist():
828+
if tid < len(vocab):
829+
syl_pos += _approx_syllables(vocab[tid])
779830
for _ in range(n_new):
780831
T = seq.shape[1]
781832
ctx = seq if T <= model.seq_len else seq[:, -model.seq_len:]
@@ -793,9 +844,9 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
793844
probs[0] = substrate_syntax_blend(
794845
int(seq[0, -1]), bigram_prior, probs[0],
795846
context_tokens=ctx_back, vocab=vocab)
796-
# Golden-phase rhythm (functional/content alternation).
797-
probs[0] = substrate_golden_phase(
798-
seq.shape[1], probs[0], vocab_size)
847+
# Iambic stress rhythm (period-2 weak/STRONG alternation).
848+
probs[0] = substrate_iambic_phase(
849+
syl_pos, probs[0], vocab_size)
799850
# Theme momentum (subject-matter coherence).
800851
if token_signatures is not None and seq.shape[1] >= 1:
801852
recent_list = seq[0, -13:].tolist()
@@ -821,6 +872,11 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
821872
probs[0], active_vocab_size)
822873
next_tok = torch.multinomial(probs, num_samples=1)
823874
seq = torch.cat([seq, next_tok], dim=1)
875+
# Advance iambic syllable counter.
876+
if vocab is not None:
877+
nid = int(next_tok[0, 0])
878+
if nid < len(vocab):
879+
syl_pos += _approx_syllables(vocab[nid])
824880
model.train()
825881
return seq
826882

@@ -883,9 +939,14 @@ def _single_stage_refine(model, draft, vocab_size, scorer, mode: str,
883939
pos_probs = substrate_syntax_blend(
884940
int(new[0, t_draft - 1]), bigram_prior, pos_probs,
885941
context_tokens=ctx_back, vocab=vocab)
886-
# Golden-phase rhythm (functional/content alternation).
887-
pos_probs = substrate_golden_phase(
888-
t_draft, pos_probs, vocab_size_local)
942+
# Iambic stress rhythm (period-2 weak/STRONG).
943+
if vocab is not None:
944+
syl_pos = 0
945+
for tid in new[0, :t_draft].tolist():
946+
if tid < len(vocab):
947+
syl_pos += _approx_syllables(vocab[tid])
948+
pos_probs = substrate_iambic_phase(
949+
syl_pos, pos_probs, vocab_size_local)
889950
# Theme momentum (subject-matter coherence).
890951
if token_signatures is not None and t_draft >= 1:
891952
recent_start = max(0, t_draft - 13)

0 commit comments

Comments
 (0)