Skip to content

Commit ef72160

Browse files
committed
transformerless_lm: symbolic primitives (substitution + reference)
Two language-symbolic primitives: (1) Equivalence-classes: each token classed by (Fibonacci rank-tier, morphology suffix). At sampling, alpha=1/phi^pi of mass smoothed uniformly within class -- variety without breaking grammar. (2) Reference-chain: pronoun-shape tokens (low rank, monosyllabic, no suffix) get boost proportional to recent CONTENT pressure sum_k F(k)/phi^(pi*k). Substrate anaphora. Wired into autoregressive_generate and _single_stage_refine. Pure substrate: rank-tier + suffix + syllable-count (no word lists).
1 parent cf10678 commit ef72160

1 file changed

Lines changed: 191 additions & 16 deletions

File tree

experiments/transformerless_lm/train_self_recursive.py

Lines changed: 191 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,143 @@ def substrate_vocab_curriculum(probs: torch.Tensor,
499499
_IAMBIC_VOWELS = set("aeiouAEIOU")
500500

501501

502+
def _token_morphology(tok: str) -> str:
503+
"""Universal morphology class from suffix (no English word lists).
504+
char | verb_archaic | gerund | past | adverb | plural | root.
505+
"""
506+
if not tok or len(tok) <= 1:
507+
return 'char'
508+
if tok.endswith('eth') or tok.endswith('est'):
509+
return 'verb_archaic'
510+
if tok.endswith('ing'):
511+
return 'gerund'
512+
if tok.endswith('ed'):
513+
return 'past'
514+
if tok.endswith('ly'):
515+
return 'adverb'
516+
if tok.endswith('s') and len(tok) > 2:
517+
return 'plural'
518+
return 'root'
519+
520+
521+
def build_symbol_classes(vocab: list, n_chars: int = 65) -> tuple:
    """Assign every vocab entry a symbol class = (rank tier, morphology).

    The rank tier is the band the token's index falls into when the
    word region (indices >= n_chars) is cut into consecutive bands whose
    widths are the entries of _FIB_NUMS_FOR_BIGRAM. Indices below
    n_chars get tier -1; indices past the last band share one overflow
    tier. Morphology comes from _token_morphology (suffix shape only).

    Class id = (tier + 1) * n_morph + morph_idx, so tier -1 maps to the
    first n_morph ids.

    Args:
        vocab: token strings, indexed by token id.
        n_chars: number of leading vocab slots treated as the char tier.

    Returns:
        (class_id_tensor, n_classes): a long tensor of length len(vocab)
        and max(class id) + 1. An empty vocab yields an empty tensor and
        n_classes == 0 instead of raising on max() of an empty tensor.
    """
    if not vocab:
        # torch's max() raises on an empty tensor; n_classes == 0 is the
        # value the sampling call sites treat as "substitution disabled".
        return torch.zeros(0, dtype=torch.long), 0

    # Exclusive upper index of each tier: n_chars + running sum of widths.
    tier_bounds = []
    upper = n_chars
    for width in _FIB_NUMS_FOR_BIGRAM:
        upper += width
        tier_bounds.append(upper)

    def rank_tier(i: int) -> int:
        if i < n_chars:
            return -1
        for tier, bound in enumerate(tier_bounds):
            if i < bound:
                return tier
        return len(tier_bounds)  # overflow tier past the last band

    morphs = ('char', 'verb_archaic', 'gerund', 'past',
              'adverb', 'plural', 'root')
    morph_to_idx = {m: i for i, m in enumerate(morphs)}
    n_morph = len(morphs)

    class_ids = [
        (rank_tier(i) + 1) * n_morph + morph_to_idx[_token_morphology(tok)]
        for i, tok in enumerate(vocab)
    ]
    class_id_tensor = torch.tensor(class_ids, dtype=torch.long)
    n_classes = int(class_id_tensor.max().item()) + 1
    return class_id_tensor, n_classes
558+
559+
560+
def substrate_symbolic_substitution(probs: torch.Tensor,
                                    class_id_tensor: torch.Tensor,
                                    n_classes: int,
                                    alpha: float = None) -> torch.Tensor:
    """Blend each token's probability with its class average.

    For every symbol class the total mass is spread evenly over the
    class members; the result is mixed with the input as
    (1 - alpha) * probs + alpha * class_average and renormalized.
    Tokens sharing a class are treated as interchangeable.

    Args:
        probs: 1-D probability vector (indexed with a 1-D scatter on
            dim 0, so it must align element-wise with class_id_tensor).
        class_id_tensor: class id per token, same length as probs.
        n_classes: number of class slots to allocate.
        alpha: mixing weight; when None, 1 / _PHI_FOR_SAMPLING ** pi.

    Returns:
        The smoothed vector divided by (its sum + 1e-8).
    """
    mix = 1.0 / (_PHI_FOR_SAMPLING ** math.pi) if alpha is None else alpha
    members = class_id_tensor.to(probs.device)

    # Per-class accumulated mass and member count (same dtype/device as probs).
    mass_by_class = probs.new_zeros(n_classes).scatter_add_(
        0, members, probs)
    size_by_class = probs.new_zeros(n_classes).scatter_add_(
        0, members, torch.ones_like(probs))
    # Classes with no members would divide by zero; treat their size as 1.
    size_by_class.clamp_(min=1.0)

    class_average = (mass_by_class / size_by_class)[members]
    blended = (1.0 - mix) * probs + mix * class_average
    return blended / (blended.sum() + 1e-8)
587+
588+
589+
def build_pronoun_mask(vocab: list, n_chars: int = 65,
                       n_top_words: int = 13) -> torch.Tensor:
    """Flag pronoun-shaped tokens: low rank, monosyllabic, no suffix.

    A token qualifies when all of the following hold:
      * its index is below n_chars + n_top_words (low rank),
      * it is longer than one character,
      * _token_morphology classifies it as 'root' (no suffix),
      * _approx_syllables reports exactly one syllable.

    Args:
        vocab: token strings, indexed by token id.
        n_chars: number of leading vocab slots holding single chars.
            Defaults to 65, which with n_top_words=13 reproduces the
            previous fixed cutoff of 78.
        n_top_words: how many of the first word slots count as low rank.

    Returns:
        Float tensor of length len(vocab): 1.0 at pronoun-shaped tokens,
        0.0 elsewhere.
    """
    mask = torch.zeros(len(vocab))
    # Only the low-rank prefix can qualify, so the tail is never scanned.
    low_rank_cutoff = max(0, n_chars + n_top_words)
    for i, tok in enumerate(vocab[:low_rank_cutoff]):
        if not tok or len(tok) == 1:
            continue
        if (_token_morphology(tok) == 'root'
                and _approx_syllables(tok) == 1):
            mask[i] = 1.0
    return mask
604+
605+
606+
def substrate_reference_chain(recent_tokens: list,
                              pronoun_mask: torch.Tensor,
                              probs: torch.Tensor,
                              n_chars: int = 65) -> torch.Tensor:
    """Boost pronoun-shaped tokens after recent content tokens.

    Pressure = sum over content tokens in the last 13 positions of
    F[k] / phi^(pi * k), where k is the distance from the most recent
    token (k = 0) and F is _FIB_NUMS_FOR_BIGRAM; k is clamped to the
    last index of F. The multiplicative boost applied where
    pronoun_mask is 1 is exp(log(phi) * pressure / (1 + pressure)),
    which stays below phi. The result is renormalized.

    A token counts as content when its id is >= n_chars + 13, the exact
    complement of the low-rank range used for the pronoun mask. The
    threshold is spelled as a literal so it cannot drift from that
    cutoff through list indexing, and the boundary id is included.

    Args:
        recent_tokens: recent token ids, oldest first.
        pronoun_mask: per-token 0/1 mask of pronoun-shaped tokens.
        probs: probability vector to adjust.
        n_chars: number of leading vocab slots holding single chars.

    Returns:
        probs itself (unchanged) when there is no history or no content
        token in the window; otherwise the boosted vector divided by
        (its sum + 1e-8).
    """
    if not recent_tokens:
        return probs

    window = 13          # how far back a content token still counts
    n_top_words = 13     # low-rank word slots that are not "content"
    content_thresh = n_chars + n_top_words

    # Distances (0 = most recent) of the content tokens in the window.
    content_distances = [
        dist
        for dist, tid in enumerate(reversed(recent_tokens[-window:]))
        if tid >= content_thresh
    ]
    if not content_distances:
        return probs

    fib = _FIB_NUMS_FOR_BIGRAM
    phi = _PHI_FOR_SAMPLING
    phi_pi = phi ** math.pi
    pressure = 0.0
    for dist in content_distances:
        k = min(dist, len(fib) - 1)  # the window may exceed len(fib)
        pressure += fib[k] / (phi_pi ** k)
    if pressure <= 0:
        return probs

    # Saturating boost: pressure / (1 + pressure) lies in (0, 1).
    boost_factor = math.exp(math.log(phi) * pressure / (1.0 + pressure))
    pmask = pronoun_mask.to(probs.device).to(probs.dtype)
    boosted = probs * (1.0 + pmask * (boost_factor - 1.0))
    return boosted / (boosted.sum() + 1e-8)
637+
638+
502639
def _approx_syllables(tok_str: str) -> int:
503640
"""Approximate syllable count = number of vowel-clusters.
504641
Pure substrate (char-class arithmetic). Min 1 for non-empty tokens.
@@ -807,7 +944,10 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
807944
bigram_prior: torch.Tensor = None,
808945
vocab: list = None,
809946
token_signatures: torch.Tensor = None,
810-
active_vocab_size: int = None):
947+
active_vocab_size: int = None,
948+
class_id_tensor: torch.Tensor = None,
949+
n_classes: int = 0,
950+
pronoun_mask: torch.Tensor = None):
811951
"""Sample n_new tokens autoregressively with substrate sampling AND
812952
a substrate-canonical recency penalty.
813953
@@ -847,6 +987,15 @@ def autoregressive_generate(model, prompt: torch.Tensor, n_new: int,
847987
# Iambic stress rhythm (period-2 weak/STRONG alternation).
848988
probs[0] = substrate_iambic_phase(
849989
syl_pos, probs[0], vocab_size)
990+
# Symbolic substitution (within-class mass smoothing).
991+
if class_id_tensor is not None and n_classes > 0:
992+
probs[0] = substrate_symbolic_substitution(
993+
probs[0], class_id_tensor, n_classes)
994+
# Symbolic reference chain (pronoun anaphora).
995+
if pronoun_mask is not None and seq.shape[1] >= 1:
996+
recent_list = seq[0, -13:].tolist()
997+
probs[0] = substrate_reference_chain(
998+
recent_list, pronoun_mask, probs[0])
850999
# Theme momentum (subject-matter coherence).
8511000
if token_signatures is not None and seq.shape[1] >= 1:
8521001
recent_list = seq[0, -13:].tolist()
@@ -888,7 +1037,10 @@ def _single_stage_refine(model, draft, vocab_size, scorer, mode: str,
8881037
bigram_prior: torch.Tensor = None,
8891038
vocab: list = None,
8901039
token_signatures: torch.Tensor = None,
891-
active_vocab_size: int = None):
1040+
active_vocab_size: int = None,
1041+
class_id_tensor: torch.Tensor = None,
1042+
n_classes: int = 0,
1043+
pronoun_mask: torch.Tensor = None):
8921044
"""One refinement stage: optimize a single score until plateau.
8931045
8941046
mode: 'min' (harmony, quality) or 'max' (creativity).
@@ -947,6 +1099,16 @@ def _single_stage_refine(model, draft, vocab_size, scorer, mode: str,
9471099
syl_pos += _approx_syllables(vocab[tid])
9481100
pos_probs = substrate_iambic_phase(
9491101
syl_pos, pos_probs, vocab_size_local)
1102+
# Symbolic substitution (within-class smoothing).
1103+
if class_id_tensor is not None and n_classes > 0:
1104+
pos_probs = substrate_symbolic_substitution(
1105+
pos_probs, class_id_tensor, n_classes)
1106+
# Symbolic reference chain (pronoun anaphora).
1107+
if pronoun_mask is not None and t_draft >= 1:
1108+
recent_start = max(0, t_draft - 13)
1109+
recent_list = new[0, recent_start:t_draft].tolist()
1110+
pos_probs = substrate_reference_chain(
1111+
recent_list, pronoun_mask, pos_probs)
9501112
# Theme momentum (subject-matter coherence).
9511113
if token_signatures is not None and t_draft >= 1:
9521114
recent_start = max(0, t_draft - 13)
@@ -1004,7 +1166,10 @@ def staged_refine(model, prompt, n_new, vocab_size,
10041166
bigram_prior: torch.Tensor = None,
10051167
vocab: list = None,
10061168
token_signatures: torch.Tensor = None,
1007-
active_vocab_size: int = None):
1169+
active_vocab_size: int = None,
1170+
class_id_tensor: torch.Tensor = None,
1171+
n_classes: int = 0,
1172+
pronoun_mask: torch.Tensor = None):
10081173
"""Staircase refinement: hit one score, then the next, then the next.
10091174
10101175
Stage 1: substrate alignment (minimize harmony) -- match the shape.
@@ -1020,7 +1185,7 @@ def staged_refine(model, prompt, n_new, vocab_size,
10201185
with torch.no_grad():
10211186
draft = autoregressive_generate(model, prompt, n_new=n_new,
10221187
vocab_size=vocab_size,
1023-
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1188+
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
10241189
stages_out = {}
10251190
stages_out["initial"] = {"seq": draft.clone(),
10261191
"harmony": harmony_scorer(draft),
@@ -1033,7 +1198,7 @@ def staged_refine(model, prompt, n_new, vocab_size,
10331198
n_iters=n_iters_per_stage,
10341199
resample_frac=resample_frac,
10351200
prompt_len=prompt_len,
1036-
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1201+
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
10371202
stages_out["after_harmony"] = {"seq": draft.clone(),
10381203
"trajectory": h_traj,
10391204
"harmony": harmony_scorer(draft),
@@ -1046,7 +1211,7 @@ def staged_refine(model, prompt, n_new, vocab_size,
10461211
n_iters=n_iters_per_stage,
10471212
resample_frac=resample_frac,
10481213
prompt_len=prompt_len,
1049-
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1214+
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
10501215
stages_out["after_quality"] = {"seq": draft.clone(),
10511216
"trajectory": q_traj,
10521217
"harmony": harmony_scorer(draft),
@@ -1060,7 +1225,7 @@ def staged_refine(model, prompt, n_new, vocab_size,
10601225
n_iters=n_iters_per_stage,
10611226
resample_frac=resample_frac,
10621227
prompt_len=prompt_len,
1063-
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1228+
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
10641229
stages_out["after_creativity"] = {"seq": draft.clone(),
10651230
"trajectory": c_traj,
10661231
"harmony": harmony_scorer(draft),
@@ -1094,7 +1259,7 @@ def iterative_refine(model, prompt, n_new, vocab_size,
10941259
# Step 1: initial draft.
10951260
draft = autoregressive_generate(model, prompt, n_new=n_new,
10961261
vocab_size=vocab_size,
1097-
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1262+
temperature=temperature, bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
10981263
history = []
10991264
h0 = harmony_scorer(draft) if harmony_scorer is not None else None
11001265
q0 = quality_scorer(draft) if quality_scorer is not None else None
@@ -1489,12 +1654,22 @@ def quality_fn(seq_tokens):
14891654
bigram_prior = build_substrate_bigram(vocab_size)
14901655
print(f" refined substrate bigram (shape * POS): {bigram_prior.shape}")
14911656

1492-
# Substrate token signatures (theme momentum) -- F-frequency cos basis
1493-
# over char codes, phi-decayed across positions. L2-normalized.
1494-
# NOTE: v57 showed theme momentum drags mean creativity ~-0.01.
1495-
# Disabled for v59 to isolate iambic + threading.
1657+
# Theme momentum disabled (v57 showed it drags ~-0.01).
14961658
token_signatures = None
14971659

1660+
# Symbolic primitives (v60+): equivalence classes + reference chain.
1661+
if vocab_for_bigram is not None:
1662+
n_chars_local = sum(1 for t in vocab_for_bigram if len(t) == 1)
1663+
class_id_tensor, n_classes = build_symbol_classes(
1664+
vocab_for_bigram, n_chars=n_chars_local)
1665+
pronoun_mask = build_pronoun_mask(vocab_for_bigram)
1666+
print(f" symbol classes: {n_classes} | "
1667+
f"pronoun candidates: {int(pronoun_mask.sum().item())}")
1668+
else:
1669+
class_id_tensor = None
1670+
n_classes = 0
1671+
pronoun_mask = None
1672+
14981673
# Active training base: starts as tiny_seed, GROWS by appending each
14991674
# cycle's best refined output -- only if (a) creativity > corpus
15001675
# baseline AND (b) anchor weight constraint still satisfied.
@@ -1561,14 +1736,14 @@ def quality_fn(seq_tokens):
15611736
draft = autoregressive_generate(
15621737
model, prompt_s, n_new=growth_n_new,
15631738
vocab_size=vocab_size, temperature=0.8,
1564-
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1739+
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
15651740
refined_s, _ = staged_refine(
15661741
model, prompt_s, n_new=growth_n_new, vocab_size=vocab_size,
15671742
harmony_scorer=harmony_fn, quality_scorer=quality_fn,
15681743
creativity_scorer=creativity_fn,
15691744
n_iters_per_stage=30, resample_frac=0.35,
15701745
prompt_len=16, temperature=0.5,
1571-
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1746+
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
15721747
samples.append((refined_s.squeeze(0).clone(),
15731748
creativity_fn(refined_s)))
15741749
# Sort by creativity desc, keep top K.
@@ -1638,14 +1813,14 @@ def quality_fn(seq_tokens):
16381813
final_gen = autoregressive_generate(model, prompt, n_new=n_new,
16391814
vocab_size=vocab_size,
16401815
temperature=0.8,
1641-
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1816+
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
16421817
final_refined, _ = staged_refine(
16431818
model, prompt, n_new=n_new, vocab_size=vocab_size,
16441819
harmony_scorer=harmony_fn, quality_scorer=quality_fn,
16451820
creativity_scorer=creativity_fn,
16461821
n_iters_per_stage=200, resample_frac=0.35,
16471822
prompt_len=16, temperature=0.5,
1648-
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size)
1823+
bigram_prior=bigram_prior, vocab=vocab, token_signatures=token_signatures, active_vocab_size=active_vocab_size, class_id_tensor=class_id_tensor, n_classes=n_classes, pronoun_mask=pronoun_mask)
16491824

16501825
return {"name": name, "mode": "self_distillation",
16511826
"n_params": n_params,

0 commit comments

Comments
 (0)