syzygyhack
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 0 deletions b/‎README.md‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎examples/dietary_composition.py‎
Lines changed: 298 additions & 0 deletions b/‎examples/dietary_composition.py‎
Lines changed: 298 additions & 0 deletions
@@ -19,3 +19,6 @@ htmlcov/
 .cardinal/
 .avril/
 .claude/
+
+# Agent instructions (local, not shipped)
+agents.md
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 fisher-simplex Contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -91,6 +91,8 @@ The `examples/` directory contains runnable scripts demonstrating domain-specifi
 | `frontier_enrichment.py` | Research | Degree-8 enrichment coordinates, residual diagnostics |
 | `frontier_discrimination.py` | Research | Frontier-based model discrimination |
 | `llm_token_geometry.py` | ML / LLMs | Top-k to simplex, frontier coordinates, drift detection |
+| `dietary_composition.py` | Epidemiology | Closure distortion, forced-pair dietary concentration, boundary safety with zeros |
+| `microbiome_trajectory.py` | Microbiology | Antibiotic response trajectories, Psi boundary alarm, tangent perturbation fingerprinting |
 | `attention_atlas.md` | ML / Interpretability | Chart discovery, shared tangent modes, and replay validation for transformer attention heads |
 
 ## Documentation
 
@@ -0,0 +1,298 @@
+"""Fisher geometry of dietary composition: when correct math changes the answer.
+
+Nutritional epidemiology studies diet as proportions of energy from
+macronutrients -- points on the probability simplex.  But standard
+analyses treat these proportions as unconstrained Euclidean variables,
+creating spurious correlations (Aitchison's closure problem) and
+distorting apparent dietary associations.
+
+The Fisher amplitude lift corrects the geometry without breaking at
+zeros -- critical because roughly a third of US adults are habitual
+non-drinkers, producing exact zeros in the alcohol component.
+Log-ratio corrections (Aitchison ILR) cannot handle these zeros without
+pseudocounts that distort the boundary.
+
+This example generates synthetic dietary compositions resembling NHANES
+macronutrient profiles and demonstrates three results:
+  1. Closure creates large spurious correlations that Fisher invariants
+     avoid.
+  2. Dietary concentration (Q_delta) is an independent structural signal
+     invisible to raw proportions.
+  3. Fisher geometry preserves the full sample where log-ratio methods
+     discard a third of it.
+
+Demonstrates: forced_coordinates, q_delta, h3, fisher_distance,
+              fisher_mean, fisher_pca, shannon_entropy,
+              divergence_analysis, distributional_shift,
+              pairwise_fisher_distances.
+"""
+
+import numpy as np
+
+import fisher_simplex as fs
+
+print("=== Fisher Geometry of Dietary Composition ===\n")
+
+rng = np.random.default_rng(42)
+
+# ── Generate synthetic dietary compositions ──────────────────────────
+# Four macronutrient energy components on Delta^3:
+#   p = (protein, carbohydrate, fat, alcohol)
+# Typical US diet (NHANES 2017-2018): ~16% protein, ~47% carb, ~36% fat,
+# ~3% alcohol (rounded, so the shares need not sum to exactly 100%).  About
+# 31% of adults are habitual non-drinkers.
+
+N_SUBJECTS = 400
+
+# Three dietary archetypes via asymmetric Dirichlet, calibrated to
+# NHANES reduced-rank-regression quintiles (Shan et al. JAMA 2019):
+#   balanced:  near population mean (~16% prot, ~47% carb, ~36% fat)
+#   high_carb: upper carb quintile (~14% prot, ~59% carb, ~26% fat)
+#   high_fat:  upper fat quintile  (~17% prot, ~38% carb, ~44% fat)
+archetypes = {
+    "balanced": (4.0, 12.0, 9.0, 0.7),
+    "high_carb": (3.5, 15.0, 6.5, 0.5),
+    "high_fat": (4.0, 9.5, 11.0, 0.5),
+}
+
+n_per = N_SUBJECTS // 3
+remainder = N_SUBJECTS - 3 * n_per
+
+compositions = []
+labels = []
+for i, (name, alpha) in enumerate(archetypes.items()):
+    m = n_per + (remainder if i == 0 else 0)
+    batch = rng.dirichlet(alpha, size=m)
+    compositions.append(batch)
+    labels.extend([name] * m)
+
+diets = np.vstack(compositions)  # shape (N_SUBJECTS, 4)
+labels = np.array(labels)
+
+# Zero out alcohol for ~31% of subjects (habitual non-drinkers),
+# then renormalize.  NHANES 2017-2018: ~31% lifetime + past-year
+# abstainers combined.
+non_drinker_mask = rng.random(N_SUBJECTS) < 0.31
+diets[non_drinker_mask, 3] = 0.0
+diets = diets / diets.sum(axis=1, keepdims=True)
+
+n_nondrinkers = non_drinker_mask.sum()
+print(f"Subjects: {N_SUBJECTS}  (non-drinkers: {n_nondrinkers}, "
+      f"{100 * n_nondrinkers / N_SUBJECTS:.0f}%)")
+print("Components: protein, carbohydrate, fat, alcohol  (N=4)\n")
+
+# Print archetype means.
+print(f"{'Archetype':12s}  {'%prot':>6s}  {'%carb':>6s}  {'%fat':>6s}  {'%alc':>6s}")
+print("-" * 44)
+for name in archetypes:
+    mask = labels == name
+    m = diets[mask].mean(axis=0)
+    print(f"{name:12s}  {m[0]:6.1%}  {m[1]:6.1%}  {m[2]:6.1%}  {m[3]:6.1%}")
+
+# ── Closure distortion: spurious Euclidean correlations ──────────────
+# Because proportions sum to 1, increasing %fat FORCES %carb downward.
+# This creates a strong negative correlation that is a mathematical
+# artifact of the constraint, not a biological signal.
+
+R_euclidean = np.corrcoef(diets.T)
+
+print("\n--- Euclidean correlation matrix (raw proportions) ---")
+col_names = ["prot", "carb", "fat ", "alc "]
+print(f"{'':6s}  {'prot':>6s}  {'carb':>6s}  {'fat':>6s}  {'alc':>6s}")
+for i, n in enumerate(col_names):
+    row = "  ".join(f"{R_euclidean[i, j]:+6.3f}" for j in range(4))
+    print(f"{n:6s}  {row}")
+
+print("\nNote: fat-carb correlation is strongly negative (closure artifact).")
+print("This is not biology -- it is forced by the sum-to-one constraint.")
+
+# Fisher invariants live on the sphere and are not subject to closure.
+coords = fs.forced_coordinates(diets)  # shape (N_SUBJECTS, 2)
+q_vals = coords[:, 0]
+h_vals = coords[:, 1]
+
+# Correlation between Q_delta and H_3 is intrinsic, not an artifact.
+r_qh = np.corrcoef(q_vals, h_vals)[0, 1]
+print(f"\nFisher forced-pair correlation (Q_delta, H_3): {r_qh:+.3f}")
+print("This correlation is geometric (intrinsic), not a closure artifact.")
+
+# ── Dietary concentration: Q_delta as structural signal ──────────────
+# Q_delta measures how dominated the diet is by one or two macros.
+# High Q_delta = nutritional imbalance; low Q_delta = macro balance.
+
+print("\n--- Dietary concentration by archetype ---")
+print(f"{'Archetype':12s}  {'Q_delta':>18s}  {'H_3':>18s}  {'Shannon':>10s}")
+print(f"{'':12s}  {'mean +/- std':>18s}  {'mean +/- std':>18s}  {'mean':>10s}")
+print("-" * 64)
+for name in archetypes:
+    mask = labels == name
+    q = q_vals[mask]
+    h = h_vals[mask]
+    ent = fs.shannon_entropy(diets[mask])
+    print(f"{name:12s}  {q.mean():.4f} +/- {q.std():.4f}"
+          f"  {h.mean():+.2e} +/- {h.std():.2e}"
+          f"  {ent.mean():10.3f}")
+
+print("\nHigh-carb diets have clearly higher Q_delta (single-macro dominance).")
+print("High-fat diets have similar Q_delta to balanced -- shifting fat for")
+print("carb does not increase concentration.  Q_delta captures structure")
+print("that raw %fat or %carb alone cannot express.")
+
+# ── Concentration predicts risk better than raw proportions ──────────
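+# Synthetic metabolic risk that depends on compositional structure
+# (dietary concentration) rather than any single macronutrient.  The score
+# is built directly from the forced pair (2.0 * Q_delta + 5.0 * H_3 plus
+# Gaussian noise), so the Fisher invariants are favored by construction.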
+
+noise = rng.normal(0, 0.3, N_SUBJECTS)
+risk_score = 2.0 * q_vals + 5.0 * h_vals + noise
+
+# Compare: correlation of risk with raw %fat and %carb vs with Q_delta and H_3.
+fat = diets[:, 2]
+r_fat_risk = np.corrcoef(fat, risk_score)[0, 1]
+r_carb_risk = np.corrcoef(diets[:, 1], risk_score)[0, 1]
+r_q_risk = np.corrcoef(q_vals, risk_score)[0, 1]
+r_h_risk = np.corrcoef(h_vals, risk_score)[0, 1]
+
+print("\n--- Correlation with synthetic metabolic risk ---")
+print(f"  %fat  vs risk:   {r_fat_risk:+.3f}")
+print(f"  %carb vs risk:   {r_carb_risk:+.3f}")
+print(f"  Q_delta vs risk: {r_q_risk:+.3f}")
+print(f"  H_3   vs risk:   {r_h_risk:+.3f}")
+
+
+def _ols_r2(X, y):
+    """R-squared from OLS regression of y on columns of X."""
+    X_aug = np.column_stack([np.ones(len(y)), X])
+    beta = np.linalg.lstsq(X_aug, y, rcond=None)[0]
+    y_hat = X_aug @ beta
+    ss_res = np.sum((y - y_hat) ** 2)
+    ss_tot = np.sum((y - y.mean()) ** 2)
+    return 1.0 - ss_res / ss_tot
+
+
+r2_raw = _ols_r2(diets[:, :3], risk_score)  # %prot, %carb, %fat
+r2_fisher = _ols_r2(coords, risk_score)  # Q_delta, H_3
+
+print(f"\n  R^2 (raw %prot, %carb, %fat -> risk):  {r2_raw:.3f}  (3 predictors)")
+print(f"  R^2 (Q_delta, H_3 -> risk):            {r2_fisher:.3f}  (2 predictors)")
+print("\nFisher invariants explain more variance with fewer predictors.")
+
+# ── Boundary safety: alcohol zeros ───────────────────────────────────
+# Log-ratio (Aitchison) methods require all components > 0.
+# Subjects with zero alcohol must be excluded or imputed.
+# Fisher geometry handles zeros naturally: sqrt(0) = 0.
+
+drinkers = diets[~non_drinker_mask]
+nondrinkers = diets[non_drinker_mask]
+
+print("\n--- Boundary safety: alcohol zeros ---")
+print(f"  Drinkers:     {len(drinkers):4d} subjects (Aitchison-eligible)")
+print(f"  Non-drinkers: {len(nondrinkers):4d} subjects (excluded by Aitchison)")
+print(f"  Full sample:  {N_SUBJECTS:4d} subjects (all usable by Fisher)")
+
+# Fisher distances are well-defined for all subjects, including
+# those on the simplex boundary (p_alcohol = 0).
+mean_all = fs.fisher_mean(diets)
+mean_drinkers = fs.fisher_mean(drinkers)
+
+d_means = fs.fisher_distance(mean_all, mean_drinkers)
+print(f"\n  Fisher distance, full-sample mean vs drinkers-only: {d_means:.4f}")
+
+# Distributional shift between drinkers and non-drinkers.
+shift = fs.distributional_shift(drinkers, nondrinkers)
+print("  Cloud shift (drinkers vs non-drinkers):")
+print(f"    Mean distance:   {shift['mean_distance']:.4f}")
+print(f"    Cloud distance:  {shift['cloud_distance']:.4f}")
+print(f"    Ref dispersion:  {shift['ref_dispersion']:.4f}")
+print(f"    Test dispersion: {shift['test_dispersion']:.4f}")
+
+# Log-ratio on non-drinkers produces -inf.
+with np.errstate(divide="ignore"):
+    log_nondrinkers = np.log(nondrinkers)
+n_inf = np.isinf(log_nondrinkers).any(axis=1).sum()
+print(f"\n  Log-ratio on non-drinkers: {n_inf}/{len(nondrinkers)} rows contain -inf")
+print("  Fisher lift on non-drinkers: all finite (sqrt(0) = 0)")
+
+# ── Overlap divergence: Phi vs Psi at the boundary ──────────────────
+# Psi = N^N * prod(p_i) collapses to zero whenever any component is
+# zero, making the Phi-Psi gap diagnostic of boundary proximity.
+
+div = fs.divergence_analysis(diets)
+print("\n--- Overlap divergence (Phi vs Psi) ---")
+print(f"  Mean |Phi - Psi|:        {div['mean_divergence']:.4f}")
+print(f"  Max |Phi - Psi|:         {div['max_divergence']:.4f}")
+print(f"  Fraction consequential:  {div['fraction_consequential']:.1%}")
+print(f"  Recommendation:          {div['recommendation']}")
+
+div_drinkers = fs.divergence_analysis(drinkers)
+div_nondrinkers = fs.divergence_analysis(nondrinkers)
+print(f"\n  Drinkers mean divergence:     {div_drinkers['mean_divergence']:.4f}")
+print(f"  Non-drinkers mean divergence: {div_nondrinkers['mean_divergence']:.4f}")
+print("  Non-drinkers have maximal divergence (Psi = 0 at boundary).")
+
+# ── Dietary pattern discovery: Fisher PCA ────────────────────────────
+# Tangent PCA at the Fisher mean reveals the principal axes of dietary
+# variation on the sphere -- geometrically meaningful directions.
+
+pca = fs.fisher_pca(diets, n_components=3)
+print("\n--- Fisher tangent PCA ---")
+evr = pca["explained_variance_ratio"]
+print(f"  Variance explained: {evr[0]:.1%}, {evr[1]:.1%}, {evr[2]:.1%}")
+print(f"  Cumulative:         {sum(evr[:2]):.1%} in first two components")
+
# Compare Fisher vs Euclidean distances between archetype means.
arch_names = list(archetypes)
# One Fisher mean per archetype, stacked row-wise in archetype order.
arch_means = np.array([
    fs.fisher_mean(diets[labels == name]) for name in arch_names
])
n_arch = len(arch_names)

D_fisher = fs.pairwise_fisher_distances(arch_means)
# Straight-line distances between every pair of mean rows in one broadcasted
# call, sized by the number of archetypes rather than a literal 3.
# Assumes each fisher_mean result is a 1-D vector, so arch_means is 2-D.
D_euclid = np.linalg.norm(
    arch_means[:, None, :] - arch_means[None, :, :], axis=-1
)

print("\n--- Inter-archetype distances ---")
print(f"{'Pair':30s}  {'Fisher':>8s}  {'Euclid':>8s}  {'Ratio':>7s}")
print("-" * 57)
for i in range(n_arch):
    for j in range(i + 1, n_arch):
        # Floor the denominator so coincident means cannot divide by zero.
        ratio = D_fisher[i, j] / max(D_euclid[i, j], 1e-12)
        print(f"{arch_names[i]:>14s} - {arch_names[j]:<14s}"
              f"  {D_fisher[i, j]:8.4f}  {D_euclid[i, j]:8.4f}  {ratio:7.2f}")
+
+print("\nFisher distance amplifies separation between dietary archetypes,")
+print("especially those differing near the simplex boundary.")
+
+# ── Fat subtriangle (Delta^2) ────────────────────────────────────────
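+# Model the split of total fat into (saturated, monounsaturated,
+# polyunsaturated) with a separate synthetic sample: 100 profiles per fat
+# archetype, drawn independently of the subjects above.
+# NHANES baseline among the three: ~36% SFA, ~38% MUFA, ~26% PUFA
+# (NCI Cancer Trends Progress Report, 2021-2023).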
+# On Delta^2, Q_delta and H_3 capture the full invariant structure
+# through degree 6.
+
+fat_alpha = {
+    "balanced": (4.7, 4.9, 3.4),
+    "high_sat": (12, 4, 2),
+    "high_poly": (3, 4, 10),
+}
+fat_comps = []
+fat_labels = []
+for name, alpha in fat_alpha.items():
+    batch = rng.dirichlet(alpha, size=100)
+    fat_comps.append(batch)
+    fat_labels.extend([name] * 100)
+fat_diets = np.vstack(fat_comps)
+fat_labels = np.array(fat_labels)
+
+print("\n--- Fat subtriangle (sat, mono, poly) on Delta^2 ---")
+q_fat = fs.q_delta(fat_diets)
+h_fat = fs.h3(fat_diets)
+print(f"{'Fat profile':12s}  {'Q_delta':>12s}  {'H_3':>14s}")
+print("-" * 42)
+for name in fat_alpha:
+    mask = np.array(fat_labels) == name
+    print(f"{name:12s}  {q_fat[mask].mean():12.4f}  {h_fat[mask].mean():+14.2e}")
+
+print("\nQ_delta separates concentrated fat profiles (high-saturated)")
+print("from balanced ones; H_3 captures asymmetry in the imbalance.")
+
+print("\n--- Dietary composition example complete ---")