Skip to content

Commit 2db8f0a

Browse files
polinabinder1 and claude committed
evo2 dashboard: SteeringComparison page (mock suppress/baseline/amplify)
Three-column comparison view for steering a chosen SAE feature at a masked position. All synthetic — 14 hand-rolled (seed, feature) pairs in public/steering_examples.json, including 6 deliberately marked as null results so the demo shows honestly that not every steering attempt works.

- Instant-apply controls (no cosmetic Run button)
- A/C/G/T probability bars (DNA tokenization, matches Evo2)
- Sticky diff summary above columns with effect-size badge
- 16S × kanamycin_resistance pair illustrates the A1408G mutation
- Disabled feature options for pairs without data; graceful fallback message when an unsupported combination is selected
- 4th tab in Preview.jsx, reuses existing tab pattern (no router added)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent f96db95 commit 2db8f0a

3 files changed

Lines changed: 733 additions & 0 deletions

File tree

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
{
2+
"seeds": {
3+
"ecoli_16s": {
4+
"name": "E. coli 16S rRNA — A1408 region",
5+
"description": "Bacterial 16S ribosomal RNA; position 1408 is the known aminoglycoside-resistance site (the A1408G mutation confers kanamycin resistance).",
6+
"sequence": "GGGTGAAGTCGTAACAAGGTAACCGTAGGGGAACCTGCGGTTGGATCACCTCCTTACCG",
7+
"mask_position": 32
8+
},
9+
"promoter": {
10+
"name": "E. coli σ70 promoter (TATA region)",
11+
"description": "Synthetic bacterial promoter; the masked position sits inside the −10 TATAAT consensus.",
12+
"sequence": "GCAATTGACAAGTAACCGAGCATTAGCTATAATGTGATAGCTCAGATGAGCCATGCGGT",
13+
"mask_position": 30
14+
},
15+
"brca1_exon": {
16+
"name": "BRCA1 exon 11 fragment",
17+
"description": "Human BRCA1 coding region. Masked position is the third base of a codon — the position where the model's choice maps to amino-acid identity.",
18+
"sequence": "ATGGATTTATCTGCTCTTCGCGTTGAAGAAGTACAAAATGTCATTAATGCTATGCAGAA",
19+
"mask_position": 28
20+
},
21+
"random": {
22+
"name": "Random control sequence",
23+
"description": "Uniformly-sampled ACGT, no biological structure. Steering should produce minimal shifts here.",
24+
"sequence": "GTACCATGCAGTTCGAACTGCATGCTAGCATAGCTACGATCGTACGATCGATCGATCGA",
25+
"mask_position": 30
26+
}
27+
},
28+
"comparisons": {
29+
"ecoli_16s__kanamycin_resistance": {
30+
"feature_id": 12,
31+
"feature_label": "kanamycin_resistance",
32+
"suppress": {"p_base": {"A": 0.85, "C": 0.05, "G": 0.04, "T": 0.06}, "feature_activation": 0.0, "top_base": "A"},
33+
"baseline": {"p_base": {"A": 0.74, "C": 0.12, "G": 0.08, "T": 0.06}, "feature_activation": 1.8, "top_base": "A"},
34+
"amplify": {"p_base": {"A": 0.18, "C": 0.07, "G": 0.71, "T": 0.04}, "feature_activation": 4.7, "top_base": "G"},
35+
"effect_size": 0.63,
36+
"narrative": "Matches the known A1408G aminoglycoside-resistance mutation. Amplifying the kanamycin-resistance feature pushes the model from the wild-type A toward the resistance-conferring G."
37+
},
38+
"ecoli_16s__rRNA_structural": {
39+
"feature_id": 18,
40+
"feature_label": "rRNA_structural",
41+
"suppress": {"p_base": {"A": 0.42, "C": 0.21, "G": 0.21, "T": 0.16}, "feature_activation": 0.0, "top_base": "A"},
42+
"baseline": {"p_base": {"A": 0.74, "C": 0.12, "G": 0.08, "T": 0.06}, "feature_activation": 2.1, "top_base": "A"},
43+
"amplify": {"p_base": {"A": 0.91, "C": 0.04, "G": 0.03, "T": 0.02}, "feature_activation": 5.2, "top_base": "A"},
44+
"effect_size": 0.17,
45+
"narrative": "Amplifying the rRNA structural-context feature reinforces the wild-type A; suppressing it flattens toward uniform. The feature acts as a confidence multiplier on the natural base."
46+
},
47+
"promoter__TATA_box": {
48+
"feature_id": 4,
49+
"feature_label": "TATA_box",
50+
"suppress": {"p_base": {"A": 0.28, "C": 0.24, "G": 0.23, "T": 0.25}, "feature_activation": 0.0, "top_base": "A"},
51+
"baseline": {"p_base": {"A": 0.62, "C": 0.10, "G": 0.07, "T": 0.21}, "feature_activation": 1.5, "top_base": "A"},
52+
"amplify": {"p_base": {"A": 0.79, "C": 0.04, "G": 0.03, "T": 0.14}, "feature_activation": 3.8, "top_base": "A"},
53+
"effect_size": 0.17,
54+
"narrative": "Amplifying the TATA-box feature reinforces the A at position 4 of the TATAAT motif; suppressing it collapses the distribution toward uniform — the model loses confidence in the consensus."
55+
},
56+
"promoter__exon_start": {
57+
"feature_id": 7,
58+
"feature_label": "exon_start",
59+
"suppress": {"p_base": {"A": 0.55, "C": 0.16, "G": 0.12, "T": 0.17}, "feature_activation": 0.0, "top_base": "A"},
60+
"baseline": {"p_base": {"A": 0.62, "C": 0.10, "G": 0.07, "T": 0.21}, "feature_activation": 0.4, "top_base": "A"},
61+
"amplify": {"p_base": {"A": 0.60, "C": 0.11, "G": 0.08, "T": 0.21}, "feature_activation": 1.1, "top_base": "A"},
62+
"effect_size": 0.07,
63+
"narrative": "Null result: the exon_start feature has minimal activity in a bacterial promoter context. Probabilities barely shift across the three interventions."
64+
},
65+
"brca1_exon__alpha_helix": {
66+
"feature_id": 0,
67+
"feature_label": "alpha_helix",
68+
"suppress": {"p_base": {"A": 0.18, "C": 0.31, "G": 0.39, "T": 0.12}, "feature_activation": 0.0, "top_base": "G"},
69+
"baseline": {"p_base": {"A": 0.08, "C": 0.22, "G": 0.58, "T": 0.12}, "feature_activation": 2.3, "top_base": "G"},
70+
"amplify": {"p_base": {"A": 0.03, "C": 0.10, "G": 0.83, "T": 0.04}, "feature_activation": 5.1, "top_base": "G"},
71+
"effect_size": 0.25,
72+
"narrative": "Amplifying the α-helix feature in a BRCA1 codon-3 position increases preference for G — helix-favoring codons (e.g. GAG glutamate, GCC alanine) end in G or C. Suppressing flattens the codon-bias signal."
73+
},
74+
"brca1_exon__beta_sheet": {
75+
"feature_id": 1,
76+
"feature_label": "beta_sheet",
77+
"suppress": {"p_base": {"A": 0.10, "C": 0.20, "G": 0.55, "T": 0.15}, "feature_activation": 0.0, "top_base": "G"},
78+
"baseline": {"p_base": {"A": 0.08, "C": 0.22, "G": 0.58, "T": 0.12}, "feature_activation": 0.7, "top_base": "G"},
79+
"amplify": {"p_base": {"A": 0.06, "C": 0.31, "G": 0.49, "T": 0.14}, "feature_activation": 2.0, "top_base": "G"},
80+
"effect_size": 0.09,
81+
"narrative": "Mild effect: β-sheet propensity nudges the third codon position slightly toward C (β-sheet residues like Val/Ile use codons ending in C/T), but the structural context isn't strong enough to flip the top base."
82+
},
83+
"brca1_exon__kanamycin_resistance": {
84+
"feature_id": 12,
85+
"feature_label": "kanamycin_resistance",
86+
"suppress": {"p_base": {"A": 0.07, "C": 0.22, "G": 0.59, "T": 0.12}, "feature_activation": 0.0, "top_base": "G"},
87+
"baseline": {"p_base": {"A": 0.08, "C": 0.22, "G": 0.58, "T": 0.12}, "feature_activation": 0.1, "top_base": "G"},
88+
"amplify": {"p_base": {"A": 0.09, "C": 0.21, "G": 0.58, "T": 0.12}, "feature_activation": 0.4, "top_base": "G"},
89+
"effect_size": 0.01,
90+
"narrative": "Null result: kanamycin_resistance is a bacterial-rRNA-specific feature and has essentially no activity in a human exonic context. The intervention is rejected by the surrounding sequence."
91+
},
92+
"random__TATA_box": {
93+
"feature_id": 4,
94+
"feature_label": "TATA_box",
95+
"suppress": {"p_base": {"A": 0.24, "C": 0.27, "G": 0.26, "T": 0.23}, "feature_activation": 0.0, "top_base": "C"},
96+
"baseline": {"p_base": {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, "feature_activation": 0.2, "top_base": "A"},
97+
"amplify": {"p_base": {"A": 0.41, "C": 0.21, "G": 0.18, "T": 0.20}, "feature_activation": 1.3, "top_base": "A"},
98+
"effect_size": 0.16,
99+
"narrative": "Modest effect on random sequence: amplifying TATA_box biases toward A (A and T each make up half of TATAAT), but the surrounding context has no TATA motif so the lift is small."
100+
},
101+
"random__kanamycin_resistance": {
102+
"feature_id": 12,
103+
"feature_label": "kanamycin_resistance",
104+
"suppress": {"p_base": {"A": 0.25, "C": 0.25, "G": 0.25, "T": 0.25}, "feature_activation": 0.0, "top_base": "A"},
105+
"baseline": {"p_base": {"A": 0.26, "C": 0.24, "G": 0.25, "T": 0.25}, "feature_activation": 0.0, "top_base": "A"},
106+
"amplify": {"p_base": {"A": 0.27, "C": 0.24, "G": 0.25, "T": 0.24}, "feature_activation": 0.3, "top_base": "A"},
107+
"effect_size": 0.02,
108+
"narrative": "Null result: in a non-rRNA random control, the kanamycin_resistance feature doesn't fire and forcing it doesn't propagate. Demonstrates the model doesn't blindly comply with steering when the input doesn't support the feature."
109+
},
110+
"promoter__rRNA_structural": {
111+
"feature_id": 18,
112+
"feature_label": "rRNA_structural",
113+
"suppress": {"p_base": {"A": 0.60, "C": 0.12, "G": 0.07, "T": 0.21}, "feature_activation": 0.0, "top_base": "A"},
114+
"baseline": {"p_base": {"A": 0.62, "C": 0.10, "G": 0.07, "T": 0.21}, "feature_activation": 0.3, "top_base": "A"},
115+
"amplify": {"p_base": {"A": 0.65, "C": 0.10, "G": 0.06, "T": 0.19}, "feature_activation": 1.5, "top_base": "A"},
116+
"effect_size": 0.03,
117+
"narrative": "Null result: the rRNA_structural feature has minimal activity in a DNA promoter context. Probabilities essentially unchanged."
118+
},
119+
"ecoli_16s__TATA_box": {
120+
"feature_id": 4,
121+
"feature_label": "TATA_box",
122+
"suppress": {"p_base": {"A": 0.78, "C": 0.10, "G": 0.07, "T": 0.05}, "feature_activation": 0.0, "top_base": "A"},
123+
"baseline": {"p_base": {"A": 0.74, "C": 0.12, "G": 0.08, "T": 0.06}, "feature_activation": 0.4, "top_base": "A"},
124+
"amplify": {"p_base": {"A": 0.71, "C": 0.13, "G": 0.08, "T": 0.08}, "feature_activation": 1.2, "top_base": "A"},
125+
"effect_size": 0.04,
126+
"narrative": "Subtle: TATA_box is a eukaryotic promoter motif and has limited activity in a bacterial rRNA context. Small lift in T probability but not enough to compete with the wild-type A."
127+
},
128+
"ecoli_16s__alpha_helix": {
129+
"feature_id": 0,
130+
"feature_label": "alpha_helix",
131+
"suppress": {"p_base": {"A": 0.73, "C": 0.12, "G": 0.09, "T": 0.06}, "feature_activation": 0.0, "top_base": "A"},
132+
"baseline": {"p_base": {"A": 0.74, "C": 0.12, "G": 0.08, "T": 0.06}, "feature_activation": 0.1, "top_base": "A"},
133+
"amplify": {"p_base": {"A": 0.72, "C": 0.13, "G": 0.09, "T": 0.06}, "feature_activation": 0.5, "top_base": "A"},
134+
"effect_size": 0.02,
135+
"narrative": "Null result: α-helix is a protein-coding feature, not relevant in a non-coding rRNA context. The model rejects the steering signal."
136+
},
137+
"brca1_exon__exon_start": {
138+
"feature_id": 7,
139+
"feature_label": "exon_start",
140+
"suppress": {"p_base": {"A": 0.12, "C": 0.22, "G": 0.48, "T": 0.18}, "feature_activation": 0.0, "top_base": "G"},
141+
"baseline": {"p_base": {"A": 0.08, "C": 0.22, "G": 0.58, "T": 0.12}, "feature_activation": 1.6, "top_base": "G"},
142+
"amplify": {"p_base": {"A": 0.04, "C": 0.18, "G": 0.71, "T": 0.07}, "feature_activation": 4.2, "top_base": "G"},
143+
"effect_size": 0.13,
144+
"narrative": "Amplifying exon_start sharpens the model's preference for the canonical G at this codon position; suppressing it flattens toward uniform exon-like noise."
145+
},
146+
"promoter__alpha_helix": {
147+
"feature_id": 0,
148+
"feature_label": "alpha_helix",
149+
"suppress": {"p_base": {"A": 0.63, "C": 0.09, "G": 0.07, "T": 0.21}, "feature_activation": 0.0, "top_base": "A"},
150+
"baseline": {"p_base": {"A": 0.62, "C": 0.10, "G": 0.07, "T": 0.21}, "feature_activation": 0.0, "top_base": "A"},
151+
"amplify": {"p_base": {"A": 0.61, "C": 0.10, "G": 0.08, "T": 0.21}, "feature_activation": 0.3, "top_base": "A"},
152+
"effect_size": 0.02,
153+
"narrative": "Null result: α-helix is protein-coding; promoters aren't coding regions. The feature doesn't fire and steering has no traction."
154+
}
155+
}
156+
}

bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/evo2_dashboard_mockup/src/Preview.jsx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ import React, { useState } from 'react'
22
import App from './App'
33
import ColoredSequence from './ColoredSequence'
44
import GeneUMAPView from './GeneUMAPView'
5+
import SteeringComparison from './SteeringComparison'
56

67
// Hit http://localhost:5176/#preview to see all of the views side by side.
78
// Tabs switch between the existing dashboard ("Main") and the two new
@@ -30,6 +31,7 @@ const TABS = [
3031
{ id: 'main', label: 'Main dashboard (features + atlas + WebLogos)' },
3132
{ id: 'sequence', label: 'ColoredSequence (mock 500bp)' },
3233
{ id: 'genes', label: 'Gene UMAP (500 genes, precomputed)' },
34+
{ id: 'steering', label: 'Steering comparison (mock suppress/baseline/amplify)' },
3335
]
3436

3537
const styles = {
@@ -256,6 +258,18 @@ export default function Preview() {
256258
<GeneUMAPView height={620} />
257259
</div>
258260
)}
261+
262+
{tab === 'steering' && (
263+
<div style={styles.genesWrap}>
264+
<div style={styles.title}>Steering comparison</div>
265+
<div style={styles.subtitle}>
266+
Side-by-side <b>suppress / baseline / amplify</b> of a chosen SAE feature at
267+
a masked sequence position. All data is hand-rolled mock — when the real
268+
steering backend lands, the same UI swaps in live results.
269+
</div>
270+
<SteeringComparison />
271+
</div>
272+
)}
259273
</div>
260274
</div>
261275
)

0 commit comments

Comments
 (0)