Skip to content

Commit f794c31

Browse files
polinabinder1 and claude committed
fix(eval-labels): CodeRabbit review — splice_donor consensus + multi-parent GFF rows
- splice_donor regex now requires the terminal T (GT[AG]AGT consensus), not GT[AG]AG
- euk_windows: split comma-separated GFF Parent IDs so a shared exon/CDS attaches to every transcript

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Signed-off-by: Polina Binder <pbinder@nvidia.com>
1 parent 0c76d38 commit f794c31

2 files changed

Lines changed: 7 additions & 5 deletions

File tree

bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/euk_windows.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -67,11 +67,13 @@ def parse_gff(gff_path):
6767
tx_gene[tid] = a.get("Parent", "").replace("gene:", "")
6868
tx_biotype[tid] = a.get("biotype", "")
6969
elif typ == "exon":
70-
tid = a.get("Parent", "").replace("transcript:", "")
71-
tx_exon[tid].append((s, e))
70+
for tid in a.get("Parent", "").replace("transcript:", "").split(","):
71+
if tid:
72+
tx_exon[tid].append((s, e))
7273
elif typ == "CDS":
73-
tid = a.get("Parent", "").replace("transcript:", "")
74-
tx_cds[tid].append((s, e))
74+
for tid in a.get("Parent", "").replace("transcript:", "").split(","):
75+
if tid:
76+
tx_cds[tid].append((s, e))
7577
genes = {}
7678
for tid, gid in tx_gene.items():
7779
if gene_biotype.get(gid) != "protein_coding" or tx_biotype.get(tid) != "protein_coding":

bionemo-recipes/interpretability/sparse_autoencoders/recipes/evo2/scripts/labelers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ def _kozak(ctx):
225225
@labeler("splice_donor", complex=True)
226226
def _sd(ctx):
227227
# 5' donor consensus GT(A/G)AGT — mark the GT
228-
return _dna_mask(ctx, _starts(ctx.dna, r"GT[AG]AG"))
228+
return _dna_mask(ctx, _starts(ctx.dna, r"GT[AG]AGT"))
229229

230230

231231
@labeler("splice_acceptor", complex=True)

0 commit comments

Comments
 (0)