Skip to content

Commit f7194e5

Browse files
authored
fix split_kmer.rs
1 parent fe5b57c commit f7194e5

1 file changed

Lines changed: 31 additions & 13 deletions

File tree

src/split_kmer.rs

Lines changed: 31 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -18,12 +18,13 @@ use std::io::{BufRead, BufReader};
1818
const MARKER_KMER_LEN: usize = 31;
1919

2020
/// Represents a single marker, capable of describing all variant types.
21+
/// MODIFIED: The `lineages` field is now a vector of strings to hold the hierarchy.
2122
#[derive(Debug, Clone)]
2223
pub struct Marker {
2324
pub pos: usize,
2425
pub ref_allele: String,
2526
pub alt_allele: String,
26-
pub lineage: String,
27+
pub lineages: Vec<String>, // e.g., ["L2", "L2.2", "L2.2.1"]
2728
pub alt_kmer: Vec<u8>,
2829
pub ref_kmer: Vec<u8>,
2930
pub annotations: Vec<String>,
@@ -183,6 +184,7 @@ pub fn build_markers(ref_fasta: &str, tsv_markers: &str) -> Result<Vec<Marker>>
183184
}
184185
let fields: Vec<&str> = line_str.split('\t').collect();
185186
if fields.len() < 4 {
187+
warn!("Skipping marker line with fewer than 4 columns: {}", line_str);
186188
continue;
187189
}
188190

@@ -195,12 +197,28 @@ pub fn build_markers(ref_fasta: &str, tsv_markers: &str) -> Result<Vec<Marker>>
195197
};
196198
let ref_allele_str = fields[1].to_string();
197199
let alt_allele_str = fields[2].to_string();
198-
let lineage = fields[3].to_string();
199-
let annotations = if fields.len() > 4 {
200-
fields[4..].iter().map(|s| s.to_string()).collect()
201-
} else {
202-
Vec::new()
203-
};
200+
201+
// MODIFICATION: Read multiple lineage columns
202+
let mut lineage_cols: Vec<String> = Vec::new();
203+
let mut annotation_cols: Vec<String> = Vec::new();
204+
let mut reading_lineages = true;
205+
206+
for field in fields[3..].iter() {
207+
if field.trim().is_empty() {
208+
reading_lineages = false; // Stop reading lineages after the first empty cell
209+
continue;
210+
}
211+
if reading_lineages {
212+
lineage_cols.push(field.to_string());
213+
} else {
214+
annotation_cols.push(field.to_string());
215+
}
216+
}
217+
218+
if lineage_cols.is_empty() {
219+
warn!("Skipping marker at pos {} due to no lineage information.", pos0 + 1);
220+
continue;
221+
}
204222

205223
let ref_allele = ref_allele_str.as_bytes();
206224
let alt_allele = alt_allele_str.as_bytes();
@@ -214,24 +232,24 @@ pub fn build_markers(ref_fasta: &str, tsv_markers: &str) -> Result<Vec<Marker>>
214232
{
215233
markers.push(Marker {
216234
pos: pos0,
217-
lineage,
235+
lineages: lineage_cols,
218236
ref_allele: ref_allele_str,
219237
alt_allele: alt_allele_str,
220238
ref_kmer,
221239
alt_kmer,
222-
annotations,
240+
annotations: annotation_cols,
223241
});
224242
}
225243
} else {
226244
// --- Logic for Large Variants (SVs) ---
227245
let base_marker = Marker {
228246
pos: pos0,
229-
lineage,
230-
ref_allele: ref_allele_str.clone(), // FIX: Clone the string before moving
231-
alt_allele: alt_allele_str.clone(), // FIX: Clone the string before moving
247+
lineages: lineage_cols,
248+
ref_allele: ref_allele_str.clone(),
249+
alt_allele: alt_allele_str.clone(),
232250
ref_kmer: Vec::new(), // Placeholder
233251
alt_kmer: Vec::new(), // Placeholder
234-
annotations,
252+
annotations: annotation_cols,
235253
};
236254
let sv_markers =
237255
build_large_variant_kmers(pos0, ref_allele, alt_allele, &ref_seq, &base_marker);

0 commit comments

Comments
 (0)