@@ -18,12 +18,13 @@ use std::io::{BufRead, BufReader};
1818const MARKER_KMER_LEN : usize = 31 ;
1919
2020/// Represents a single marker, capable of describing all variant types.
21+ /// MODIFIED: The `lineages` field is now a vector of strings to hold the hierarchy.
2122#[ derive( Debug , Clone ) ]
2223pub struct Marker {
2324 pub pos : usize ,
2425 pub ref_allele : String ,
2526 pub alt_allele : String ,
26- pub lineage : String ,
27+ pub lineages : Vec < String > , // e.g., ["L2", "L2.2", "L2.2.1"]
2728 pub alt_kmer : Vec < u8 > ,
2829 pub ref_kmer : Vec < u8 > ,
2930 pub annotations : Vec < String > ,
@@ -183,6 +184,7 @@ pub fn build_markers(ref_fasta: &str, tsv_markers: &str) -> Result<Vec<Marker>>
183184 }
184185 let fields: Vec < & str > = line_str. split ( '\t' ) . collect ( ) ;
185186 if fields. len ( ) < 4 {
187+ warn ! ( "Skipping marker line with fewer than 4 columns: {}" , line_str) ;
186188 continue ;
187189 }
188190
@@ -195,12 +197,28 @@ pub fn build_markers(ref_fasta: &str, tsv_markers: &str) -> Result<Vec<Marker>>
195197 } ;
196198 let ref_allele_str = fields[ 1 ] . to_string ( ) ;
197199 let alt_allele_str = fields[ 2 ] . to_string ( ) ;
198- let lineage = fields[ 3 ] . to_string ( ) ;
199- let annotations = if fields. len ( ) > 4 {
200- fields[ 4 ..] . iter ( ) . map ( |s| s. to_string ( ) ) . collect ( )
201- } else {
202- Vec :: new ( )
203- } ;
200+
201+ // MODIFICATION: Read multiple lineage columns
202+ let mut lineage_cols: Vec < String > = Vec :: new ( ) ;
203+ let mut annotation_cols: Vec < String > = Vec :: new ( ) ;
204+ let mut reading_lineages = true ;
205+
206+ for field in fields[ 3 ..] . iter ( ) {
207+ if field. trim ( ) . is_empty ( ) {
208+ reading_lineages = false ; // Stop reading lineages after the first empty cell
209+ continue ;
210+ }
211+ if reading_lineages {
212+ lineage_cols. push ( field. to_string ( ) ) ;
213+ } else {
214+ annotation_cols. push ( field. to_string ( ) ) ;
215+ }
216+ }
217+
218+ if lineage_cols. is_empty ( ) {
219+ warn ! ( "Skipping marker at pos {} due to no lineage information." , pos0 + 1 ) ;
220+ continue ;
221+ }
204222
205223 let ref_allele = ref_allele_str. as_bytes ( ) ;
206224 let alt_allele = alt_allele_str. as_bytes ( ) ;
@@ -214,24 +232,24 @@ pub fn build_markers(ref_fasta: &str, tsv_markers: &str) -> Result<Vec<Marker>>
214232 {
215233 markers. push ( Marker {
216234 pos : pos0,
217- lineage ,
235+ lineages : lineage_cols ,
218236 ref_allele : ref_allele_str,
219237 alt_allele : alt_allele_str,
220238 ref_kmer,
221239 alt_kmer,
222- annotations,
240+ annotations : annotation_cols ,
223241 } ) ;
224242 }
225243 } else {
226244 // --- Logic for Large Variants (SVs) ---
227245 let base_marker = Marker {
228246 pos : pos0,
229- lineage ,
230- ref_allele : ref_allele_str. clone ( ) , // FIX: Clone the string before moving
231- alt_allele : alt_allele_str. clone ( ) , // FIX: Clone the string before moving
247+ lineages : lineage_cols ,
248+ ref_allele : ref_allele_str. clone ( ) ,
249+ alt_allele : alt_allele_str. clone ( ) ,
232250 ref_kmer : Vec :: new ( ) , // Placeholder
233251 alt_kmer : Vec :: new ( ) , // Placeholder
234- annotations,
252+ annotations : annotation_cols ,
235253 } ;
236254 let sv_markers =
237255 build_large_variant_kmers ( pos0, ref_allele, alt_allele, & ref_seq, & base_marker) ;
0 commit comments