@@ -12,13 +12,17 @@ use std::{
1212} ;
1313
1414use bio:: { alphabets:: dna, data_structures:: suffix_array:: SuffixArray } ;
15- use bstr:: { BString , ByteSlice } ;
15+ use bstr:: { BStr , BString , ByteSlice } ;
1616use clap:: crate_description;
1717use log:: { debug, info, trace} ;
1818use min_max_heap:: MinMaxHeap ;
1919use noodles:: {
2020 bam,
21- sam:: { self , alignment:: io:: Write as SamWrite } ,
21+ sam:: {
22+ self ,
23+ alignment:: io:: Write as SamWrite ,
24+ header:: record:: value:: map:: { Map , ReadGroup } ,
25+ } ,
2226} ;
2327use rand:: RngCore ;
2428use rayon:: prelude:: * ;
@@ -56,6 +60,7 @@ pub fn run(
5660 out_file_path : & str ,
5761 force_overwrite : bool ,
5862 alignment_parameters : & AlignmentParameters ,
63+ read_group : Option < ( BString , Map < ReadGroup > ) > ,
5964) -> Result < ( ) > {
6065 let reads_path = Path :: new ( reads_path) ;
6166 let out_file_path = Path :: new ( out_file_path) ;
@@ -97,7 +102,11 @@ pub fn run(
97102
98103 info ! ( "Map reads" ) ;
99104 let mut input_source = InputSource :: from_path ( reads_path) ?;
100- let out_header = create_bam_header ( input_source. header ( ) , & identifier_position_map) ?;
105+ let out_header = create_bam_header (
106+ input_source. header ( ) ,
107+ & identifier_position_map,
108+ read_group. clone ( ) ,
109+ ) ?;
101110 out_file. write_header ( & out_header) ?;
102111 run_inner (
103112 input_source. task_queue ( alignment_parameters. chunk_size ) ,
@@ -107,6 +116,7 @@ pub fn run(
107116 & identifier_position_map,
108117 & original_symbols,
109118 & out_header,
119+ read_group. as_ref ( ) . map ( |( id, _map) | id. as_bstr ( ) ) ,
110120 & mut out_file,
111121 ) ?;
112122
@@ -125,6 +135,7 @@ fn run_inner<S, T, W>(
125135 identifier_position_map : & FastaIdPositions ,
126136 original_symbols : & OriginalSymbols ,
127137 out_header : & sam:: Header ,
138+ read_group : Option < & BStr > ,
128139 out_file : & mut bam:: io:: Writer < W > ,
129140) -> Result < ( ) >
130141where
@@ -269,6 +280,7 @@ where
269280 original_symbols,
270281 Some ( & duration) ,
271282 alignment_parameters,
283+ read_group,
272284 & mut rng,
273285 )
274286 } ,
@@ -288,6 +300,7 @@ where
288300pub fn create_bam_header (
289301 src_header : Option < & sam:: Header > ,
290302 identifier_position_map : & FastaIdPositions ,
303+ read_group : Option < ( BString , Map < ReadGroup > ) > ,
291304) -> Result < sam:: Header > {
292305 use sam:: header:: record:: value:: map:: { self , Map } ;
293306
@@ -343,9 +356,13 @@ pub fn create_bam_header(
343356 sam_header_builder = sam_header_builder. add_comment ( comment. as_bstr ( ) ) ;
344357 }
345358
346- for ( id, read_group) in src_header. read_groups ( ) {
347- sam_header_builder =
348- sam_header_builder. add_read_group ( id. as_bstr ( ) , read_group. clone ( ) ) ;
359+ if let Some ( ( id, map) ) = read_group {
360+ sam_header_builder = sam_header_builder. add_read_group ( id, map) ;
361+ } else {
362+ for ( id, read_group) in src_header. read_groups ( ) {
363+ sam_header_builder =
364+ sam_header_builder. add_read_group ( id. as_bstr ( ) , read_group. clone ( ) ) ;
365+ }
349366 }
350367 }
351368
@@ -390,6 +407,7 @@ pub fn intervals_to_bam<R, S>(
390407 original_symbols : & OriginalSymbols ,
391408 duration : Option < & Duration > ,
392409 alignment_parameters : & AlignmentParameters ,
410+ read_group : Option < & BStr > ,
393411 rng : & mut R ,
394412) -> Result < sam:: alignment:: RecordBuf >
395413where
@@ -517,6 +535,7 @@ where
517535 duration,
518536 Some ( alternative_hits) ,
519537 original_symbols,
538+ read_group,
520539 ) ;
521540 }
522541 None => {
@@ -544,6 +563,7 @@ where
544563 duration,
545564 None ,
546565 original_symbols,
566+ read_group,
547567 )
548568}
549569
@@ -712,6 +732,7 @@ fn create_bam_record(
712732 // Contains valid content for the `YA` tag
713733 alternative_hits : Option < AlternativeAlignments > ,
714734 original_symbols : & OriginalSymbols ,
735+ read_group : Option < & BStr > ,
715736) -> Result < sam:: alignment:: RecordBuf > {
716737 let mut bam_builder = sam:: alignment:: RecordBuf :: builder ( ) ;
717738
@@ -822,9 +843,18 @@ fn create_bam_record(
822843 . into_iter ( )
823844 // Remove BWA (+ mapAD) specific auxiliary fields (avoids potential confusion)
824845 . filter ( |( tag, _v) | !tag_filter. contains ( & tag) )
846+ // Don't copy input read group if one is given via cmdline
847+ . filter ( |( tag, _v) | !( tag == b"RG" && read_group. is_some ( ) ) )
825848 . map ( |( tag, value) | Ok ( ( tag. into ( ) , value. into ( ) ) ) )
826849 . collect :: < Result < Vec < _ > > > ( ) ?;
827850
851+ if let Some ( read_group) = read_group {
852+ aux_data. push ( (
853+ sam:: alignment:: record:: data:: field:: tag:: Tag :: READ_GROUP ,
854+ sam:: alignment:: record_buf:: data:: field:: Value :: String ( read_group. to_owned ( ) ) ,
855+ ) ) ;
856+ }
857+
828858 if let Some ( hit_interval) = hit_interval {
829859 aux_data. push ( (
830860 sam:: alignment:: record:: data:: field:: tag:: Tag :: ALIGNMENT_SCORE ,
0 commit comments