@@ -1502,17 +1502,20 @@ int next_cigar_op(uint32_t *cigar, uint32_t ncigar, int *skip, int *spos,
15021502
15031503// Ensure ref and hist are large enough.
15041504static inline int extend_ref (char * * ref , uint32_t (* * hist )[5 ], hts_pos_t pos ,
1505- hts_pos_t ref_start , hts_pos_t * ref_end ) {
1505+ hts_pos_t ref_start , hts_pos_t * ref_end ,
1506+ hts_pos_t * ref_end_alloc ) {
1507+ if (* ref_end < pos )
1508+ * ref_end = pos ;
15061509 if (pos < ref_start )
15071510 return -1 ;
1508- if (pos < * ref_end )
1511+ if (pos < * ref_end_alloc )
15091512 return 0 ;
15101513
15111514 // realloc
15121515 if (pos - ref_start > UINT_MAX )
15131516 return -2 ; // protect overflow in new_end calculation
15141517
1515- hts_pos_t old_end = * ref_end ? * ref_end : ref_start ;
1518+ hts_pos_t old_end = * ref_end_alloc ? * ref_end_alloc : ref_start ;
15161519 hts_pos_t new_end = ref_start + 1000 + (pos - ref_start )* 1.5 ;
15171520
15181521 // Refuse to work on excessively large blocks.
@@ -1531,7 +1534,7 @@ static inline int extend_ref(char **ref, uint32_t (**hist)[5], hts_pos_t pos,
15311534 if (!tmp5 )
15321535 return -1 ;
15331536 * hist = tmp5 ;
1534- * ref_end = new_end ;
1537+ * ref_end_alloc = new_end ;
15351538
15361539 // initialise
15371540 old_end -= ref_start ;
@@ -1546,7 +1549,7 @@ static inline int extend_ref(char **ref, uint32_t (**hist)[5], hts_pos_t pos,
15461549// Returns 1 on success, <0 on failure
15471550static int cram_add_to_ref_MD (bam1_t * b , char * * ref , uint32_t (* * hist )[5 ],
15481551 hts_pos_t ref_start , hts_pos_t * ref_end ,
1549- const uint8_t * MD ) {
1552+ hts_pos_t * ref_end_alloc , const uint8_t * MD ) {
15501553 uint8_t * seq = bam_get_seq (b );
15511554 uint32_t * cigar = bam_get_cigar (b );
15521555 uint32_t ncigar = b -> core .n_cigar ;
@@ -1558,7 +1561,7 @@ static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5],
15581561
15591562 // No sequence means extend based on CIGAR instead
15601563 if (!b -> core .l_qseq && extend_ref (ref , hist , rseq_end ,
1561- ref_start , ref_end ) < 0 )
1564+ ref_start , ref_end , ref_end_alloc ) < 0 )
15621565 return -1 ;
15631566
15641567 int iseq = 0 , next_op ;
@@ -1573,7 +1576,7 @@ static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5],
15731576 int len = hts_str2uint ((char * )MD , (char * * )& MD , 31 , & overflow );
15741577 if (overflow ||
15751578 extend_ref (ref , hist , iref + ref_start + len ,
1576- ref_start , ref_end ) < 0 )
1579+ ref_start , ref_end , ref_end_alloc ) < 0 )
15771580 return -1 ;
15781581 while (iseq < b -> core .l_qseq && len ) {
15791582 // rewrite to have internal loops?
@@ -1605,7 +1608,7 @@ static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5],
16051608 MD ++ ;
16061609 while (isalpha (* MD )) {
16071610 if (extend_ref (ref , hist , iref + ref_start , ref_start ,
1608- ref_end ) < 0 )
1611+ ref_end , ref_end_alloc ) < 0 )
16091612 return -1 ;
16101613 if ((next_op = next_cigar_op (cigar , ncigar , cig_skip ,
16111614 & iseq , & cig_ind , & cig_op ,
@@ -1621,7 +1624,8 @@ static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5],
16211624 }
16221625 } else {
16231626 // substitution
1624- if (extend_ref (ref , hist , iref + ref_start , ref_start , ref_end ) < 0 )
1627+ if (extend_ref (ref , hist , iref + ref_start , ref_start , ref_end ,
1628+ ref_end_alloc ) < 0 )
16251629 return -1 ;
16261630 if ((next_op = next_cigar_op (cigar , ncigar , cig_skip ,
16271631 & iseq , & cig_ind , & cig_op ,
@@ -1650,12 +1654,14 @@ static int cram_add_to_ref_MD(bam1_t *b, char **ref, uint32_t (**hist)[5],
16501654// Returns >=0 on success,
16511655// -1 on failure (eg inconsistent data)
16521656static int cram_add_to_ref (bam1_t * b , char * * ref , uint32_t (* * hist )[5 ],
1653- hts_pos_t ref_start , hts_pos_t * ref_end ) {
1657+ hts_pos_t ref_start , hts_pos_t * ref_end ,
1658+ hts_pos_t * ref_end_alloc ) {
16541659 const uint8_t * MD = bam_aux_get (b , "MD" );
16551660 int ret = 0 ;
16561661 if (MD && * MD == 'Z' ) {
16571662 // We can use MD to directly compute the reference
1658- int ret = cram_add_to_ref_MD (b , ref , hist , ref_start , ref_end , MD + 1 );
1663+ int ret = cram_add_to_ref_MD (b , ref , hist , ref_start , ref_end ,
1664+ ref_end_alloc , MD + 1 );
16591665
16601666 if (ret > 0 )
16611667 return ret ;
@@ -1683,7 +1689,7 @@ static int cram_add_to_ref(bam1_t *b, char **ref, uint32_t (**hist)[5],
16831689 static uint8_t L16 [16 ] = {4 ,0 ,1 ,4 , 2 ,4 ,4 ,4 , 3 ,4 ,4 ,4 , 4 ,4 ,4 ,4 };
16841690
16851691 if (extend_ref (ref , hist , iref + ref_start + len ,
1686- ref_start , ref_end ) < 0 )
1692+ ref_start , ref_end , ref_end_alloc ) < 0 )
16871693 return -1 ;
16881694 if (iseq + len <= b -> core .l_qseq ) {
16891695 // Nullify failed MD:Z if appropriate
@@ -1726,15 +1732,16 @@ static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) {
17261732 // user told us to do embed_ref=2.
17271733 char * ref = NULL ;
17281734 uint32_t (* hist )[5 ] = NULL ;
1729- hts_pos_t ref_start = c -> bams [r1 ]-> core .pos , ref_end = 0 ;
1735+ hts_pos_t ref_start = c -> bams [r1 ]-> core .pos , ref_end = 0 ,
1736+ ref_end_alloc = 0 ;
17301737 if (ref_start < 0 )
17311738 return -1 ; // cannot build consensus from unmapped data
17321739
17331740 // initial allocation
17341741 if (extend_ref (& ref , & hist ,
17351742 c -> bams [r1 + s -> hdr -> num_records - 1 ]-> core .pos +
17361743 c -> bams [r1 + s -> hdr -> num_records - 1 ]-> core .l_qseq ,
1737- ref_start , & ref_end ) < 0 )
1744+ ref_start , & ref_end , & ref_end_alloc ) < 0 )
17381745 return -1 ;
17391746
17401747 // Add each bam file to the reference/consensus arrays
@@ -1746,7 +1753,8 @@ static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) {
17461753 goto err ;
17471754 }
17481755 last_pos = c -> bams [r1 ]-> core .pos ;
1749- if (cram_add_to_ref (c -> bams [r1 ], & ref , & hist , ref_start , & ref_end ) < 0 )
1756+ if (cram_add_to_ref (c -> bams [r1 ], & ref , & hist , ref_start , & ref_end ,
1757+ & ref_end_alloc ) < 0 )
17501758 goto err ;
17511759 }
17521760
@@ -1770,6 +1778,7 @@ static int cram_generate_reference(cram_container *c, cram_slice *s, int r1) {
17701778 c -> ref_start = ref_start + 1 ;
17711779 c -> ref_end = ref_end + 1 ;
17721780 c -> ref_free = 1 ;
1781+
17731782 return 0 ;
17741783
17751784 err :
@@ -2012,8 +2021,10 @@ int cram_encode_container(cram_fd *fd, cram_container *c) {
20122021 pthread_mutex_unlock (& fd -> ref_lock );
20132022 }
20142023
2015- if (c -> ref_end > fd -> refs -> ref_id [c -> ref_id ]-> LN_length )
2016- c -> ref_end = fd -> refs -> ref_id [c -> ref_id ]-> LN_length ;
2024+ hts_pos_t rlen = MAX (fd -> refs -> ref_id [c -> ref_id ]-> LN_length ,
2025+ fd -> refs -> ref_id [c -> ref_id ]-> length );
2026+ if (c -> ref_end > rlen && rlen )
2027+ c -> ref_end = rlen ;
20172028 }
20182029
20192030 // Iterate through records creating the cram blocks for some
@@ -3927,7 +3938,8 @@ static int process_one_read(cram_fd *fd, cram_container *c,
39273938 if (p -> cram_flags & CRAM_FLAG_STATS_ADDED ) {
39283939 cram_stats_del (c -> stats [DS_NP ], p -> mate_pos );
39293940 cram_stats_del (c -> stats [DS_MF ], p -> mate_flags );
3930- if (!(p -> cram_flags & CRAM_FLAG_EXPLICIT_TLEN ))
3941+ if (!(p -> cram_flags & CRAM_FLAG_EXPLICIT_TLEN )
3942+ && !explicit_tlen )
39313943 cram_stats_del (c -> stats [DS_TS ], p -> tlen );
39323944 cram_stats_del (c -> stats [DS_NS ], p -> mate_ref_id );
39333945 }
0 commit comments