@@ -4139,6 +4139,8 @@ int bam_plp_insertion(const bam_pileup1_t *p, kstring_t *ins, int *del_len) {
41394139KHASH_MAP_INIT_STR (olap_hash , lbnode_t * )
41404140typedef khash_t (olap_hash ) olap_hash_t ;
41414141
4142+ #define MAX_AHEAD 4096
4143+
41424144struct __bam_plp_t {
41434145 mempool_t * mp ;
41444146 lbnode_t * head , * tail ;
@@ -4153,6 +4155,10 @@ struct __bam_plp_t {
41534155 void * data ;
41544156 olap_hash_t * overlaps ;
41554157
4158+ uint32_t depth [MAX_AHEAD ];
4159+ uint64_t last_depth_pos ;
4160+ double depth_pos_fract ;
4161+
41564162 // For notification of creation and destruction events
41574163 // and associated client-owned pointer.
41584164 int (* plp_construct )(void * data , const bam1_t * b , bam_pileup_cd * cd );
@@ -4172,6 +4178,9 @@ bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data)
41724178 iter -> data = data ;
41734179 iter -> b = bam_init1 ();
41744180 }
4181+ memset (iter -> depth , 0 , MAX_AHEAD * sizeof (* iter -> depth ));
4182+ iter -> depth_pos_fract = 0 ; // fraction between 0 = left and 1 = right
4183+ iter -> last_depth_pos = 0 ;
41754184 return iter ;
41764185}
41774186
@@ -4494,21 +4503,60 @@ const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_
44944503
44954504int bam_plp_push (bam_plp_t iter , const bam1_t * b )
44964505{
4506+ uint64_t endpos = 0 ;
4507+
44974508 if (iter -> error ) return -1 ;
44984509 if (b ) {
44994510 if (b -> core .tid < 0 ) { overlap_remove (iter , b ); return 0 ; }
45004511 // Skip only unmapped reads here, any additional filtering must be done in iter->func
45014512 if (b -> core .flag & BAM_FUNMAP ) { overlap_remove (iter , b ); return 0 ; }
4502- if (iter -> tid == b -> core .tid && iter -> pos == b -> core .pos && iter -> mp -> cnt > iter -> maxcnt )
4503- {
4513+ if (iter -> depth_pos_fract == 0
4514+ && iter -> tid == b -> core .tid
4515+ && iter -> pos == b -> core .pos
4516+ && iter -> mp -> cnt > iter -> maxcnt ) {
4517+ // Removal from left-end depth; mp->cnt
45044518 overlap_remove (iter , b );
45054519 return 0 ;
4520+ } else if (iter -> depth_pos_fract > 0
4521+ && iter -> tid == b -> core .tid
4522+ && iter -> pos == b -> core .pos ) {
4523+ // Removal from depth somewhere else along read
4524+ endpos = bam_endpos (b );
4525+ uint32_t len = (uint32_t )(endpos - b -> core .pos - 1 );
4526+ len = len * iter -> depth_pos_fract + 0.4999 ;
4527+ if (b -> core .pos + len < iter -> last_depth_pos
4528+ && iter -> depth [(b -> core .pos + len )%MAX_AHEAD ] > iter -> maxcnt ) {
4529+ // NB this means read longer than MAX_AHEAD (after downsizing
4530+ // by depth_pos fraction) will be retained as by definition
4531+ // it'll have zero coverage that far out.
4532+ overlap_remove (iter , b );
4533+ return 0 ;
4534+ }
4535+
4536+ // Zero newly observed iter depth elemtns.
4537+ uint64_t end_clipped = endpos , p ;
4538+ if (end_clipped > iter -> pos + MAX_AHEAD )
4539+ end_clipped = iter -> pos + MAX_AHEAD ;
4540+ if (iter -> last_depth_pos < end_clipped ) {
4541+ //iter->last_depth_pos = end_clipped;
4542+ if (iter -> last_depth_pos < end_clipped - MAX_AHEAD )
4543+ iter -> last_depth_pos = end_clipped - MAX_AHEAD ;
4544+ for (p = iter -> last_depth_pos ; p < end_clipped ; p ++ )
4545+ iter -> depth [p % MAX_AHEAD ] = 0 ;
4546+ iter -> last_depth_pos = end_clipped ;
4547+ }
4548+
4549+ // Increment depth
4550+ for (p = b -> core .pos ; p < end_clipped ; p ++ )
4551+ iter -> depth [p % MAX_AHEAD ]++ ;
45064552 }
45074553 if (bam_copy1 (& iter -> tail -> b , b ) == NULL )
45084554 return -1 ;
45094555 iter -> tail -> b .id = iter -> id ++ ;
45104556 iter -> tail -> beg = b -> core .pos ;
4511- iter -> tail -> end = bam_endpos (b );
4557+ if (!endpos )
4558+ endpos = bam_endpos (b );
4559+ iter -> tail -> end = endpos ;
45124560 iter -> tail -> s = g_cstate_null ; iter -> tail -> s .end = iter -> tail -> end - 1 ; // initialize cstate_t
45134561 if (b -> core .tid < iter -> max_tid ) {
45144562 hts_log_error ("The input is not sorted (chromosomes out of order)" );
@@ -4602,6 +4650,11 @@ void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt)
46024650 iter -> maxcnt = maxcnt ;
46034651}
46044652
4653+ void bam_plp_set_cntpos (bam_plp_t iter , double end )
4654+ {
4655+ iter -> depth_pos_fract = end ;
4656+ }
4657+
46054658/************************
46064659 *** Mpileup iterator ***
46074660 ************************/
@@ -4651,6 +4704,13 @@ void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt)
46514704 iter -> iter [i ]-> maxcnt = maxcnt ;
46524705}
46534706
4707+ void bam_mplp_set_cntpos (bam_mplp_t iter , double end )
4708+ {
4709+ int i ;
4710+ for (i = 0 ; i < iter -> n ; ++ i )
4711+ iter -> iter [i ]-> depth_pos_fract = end ;
4712+ }
4713+
46544714void bam_mplp_destroy (bam_mplp_t iter )
46554715{
46564716 int i ;
0 commit comments