diff --git a/hts.c b/hts.c index 14134a01f..2687d92bc 100644 --- a/hts.c +++ b/hts.c @@ -4320,6 +4320,7 @@ int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) { void *fp; + void *readrec_data; int ret, tid, i, cr, ci; hts_pos_t beg, end; hts_reglist_t *found_reg; @@ -4332,6 +4333,8 @@ int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) fp = fd->fp.bgzf; } + readrec_data = iter->readrec_data ? iter->readrec_data : fd; + if (iter->read_rest) { if (iter->curr_off) { // seek to the start if (iter->seek(fp, iter->curr_off, SEEK_SET) < 0) { @@ -4341,7 +4344,7 @@ int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) iter->curr_off = 0; // only seek once } - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); + ret = iter->readrec(fp, readrec_data, r, &tid, &beg, &end); if (ret < 0) iter->finished = 1; @@ -4425,7 +4428,7 @@ int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) // contain a few mapped reads, so scroll // forward until finding the first unmapped read. do { - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); + ret = iter->readrec(fp, readrec_data, r, &tid, &beg, &end); } while (tid >= 0 && ret >=0); if (ret < 0) @@ -4531,7 +4534,7 @@ int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r) } } - ret = iter->readrec(fp, fd, r, &tid, &beg, &end); + ret = iter->readrec(fp, readrec_data, r, &tid, &beg, &end); if (ret < 0) { if (iter->is_cram && cram_eof(fp)) { // Skip to end of range diff --git a/htslib.map b/htslib.map index e3cb96c31..1c3038828 100644 --- a/htslib.map +++ b/htslib.map @@ -261,6 +261,7 @@ HTSLIB_1.0 { tbx_index_build; tbx_index_load; tbx_name2id; + tbx_itr_regions; tbx_readrec; tbx_seqnames; vcf_format; diff --git a/htslib/hts.h b/htslib/hts.h index 861bffa25..18103716f 100644 --- a/htslib/hts.h +++ b/htslib/hts.h @@ -886,6 +886,7 @@ typedef struct hts_itr_t { hts_readrec_func *readrec; hts_seek_func *seek; hts_tell_func *tell; + void *readrec_data; // If set, passed to readrec instead of fd struct { int n, m; int *a; diff --git a/htslib/tbx.h b/htslib/tbx.h index f4b5bd856..c0bb4d7e2 100644 --- a/htslib/tbx.h +++ b/htslib/tbx.h @@ -61,10 +61,25 @@ extern const tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sa #define tbx_itr_querys(tbx, s) hts_itr_querys((tbx)->idx, (s), (hts_name2id_f)(tbx_name2id), (tbx), hts_itr_query, tbx_readrec) #define tbx_itr_next(htsfp, tbx, itr, r) hts_itr_next(hts_get_bgzfp(htsfp), (itr), (r), (tbx)) #define tbx_bgzf_itr_next(bgzfp, tbx, itr, r) hts_itr_next((bgzfp), (itr), (r), (tbx)) + #define tbx_itr_multi_next(htsfp, itr, r) hts_itr_multi_next((htsfp), (itr), (r)) HTSLIB_EXPORT int tbx_name2id(tbx_t *tbx, const char *ss); +/// Create a multi-region iterator for a tabix-indexed file +/** @param tbx Tabix index + @param reglist Region list created by hts_reglist_create() + @param regcount Number of regions in the list + @return An iterator, or NULL on failure + + The multi-region iterator handles overlapping regions by deduplicating + records that fall in multiple regions, so each record is returned at + most once. Free the iterator with tbx_itr_destroy() when done. + Use tbx_itr_multi_next() to advance the iterator. +*/ + HTSLIB_EXPORT + hts_itr_t *tbx_itr_regions(tbx_t *tbx, hts_reglist_t *reglist, int regcount); + /* Internal helper function used by tbx_itr_next() */ HTSLIB_EXPORT BGZF *hts_get_bgzfp(htsFile *fp); diff --git a/tbx.c b/tbx.c index dfb9c6313..c0e1a1ac7 100644 --- a/tbx.c +++ b/tbx.c @@ -640,3 +640,35 @@ const char **tbx_seqnames(tbx_t *tbx, int *n) return names; } +static int tbx_pseek(void *fp, int64_t offset, int whence) +{ + BGZF *fd = (BGZF *)fp; + return bgzf_seek(fd, offset, whence); +} + +static int64_t tbx_ptell(void *fp) +{ + BGZF *fd = (BGZF *)fp; + if (!fd) + return -1L; + return bgzf_tell(fd); +} + +hts_itr_t *tbx_itr_regions(tbx_t *tbx, hts_reglist_t *reglist, int regcount) +{ + hts_itr_t *itr; + + if (!tbx || !reglist) + return NULL; + + itr = hts_itr_regions(tbx->idx, reglist, regcount, + (hts_name2id_f)(tbx_name2id), tbx, + hts_itr_multi_bam, tbx_readrec, + tbx_pseek, tbx_ptell); + + if (itr) + itr->readrec_data = tbx; + + return itr; +} +