@@ -2514,8 +2514,13 @@ long long hts_parse_decimal(const char *str, char **strend, int flags)
25142514 * This is necessary due to GRCh38 HLA additions which have reference names
25152515 * like "HLA-DRB1*12:17".
25162516 *
2517- * On success the end of the reference is returned (colon or end of string).
2518- * On failure NULL is returned, and if tid/getid are supplied *tid will be -1.
2517+ * getid is optional and may be passed in as NULL. If given it is used to
2518+ * validate the reference name exists and is unambiguously parseable. If not
2519+ * given, a best guess will be made, but with no guarantee of validity.
2520+ *
2521+ * On success the end of the reference is returned (colon or end of string);
2522+ * beg/end will be set, plus tid if getid has been supplied.
2523+ * On failure NULL is returned.
25192524 */
25202525const char * hts_parse_reg2 (const char * s , int * tid , int * beg , int * end ,
25212526 hts_name2id_f getid , void * hdr )
@@ -2525,18 +2530,37 @@ const char *hts_parse_reg2(const char *s, int *tid, int *beg, int *end,
25252530 int tid_ , s_len = strlen (s ); // int is sufficient given beg/end types
25262531 if (!tid ) tid = & tid_ ; // simplifies code below
25272532
2533+ // No colon implies entirety of the reference
25282534 const char * colon = strrchr (s , ':' );
25292535 if (colon == NULL ) {
25302536 * beg = 0 ; * end = INT_MAX ;
25312537 * tid = getid ? getid (hdr , s ) : 0 ;
25322538 return * tid >= 0 ? s + s_len : NULL ;
25332539 }
25342540
2535- // Has a colon, but check whole name first
2541+ // Has a colon, but check whole name first.
25362542 if (getid ) {
25372543 * beg = 0 ; * end = INT_MAX ;
2538- if ((* tid = getid (hdr , s )) >= 0 )
2544+ if ((* tid = getid (hdr , s )) >= 0 ) {
2545+ // Entire name matches, but also check this isn't
2546+ // ambiguous. eg we have ref chr1 and ref chr1:100-200
2547+ // both present.
2548+ kstring_t ks = { 0 , 0 , NULL };
2549+ kputsn (s , colon - s , & ks ); // convert to nul terminated string
2550+ if (!ks .s ) {
2551+ * tid = -1 ;
2552+ return NULL ;
2553+ }
2554+ if (getid (hdr , ks .s ) >= 0 ) {
2555+ free (ks .s );
2556+ * tid = -1 ;
2557+ hts_log_error ("Range %s is ambiguous" , s );
2558+ return NULL ;
2559+ }
2560+ free (ks .s );
2561+
25392562 return s + s_len ;
2563+ }
25402564 }
25412565
25422566 char * hyphen ;
@@ -2557,6 +2581,8 @@ const char *hts_parse_reg2(const char *s, int *tid, int *beg, int *end,
25572581 }
25582582 * tid = getid (hdr , ks .s );
25592583 free (ks .s );
2584+ if (* tid < 0 )
2585+ return NULL ;
25602586 } else {
25612587 * tid = 0 ;
25622588 }
@@ -2577,14 +2603,9 @@ hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f g
25772603 else if (strcmp (reg , "*" ) == 0 )
25782604 return itr_query (idx , HTS_IDX_NOCOOR , 0 , 0 , readrec );
25792605
2580- if ((tid = getid (hdr , reg )) >= 0 ) {
2581- beg = 0 ; end = INT_MAX ;
2582- return itr_query (idx , tid , beg , end , readrec );
2583- }
2584-
2585- hts_parse_reg2 (reg , & tid , & beg , & end , getid , hdr );
2606+ if (!hts_parse_reg2 (reg , & tid , & beg , & end , getid , hdr ))
2607+ return NULL ;
25862608
2587- if (tid < 0 ) return NULL ;
25882609 return itr_query (idx , tid , beg , end , readrec );
25892610}
25902611
0 commit comments