Skip to content

Commit 79cd218

Browse files
committed
Added support for brace-quoting of reference names.
E.g. with contigs named "chr1" and "chr1:100-200", we can specify "{chr1}:100-200" and "{chr1:100-200}" to disambiguate.
1 parent 8d7e222 commit 79cd218

1 file changed

Lines changed: 41 additions & 6 deletions

File tree

hts.c

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2518,6 +2518,10 @@ long long hts_parse_decimal(const char *str, char **strend, int flags)
25182518
* validate the reference name exists and is unambiguously parseable. If not
25192519
* given the best guess will be made but there are no guarantees of validity.
25202520
*
2521+
* To work around these issues quoting is also permitted via {ref}:start-end.
2522+
* In this case, the return value will point to '}' and not the end of the
2523+
* reference (but this is a useful indication that it started with '{').
2524+
*
25212525
* On success the end of the reference is returned (colon or end of string)
25222526
* beg/end will be set, plus tid if getid has been supplied.
25232527
* On failure NULL is returned.
@@ -2530,16 +2534,45 @@ const char *hts_parse_reg2(const char *s, int *tid, int *beg, int *end,
25302534
int tid_, s_len = strlen(s); // int is sufficient given beg/end types
25312535
if (!tid) tid = &tid_; // simplifies code below
25322536

2533-
// No colon implies entirety of the reference
2534-
const char *colon = strrchr(s, ':');
2537+
const char *colon = NULL;
2538+
int quoted = 0;
2539+
2540+
// Braced quoting of references is permitted to resolve ambiguities.
2541+
if (*s == '{') {
2542+
const char *close = strrchr(s, '}');
2543+
if (!close) {
2544+
hts_log_error("Mismatching braces in \"%s\"", s);
2545+
return NULL;
2546+
}
2547+
s++;
2548+
s_len--;
2549+
if (close[1] == ':')
2550+
colon = close+1;
2551+
quoted = 1; // number of trailing characters to trim
2552+
} else {
2553+
colon = strrchr(s, ':');
2554+
}
2555+
25352556
if (colon == NULL) {
25362557
*beg = 0; *end = INT_MAX;
2537-
*tid = getid ? getid(hdr, s) : 0;
2558+
if (getid) {
2559+
kstring_t ks = { 0, 0, NULL };
2560+
kputsn(s, s_len-quoted, &ks); // convert to nul terminated string
2561+
if (!ks.s) {
2562+
*tid = -1;
2563+
return NULL;
2564+
}
2565+
2566+
*tid = getid(hdr, ks.s);
2567+
free(ks.s);
2568+
} else {
2569+
*tid = 0;
2570+
}
25382571
return *tid >= 0 ? s + s_len : NULL;
25392572
}
25402573

25412574
// Has a colon, but check whole name first.
2542-
if (getid) {
2575+
if (!quoted && getid) {
25432576
*beg = 0; *end = INT_MAX;
25442577
if ((*tid = getid(hdr, s)) >= 0) {
25452578
// Entire name matches, but also check this isn't
@@ -2554,7 +2587,9 @@ const char *hts_parse_reg2(const char *s, int *tid, int *beg, int *end,
25542587
if (getid(hdr, ks.s) >= 0) {
25552588
free(ks.s);
25562589
*tid = -1;
2557-
hts_log_error("Range %s is ambiguous", s);
2590+
hts_log_error("Range is ambiguous. "
2591+
"Use {%s} or {%.*s}%s instead",
2592+
s, (int)(colon-s), s, colon);
25582593
return NULL;
25592594
}
25602595
free(ks.s);
@@ -2574,7 +2609,7 @@ const char *hts_parse_reg2(const char *s, int *tid, int *beg, int *end,
25742609
if (*beg >= *end) return NULL;
25752610
if (getid) {
25762611
kstring_t ks = { 0, 0, NULL };
2577-
kputsn(s, colon-s, &ks); // convert to nul terminated string
2612+
kputsn(s, colon-s-quoted, &ks); // convert to nul terminated string
25782613
if (!ks.s) {
25792614
*tid = -1;
25802615
return NULL;

0 commit comments

Comments
 (0)