@@ -31,9 +31,10 @@ typedef struct reglist
3131{
3232 uint32_t n , m ;
3333 uint64_t * a ;
34+ int tid ;
3435} reglist_t ;
3536
36- KHASH_MAP_INIT_STR (reg , reglist_t )
37+ KHASH_MAP_INIT_INT (reg , reglist_t )
3738typedef kh_reg_t reghash_t ;
3839
3940static int compare_uint64 (const void * a , const void * b )
@@ -52,7 +53,7 @@ static void reg_print(reghash_t *h) {
5253 reglist_t * p ;
5354 khint_t k ;
5455 uint32_t i ;
55- const char * reg ;
56+ khint32_t key ;
5657 uint32_t beg , end ;
5758
5859 if (!h ) {
@@ -61,16 +62,16 @@ static void reg_print(reghash_t *h) {
6162 }
6263 for (k = kh_begin (h ); k < kh_end (h ); k ++ ) {
6364 if (kh_exist (h ,k )) {
64- reg = kh_key (h ,k );
65- fprintf (stderr , "Region: '%s' \n" , reg );
65+ key = kh_key (h ,k );
66+ fprintf (stderr , "Region: key %u tid %d \n" , key , kh_val (h ,k ).tid ); // read tid from the bucket directly: p is not assigned until the next line
6667 if ((p = & kh_val (h ,k )) != NULL && p -> n > 0 ) {
6768 for (i = 0 ; i < p -> n ; i ++ ) {
6869 beg = (uint32_t )(p -> a [i ]>>32 );
6970 end = (uint32_t )(p -> a [i ]);
7071 fprintf (stderr , "\tinterval[%d]: %d-%d\n" , i , beg , end );
7172 }
7273 } else {
73- fprintf (stderr , "Region '%s' has no intervals!\n" , reg );
74+ fprintf (stderr , "Region key %u has no intervals!\n" , key );
7475 }
7576 }
7677 }
@@ -109,7 +110,7 @@ static int reg_compact(reghash_t *h) {
109110 return count ;
110111}
111112
112- static int reg_insert (reghash_t * h , char * reg , unsigned int beg , unsigned int end ) {
113+ static int reg_insert (reghash_t * h , int tid , unsigned int beg , unsigned int end ) {
113114
114115 khint_t k ;
115116 reglist_t * p ;
@@ -118,17 +119,15 @@ static int reg_insert(reghash_t *h, char *reg, unsigned int beg, unsigned int en
118119 return -1 ;
119120
120121 // Put reg in the hash table if not already there
121- k = kh_get (reg , h , reg ); //looks strange, but only the second reg is the actual region name.
122+ k = kh_get (reg , h , tid );
122123 if (k == kh_end (h )) { // absent from the hash table
123124 int ret ;
124- char * s = strdup (reg );
125- if (NULL == s ) return -1 ;
126- k = kh_put (reg , h , s , & ret );
125+ k = kh_put (reg , h , tid , & ret );
127126 if (-1 == ret ) {
128- free (s );
129127 return -1 ;
130128 }
131129 memset (& kh_val (h , k ), 0 , sizeof (reglist_t ));
130+ kh_val (h , k ).tid = tid ;
132131 }
133132 p = & kh_val (h , k );
134133
@@ -156,7 +155,6 @@ static void reg_destroy(reghash_t *h) {
156155 for (k = 0 ; k < kh_end (h ); ++ k ) {
157156 if (kh_exist (h , k )) {
158157 free (kh_val (h , k ).a );
159- free ((char * )kh_key (h , k ));
160158 }
161159 }
162160 kh_destroy (reg , h );
@@ -175,11 +173,10 @@ hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr
175173 hts_reglist_t * h_reglist = NULL ;
176174
177175 khint_t k ;
178- int i , l_count = 0 ;
176+ int i , l_count = 0 , tid ;
179177 uint32_t j ;
180- char reg [1024 ];
181178 const char * q ;
182- int beg , end ;
179+ int64_t beg , end ;
183180
184181 /* First, transform the char array into a hash table */
185182 h = kh_init (reg );
@@ -189,65 +186,56 @@ hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr
189186 }
190187
191188 for (i = 0 ; i < argc ; i ++ ) {
192- q = hts_parse_reg (argv [i ], & beg , & end );
193- if (q ) {
194- if (q - argv [i ] > sizeof (reg ) - 1 ) {
195- hts_log_error ("Region name '%s' is too long (bigger than %d)" , argv [i ], (int ) sizeof (reg ) - 1 );
196- continue ;
197- }
198- memcpy (reg , argv [i ], q - argv [i ]);
199- reg [q - argv [i ]] = 0 ;
189+ if (!strcmp (argv [i ], "." )) {
190+ q = argv [i ] + 1 ;
191+ tid = HTS_IDX_START ; beg = 0 ; end = INT64_MAX ;
192+ } else if (!strcmp (argv [i ], "*" )) {
193+ q = argv [i ] + 1 ;
194+ tid = HTS_IDX_NOCOOR ; beg = 0 ; end = INT64_MAX ;
200195 } else {
201- // not parsable as a region, but possibly a sequence named "foo:a"
202- if (strlen (argv [i ]) > sizeof (reg ) - 1 ) {
203- hts_log_error ("Region name '%s' is too long (bigger than %d)" , argv [i ], (int ) sizeof (reg ) - 1 );
204- continue ;
205- }
206- strcpy (reg , argv [i ]);
207- beg = 0 ; end = INT_MAX ;
196+ q = hts_parse_region (argv [i ], & tid , & beg , & end , getid , hdr ,
197+ HTS_PARSE_THOUSANDS_SEP );
208198 }
199+ if (!q ) {
200+ // not parsable as a region
201+ hts_log_warning ("Region '%s' specifies an unknown reference name. Continue anyway" , argv [i ]);
202+ continue ;
203+ }
204+
205+ if (beg > INT_MAX ) beg = INT_MAX ; // Remove when fully 64-bit compliant
206+ if (end > INT_MAX ) end = INT_MAX ; // Remove when fully 64-bit compliant
209207
210- if (reg_insert (h , reg , beg , end ) != 0 ) {
208+ if (reg_insert (h , tid , beg , end ) != 0 ) {
211209 hts_log_error ("Error when inserting region='%s' in the bed hash table at address=%p" , argv [i ], (void * ) h );
212210 goto fail ;
213211 }
214212 }
215213
216214 * r_count = reg_compact (h );
217215 if (!* r_count )
218- return NULL ;
216+ goto fail ;
219217
220218 /* Transform the hash table into a list */
221219 h_reglist = (hts_reglist_t * )calloc (* r_count , sizeof (hts_reglist_t ));
222220 if (!h_reglist )
223- return NULL ;
221+ goto fail ;
224222
225223 for (k = kh_begin (h ); k < kh_end (h ) && l_count < * r_count ; k ++ ) {
226224 if (!kh_exist (h ,k ) || !(p = & kh_val (h ,k )))
227225 continue ;
228226
229- char * reg_name = (char * )kh_key (h ,k );
230- if (!strcmp (reg_name , "." )) {
231- h_reglist [l_count ].tid = HTS_IDX_START ;
232- } else if (!strcmp (reg_name , "*" )) {
233- h_reglist [l_count ].tid = HTS_IDX_NOCOOR ;
234- } else {
235- h_reglist [l_count ].tid = getid (hdr , reg_name );
236- if (h_reglist [l_count ].tid < 0 )
237- hts_log_warning ("Region '%s' specifies an unknown reference name. Continue anyway" , reg_name );
238- }
239-
240- h_reglist [l_count ].intervals = (hts_pair32_t * )calloc (p -> n , sizeof (hts_pair32_t ));
227+ h_reglist [l_count ].tid = p -> tid ;
228+ h_reglist [l_count ].intervals = calloc (p -> n , sizeof (h_reglist [l_count ].intervals [0 ]));
241229 if (!(h_reglist [l_count ].intervals )) {
242- hts_log_error ("Could not allocate memory for intervals for region='%s'" , kh_key ( h , k ) );
230+ hts_log_error ("Could not allocate memory for intervals" );
243231 goto fail ;
244232 }
245233 h_reglist [l_count ].count = p -> n ;
246234 h_reglist [l_count ].max_end = 0 ;
247235
248236 for (j = 0 ; j < p -> n ; j ++ ) {
249237 h_reglist [l_count ].intervals [j ].beg = (uint32_t )(p -> a [j ]>>32 );
250- h_reglist [l_count ].intervals [j ].end = (uint32_t )(p -> a [j ]);
238+ h_reglist [l_count ].intervals [j ].end = (uint32_t )(p -> a [j ] & 0xffffffffU );
251239
252240 if (h_reglist [l_count ].intervals [j ].end > h_reglist [l_count ].max_end )
253241 h_reglist [l_count ].max_end = h_reglist [l_count ].intervals [j ].end ;
0 commit comments