Skip to content

Commit 15a4091

Browse files
jmarshall authored and pd3 committed
Portability improvements for vcfstats.c
Use htslib/hts_endian.h functions to avoid problems with unaligned access, and to ensure numbers are byteswapped on platforms that need it. Signed-off-by: John Marshall <jmarshall@hey.com>
1 parent 573950f commit 15a4091

2 files changed

Lines changed: 35 additions & 32 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ vcfcnv.o: vcfcnv.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_kstrin
267267
vcfhead.o: vcfhead.c $(htslib_kstring_h) $(htslib_vcf_h) $(bcftools_h)
268268
vcfsom.o: vcfsom.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_hts_os_h) $(htslib_hts_defs_h) $(bcftools_h)
269269
vcfsort.o: vcfsort.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_hts_os_h) $(htslib_hts_defs_h) $(htslib_bgzf_h) kheap.h $(bcftools_h)
270-
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(bcftools_h) $(filter_h) bin.h dist.h
270+
vcfstats.o: vcfstats.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(htslib_faidx_h) $(htslib_hts_endian_h) $(bcftools_h) $(filter_h) bin.h dist.h
271271
vcfview.o: vcfview.c $(htslib_vcf_h) $(htslib_synced_bcf_reader_h) $(htslib_vcfutils_h) $(bcftools_h) $(filter_h) $(htslib_khash_str2int_h) $(htslib_kbitset_h)
272272
reheader.o: reheader.c $(htslib_vcf_h) $(htslib_bgzf_h) $(htslib_tbx_h) $(htslib_kseq_h) $(htslib_thread_pool_h) $(htslib_faidx_h) $(htslib_khash_str2int_h) $(bcftools_h) $(khash_str2str_h)
273273
tabix.o: tabix.c $(htslib_bgzf_h) $(htslib_tbx_h)

vcfstats.c

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ THE SOFTWARE. */
3737
#include <htslib/synced_bcf_reader.h>
3838
#include <htslib/vcfutils.h>
3939
#include <htslib/faidx.h>
40+
#include <htslib/hts_endian.h>
4041
#include <inttypes.h>
4142
#include "bcftools.h"
4243
#include "filter.h"
@@ -899,36 +900,37 @@ static inline int get_ad(bcf1_t *line, bcf_fmt_t *ad_fmt_ptr, int ismpl, int *ia
899900
{
900901
int iv, ad = 0;
901902
*ial = 0;
902-
#define BRANCH_INT(type_t,missing,vector_end) { \
903-
type_t *ptr = (type_t *) (ad_fmt_ptr->p + ad_fmt_ptr->size*ismpl); \
903+
#define BRANCH_INT(type_t,convert,missing,vector_end) { \
904+
uint8_t *x = ad_fmt_ptr->p + ad_fmt_ptr->size*ismpl; \
904905
for (iv=1; iv<ad_fmt_ptr->n && iv<line->n_allele; iv++) \
905906
{ \
906-
if ( ptr[iv]==vector_end ) break; \
907-
if ( ptr[iv]==missing ) continue; \
908-
if ( ad < ptr[iv] ) { ad = ptr[iv]; *ial = iv; }\
907+
type_t val = convert(&x[iv * sizeof(type_t)]); \
908+
if ( val==vector_end ) break; \
909+
if ( val==missing ) continue; \
910+
if ( ad < val ) { ad = val; *ial = iv; }\
909911
} \
910912
}
911913
switch (ad_fmt_ptr->type) {
912-
case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_missing, bcf_int8_vector_end); break;
913-
case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
914-
case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
914+
case BCF_BT_INT8: BRANCH_INT(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break;
915+
case BCF_BT_INT16: BRANCH_INT(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break;
916+
case BCF_BT_INT32: BRANCH_INT(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break;
915917
default: fprintf(stderr, "[E::%s] todo: %d\n", __func__, ad_fmt_ptr->type); exit(1); break;
916918
}
917919
#undef BRANCH_INT
918920
return ad;
919921
}
920922
static inline int get_iad(bcf1_t *line, bcf_fmt_t *ad_fmt_ptr, int ismpl, int ial)
921923
{
922-
#define BRANCH_INT(type_t,missing,vector_end) { \
923-
type_t *ptr = (type_t *) (ad_fmt_ptr->p + ad_fmt_ptr->size*ismpl); \
924-
if ( ptr[ial]==vector_end ) return 0; \
925-
if ( ptr[ial]==missing ) return 0; \
926-
return ptr[ial]; \
924+
#define BRANCH_INT(type_t,convert,missing,vector_end) { \
925+
type_t val = convert(ad_fmt_ptr->p + ad_fmt_ptr->size*ismpl + ial*sizeof(type_t)); \
926+
if ( val==vector_end ) return 0; \
927+
if ( val==missing ) return 0; \
928+
return val; \
927929
}
928930
switch (ad_fmt_ptr->type) {
929-
case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_missing, bcf_int8_vector_end); break;
930-
case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
931-
case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
931+
case BCF_BT_INT8: BRANCH_INT(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break;
932+
case BCF_BT_INT16: BRANCH_INT(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break;
933+
case BCF_BT_INT32: BRANCH_INT(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break;
932934
default: fprintf(stderr, "[E::%s] todo: %d\n", __func__, ad_fmt_ptr->type); exit(1); break;
933935
}
934936
#undef BRANCH_INT
@@ -957,36 +959,37 @@ static inline int calc_sample_depth(args_t *args, int ismpl, bcf_fmt_t *ad_fmt_p
957959
{
958960
if ( dp_fmt_ptr )
959961
{
960-
#define BRANCH_INT(type_t,missing,vector_end) { \
961-
type_t *ptr = (type_t *) (dp_fmt_ptr->p + dp_fmt_ptr->size*ismpl); \
962-
if ( *ptr==missing || *ptr==vector_end ) return -1; \
963-
return *ptr; \
962+
#define BRANCH_INT(type_t,convert,missing,vector_end) { \
963+
type_t val = convert(dp_fmt_ptr->p + dp_fmt_ptr->size*ismpl); \
964+
if ( val==missing || val==vector_end ) return -1; \
965+
return val; \
964966
}
965967
switch (dp_fmt_ptr->type) {
966-
case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_missing, bcf_int8_vector_end); break;
967-
case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
968-
case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
968+
case BCF_BT_INT8: BRANCH_INT(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break;
969+
case BCF_BT_INT16: BRANCH_INT(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break;
970+
case BCF_BT_INT32: BRANCH_INT(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break;
969971
default: fprintf(stderr, "[E::%s] todo: %d\n", __func__, dp_fmt_ptr->type); exit(1); break;
970972
}
971973
#undef BRANCH_INT
972974
}
973975
if ( ad_fmt_ptr )
974976
{
975977
int iv, dp = 0, has_value = 0;
976-
#define BRANCH_INT(type_t,missing,vector_end) { \
977-
type_t *ptr = (type_t *) (ad_fmt_ptr->p + ad_fmt_ptr->size*ismpl); \
978+
#define BRANCH_INT(type_t,convert,missing,vector_end) { \
979+
uint8_t *x = ad_fmt_ptr->p + ad_fmt_ptr->size*ismpl; \
978980
for (iv=0; iv<ad_fmt_ptr->n; iv++) \
979981
{ \
980-
if ( ptr[iv]==vector_end ) break; \
981-
if ( ptr[iv]==missing ) continue; \
982+
type_t val = convert(&x[iv * sizeof(type_t)]); \
983+
if ( val==vector_end ) break; \
984+
if ( val==missing ) continue; \
982985
has_value = 1; \
983-
dp += ptr[iv]; \
986+
dp += val; \
984987
} \
985988
}
986989
switch (ad_fmt_ptr->type) {
987-
case BCF_BT_INT8: BRANCH_INT(int8_t, bcf_int8_missing, bcf_int8_vector_end); break;
988-
case BCF_BT_INT16: BRANCH_INT(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
989-
case BCF_BT_INT32: BRANCH_INT(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
990+
case BCF_BT_INT8: BRANCH_INT(int8_t, le_to_i8, bcf_int8_missing, bcf_int8_vector_end); break;
991+
case BCF_BT_INT16: BRANCH_INT(int16_t, le_to_i16, bcf_int16_missing, bcf_int16_vector_end); break;
992+
case BCF_BT_INT32: BRANCH_INT(int32_t, le_to_i32, bcf_int32_missing, bcf_int32_vector_end); break;
990993
default: fprintf(stderr, "[E::%s] todo: %d\n", __func__, ad_fmt_ptr->type); exit(1); break;
991994
}
992995
#undef BRANCH_INT

0 commit comments

Comments
 (0)