Skip to content

Commit 449a8af

Browse files
committed
Add part #1 of #2524, adding a test and fixing the introduced bug
1 parent 3f8280c commit 449a8af

6 files changed

Lines changed: 66 additions & 35 deletions

File tree

test/annotate.int.1.out

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##contig=<ID=1>
4+
##INFO=<ID=IA,Number=A,Type=Integer,Description="int Number=A">
5+
##INFO=<ID=IR,Number=R,Type=Integer,Description="int Number=R">
6+
#CHROM POS ID REF ALT QUAL FILTER INFO
7+
1 2 . C T,G . . IA=1,99;IR=1,99,3

test/annotate.int.dst.vcf

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
##fileformat=VCFv4.2
2+
##contig=<ID=1>
3+
##INFO=<ID=IA,Number=A,Type=Integer,Description="int Number=A">
4+
##INFO=<ID=IR,Number=R,Type=Integer,Description="int Number=R">
5+
#CHROM POS ID REF ALT QUAL FILTER INFO
6+
1 2 . C T,G . . IA=.,99;IR=.,99,.

test/annotate.int.src.vcf

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,6 @@
1+
##fileformat=VCFv4.2
2+
##contig=<ID=1>
3+
##INFO=<ID=IA,Number=A,Type=Integer,Description="int Number=A">
4+
##INFO=<ID=IR,Number=R,Type=Integer,Description="int Number=R">
5+
#CHROM POS ID REF ALT QUAL FILTER INFO
6+
1 2 . C T,G . . IA=1,2;IR=1,2,3

test/annotate8.out

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,5 +10,5 @@
1010
##reference=file:///lustre/scratch105/projects/g1k/ref/main_project/human_g1k_v37.fasta
1111
#CHROM POS ID REF ALT QUAL FILTER INFO
1212
1 1 . C T . . FA=1.1;FR=1.1,2.2;IA=1;IR=1,2;SA=11;SR=1,222
13-
1 2 . C T,G . . FA=1.1,9.9;FR=1.1,9.9,3.3;IA=1,2;IR=1,2,3;SA=11,99;SR=111,99,3
13+
1 2 . C T,G . . FA=1.1,9.9;FR=1.1,9.9,3.3;IA=1,99;IR=1,99,3;SA=11,99;SR=111,99,3
1414
1 3 . C A,T . . FA=.,1.1;FR=1.1,.,2.2;IA=.,1;IR=1,.,2;SA=.,11;SR=11,.,2

test/test.pl

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -565,6 +565,7 @@
565565
run_test(\&test_vcf_sort,$opts,in=>'sort',out=>'sort.out',args=>q[-m 0],fmt=>'%CHROM\\t%POS\\t%REF,%ALT\\n');
566566
run_test(\&test_vcf_sort,$opts,in=>'sort',out=>'sort.out',args=>q[-m 1000],fmt=>'%CHROM\\t%POS\\t%REF,%ALT\\n');
567567
run_test(\&test_vcf_regions,$opts,in=>'regions');
568+
+run_test(\&test_vcf_annotate,$opts,in=>'annotate.int.dst',vcf=>'annotate.int.src',out=>'annotate.int.1.out',args=>q[-c +INFO/IA,+INFO/IR]);
568569
run_test(\&test_vcf_annotate,$opts,in=>'annotate.rename',out=>'annotate.rename.1.out',args=>q[-c INFO/ii:=INFO/XX]);
569570
run_test(\&test_vcf_annotate,$opts,in=>'annotate.rename',out=>'annotate.rename.2.out',args=>q[-c FORMAT/ff:=FORMAT/XX]);
570571
run_test(\&test_vcf_annotate,$opts,in=>'annotate36',bed=>'annots36',out=>'annotate36.1.out',args=>q[-c CHROM,POS,-,~X,-,AF,-,-,- -i'SVTYPE={X}' -k]);

vcfannotate.c

Lines changed: 45 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -793,7 +793,7 @@ static int setter_ARinfo_int32(args_t *args, bcf1_t *line, annot_col_t *col, int
793793
if ( !map ) error("REF alleles not compatible at %s:%"PRId64"\n", bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
794794

795795
// fill in any missing values in the target VCF (or all, if not present)
796-
int ntmpi2 = bcf_get_info_float(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
796+
int ntmpi2 = bcf_get_info_int32(args->hdr, line, col->hdr_key_dst, &args->tmpi2, &args->mtmpi2);
797797
if ( ntmpi2 < ndst ) hts_expand(int32_t,ndst,args->mtmpi2,args->tmpi2);
798798

799799
int i;
@@ -1751,36 +1751,47 @@ static int setter_format_str(args_t *args, bcf1_t *line, annot_col_t *col, void
17511751
for (ismpl=0; ismpl<args->nsmpl_annot; ismpl++) free(args->tmpp[ismpl]);
17521752
return ret;
17531753
}
1754-
static int determine_ploidy(int nals, int *vals, int nvals1, uint8_t *smpl, int nsmpl)
1755-
{
1756-
int i, j, ndip = nals*(nals+1)/2, max_ploidy = 0;
1757-
for (i=0; i<nsmpl; i++)
1758-
{
1759-
int *ptr = vals + i*nvals1;
1760-
int has_value = 0;
1761-
for (j=0; j<nvals1; j++)
1762-
{
1763-
if ( ptr[j]==bcf_int32_vector_end ) break;
1764-
if ( ptr[j]!=bcf_int32_missing ) has_value = 1;
1765-
}
1766-
if ( has_value )
1767-
{
1768-
if ( j==ndip )
1769-
{
1770-
smpl[i] = 2;
1771-
max_ploidy = 2;
1772-
}
1773-
else if ( j==nals )
1774-
{
1775-
smpl[i] = 1;
1776-
if ( !max_ploidy ) max_ploidy = 1;
1777-
}
1778-
else return -j;
1779-
}
1780-
else smpl[i] = 0;
1781-
}
1782-
return max_ploidy;
1754+
#define DEFINE_DETERMINE_PLOIDY(NAME, TYPE, IS_MISSING, IS_VECTOR_END) \
1755+
static int NAME(int nals, TYPE *vals, int nvals1, uint8_t *smpl, int nsmpl) \
1756+
{ \
1757+
int i, j, ndip = nals*(nals+1)/2, max_ploidy = 0; \
1758+
for (i=0; i<nsmpl; i++) \
1759+
{ \
1760+
TYPE *ptr = vals + i*nvals1; \
1761+
int has_value = 0; \
1762+
for (j=0; j<nvals1; j++) \
1763+
{ \
1764+
if ( IS_VECTOR_END(ptr[j]) ) break; \
1765+
if ( !IS_MISSING(ptr[j]) ) has_value = 1; \
1766+
} \
1767+
if ( has_value ) \
1768+
{ \
1769+
if ( j==ndip ) \
1770+
{ \
1771+
smpl[i] = 2; \
1772+
max_ploidy = 2; \
1773+
} \
1774+
else if ( j==nals ) \
1775+
{ \
1776+
smpl[i] = 1; \
1777+
if ( !max_ploidy ) max_ploidy = 1; \
1778+
} \
1779+
else return -j; \
1780+
} \
1781+
else smpl[i] = 0; \
1782+
} \
1783+
return max_ploidy; \
17831784
}
1785+
1786+
#define INT32_IS_MISSING(x) ((x) == bcf_int32_missing)
1787+
#define INT32_IS_VECTOR_END(x) ((x) == bcf_int32_vector_end)
1788+
1789+
#define FLOAT_IS_MISSING(x) bcf_float_is_missing(x)
1790+
#define FLOAT_IS_VECTOR_END(x) bcf_float_is_vector_end(x)
1791+
1792+
DEFINE_DETERMINE_PLOIDY(determine_ploidy_int32, int32_t, INT32_IS_MISSING, INT32_IS_VECTOR_END)
1793+
DEFINE_DETERMINE_PLOIDY(determine_ploidy_float, float, FLOAT_IS_MISSING, FLOAT_IS_VECTOR_END)
1794+
17841795
static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, void *data)
17851796
{
17861797
bcf1_t *rec = (bcf1_t*) data;
@@ -1833,10 +1844,10 @@ static int vcf_setter_format_int(args_t *args, bcf1_t *line, annot_col_t *col, v
18331844
args->src_smpl_pld = (uint8_t*) malloc(nsmpl_src);
18341845
args->dst_smpl_pld = (uint8_t*) malloc(nsmpl_dst);
18351846
}
1836-
int pld_src = determine_ploidy(rec->n_allele, args->tmpi, nsrc1, args->src_smpl_pld, nsmpl_src);
1847+
int pld_src = determine_ploidy_int32(rec->n_allele, args->tmpi, nsrc1, args->src_smpl_pld, nsmpl_src);
18371848
if ( pld_src<0 )
18381849
error("Unexpected number of %s values (%d) for %d alleles at %s:%"PRId64"\n", col->hdr_key_src,-pld_src, rec->n_allele, bcf_seqname(bcf_sr_get_header(args->files,1),rec),(int64_t) rec->pos+1);
1839-
int pld_dst = determine_ploidy(line->n_allele, args->tmpi2, ndst1, args->dst_smpl_pld, nsmpl_dst);
1850+
int pld_dst = determine_ploidy_int32(line->n_allele, args->tmpi2, ndst1, args->dst_smpl_pld, nsmpl_dst);
18401851
if ( pld_dst<0 )
18411852
error("Unexpected number of %s values (%d) for %d alleles at %s:%"PRId64"\n", col->hdr_key_src,-pld_dst, line->n_allele, bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
18421853

@@ -1940,10 +1951,10 @@ static int vcf_setter_format_real(args_t *args, bcf1_t *line, annot_col_t *col,
19401951
args->src_smpl_pld = (uint8_t*) malloc(nsmpl_src);
19411952
args->dst_smpl_pld = (uint8_t*) malloc(nsmpl_dst);
19421953
}
1943-
int pld_src = determine_ploidy(rec->n_allele, args->tmpi, nsrc1, args->src_smpl_pld, nsmpl_src);
1954+
int pld_src = determine_ploidy_float(rec->n_allele, args->tmpf, nsrc1, args->src_smpl_pld, nsmpl_src);
19441955
if ( pld_src<0 )
19451956
error("Unexpected number of %s values (%d) for %d alleles at %s:%"PRId64"\n", col->hdr_key_src,-pld_src, rec->n_allele, bcf_seqname(bcf_sr_get_header(args->files,1),rec),(int64_t) rec->pos+1);
1946-
int pld_dst = determine_ploidy(line->n_allele, args->tmpi2, ndst1, args->dst_smpl_pld, nsmpl_dst);
1957+
int pld_dst = determine_ploidy_float(line->n_allele, args->tmpf2, ndst1, args->dst_smpl_pld, nsmpl_dst);
19471958
if ( pld_dst<0 )
19481959
error("Unexpected number of %s values (%d) for %d alleles at %s:%"PRId64"\n", col->hdr_key_src,-pld_dst, line->n_allele, bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
19491960

0 commit comments

Comments
 (0)