Skip to content

Commit 1dee8d5

Browse files
committed
Merge CRAM decoding memory allocation fixes (PR #357)
2 parents c618771 + 5af1b93 commit 1dee8d5

7 files changed

Lines changed: 74 additions & 26 deletions

File tree

.gitattributes

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
# Ensure BAM and CRAM files are left as binary. To display changes in them
2+
# by converting to SAM, add the following to your configuration (perhaps with --global):
3+
#
4+
# git config diff.bam.textconv "samtools view -h"
5+
# git config diff.cram.textconv "samtools view -h"
6+
*.bam -text diff=bam
7+
*.cram -text diff=cram
8+
19
# Omit these files from release tarballs.
210
.git* export-ignore
311
/.travis.yml export-ignore

cram/cram_codecs.c

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -344,7 +344,8 @@ int cram_external_decode_char(cram_slice *slice, cram_codec *c,
344344
if (!cp)
345345
return -1;
346346

347-
memcpy(out, cp, *out_size);
347+
if (out)
348+
memcpy(out, cp, *out_size);
348349
return 0;
349350
}
350351

@@ -498,11 +499,16 @@ int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char
498499
if (cram_not_enough_bits(in, c->beta.nbits))
499500
return -1;
500501

501-
for (i = 0, n = *out_size; i < n; i++)
502-
out[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset;
502+
if (out)
503+
for (i = 0, n = *out_size; i < n; i++)
504+
out[i] = get_bits_MSB(in, c->beta.nbits) - c->beta.offset;
505+
else
506+
for (i = 0, n = *out_size; i < n; i++)
507+
get_bits_MSB(in, c->beta.nbits);
503508
} else {
504-
for (i = 0, n = *out_size; i < n; i++)
505-
out[i] = -c->beta.offset;
509+
if (out)
510+
for (i = 0, n = *out_size; i < n; i++)
511+
out[i] = -c->beta.offset;
506512
}
507513

508514
return 0;
@@ -820,6 +826,9 @@ int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c,
820826
cram_block *in, char *out, int *out_size) {
821827
int i, n;
822828

829+
if (!out)
830+
return 0;
831+
823832
/* Special case of 0 length codes */
824833
for (i = 0, n = *out_size; i < n; i++) {
825834
out[i] = c->huffman.codes[0].symbol;
@@ -1534,11 +1543,20 @@ static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c,
15341543
return -1;
15351544

15361545
cp = (char *)b->data + b->idx;
1537-
while ((ch = *cp) != (char)c->byte_array_stop.stop) {
1538-
if (cp - (char *)b->data >= b->uncomp_size)
1539-
return -1;
1540-
*out++ = ch;
1541-
cp++;
1546+
if (out) {
1547+
while ((ch = *cp) != (char)c->byte_array_stop.stop) {
1548+
if (cp - (char *)b->data >= b->uncomp_size)
1549+
return -1;
1550+
*out++ = ch;
1551+
cp++;
1552+
}
1553+
} else {
1554+
// Consume input, but produce no output
1555+
while ((ch = *cp) != (char)c->byte_array_stop.stop) {
1556+
if (cp - (char *)b->data >= b->uncomp_size)
1557+
return -1;
1558+
cp++;
1559+
}
15421560
}
15431561

15441562
*out_size = cp - (char *)(b->data + b->idx);

cram/cram_decode.c

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1330,7 +1330,9 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
13301330
r |= c->comp_hdr->codecs[DS_IN]
13311331
? c->comp_hdr->codecs[DS_IN]
13321332
->decode(s, c->comp_hdr->codecs[DS_IN],
1333-
blk, &seq[pos-1], &out_sz2)
1333+
blk,
1334+
cr->len ? &seq[pos-1] : NULL,
1335+
&out_sz2)
13341336
: (seq[pos-1] = 'N', out_sz2 = 1, 0);
13351337
have_sc = 1;
13361338
}
@@ -1341,7 +1343,9 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
13411343
r |= c->comp_hdr->codecs[DS_SC]
13421344
? c->comp_hdr->codecs[DS_SC]
13431345
->decode(s, c->comp_hdr->codecs[DS_SC],
1344-
blk, &seq[pos-1], &out_sz2)
1346+
blk,
1347+
cr->len ? &seq[pos-1] : NULL,
1348+
&out_sz2)
13451349
: (seq[pos-1] = 'N', out_sz2 = 1, 0);
13461350
have_sc = 1;
13471351
}
@@ -1375,7 +1379,8 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
13751379
r |= c->comp_hdr->codecs[DS_BS]
13761380
->decode(s, c->comp_hdr->codecs[DS_BS], blk,
13771381
(char *)&base, &out_sz);
1378-
seq[pos-1] = 'N'; // FIXME look up BS=base value
1382+
if (pos-1 < cr->len)
1383+
seq[pos-1] = 'N'; // FIXME look up BS=base value
13791384
}
13801385
cig_op = BAM_CBASE_MISMATCH;
13811386
#else
@@ -1391,8 +1396,9 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
13911396
(char *)&base, &out_sz);
13921397
if (r) return -1;
13931398
if (ref_pos >= bfd->ref[cr->ref_id].len || !s->ref) {
1394-
seq[pos-1] = c->comp_hdr->
1395-
substitution_matrix[fd->L1['N']][base];
1399+
if (pos-1 < cr->len)
1400+
seq[pos-1] = c->comp_hdr->
1401+
substitution_matrix[fd->L1['N']][base];
13961402
if (decode_md || decode_nm) {
13971403
if (md_dist >= 0 && decode_md)
13981404
BLOCK_APPEND_UINT(s->aux_blk, md_dist);
@@ -1404,8 +1410,9 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
14041410
? (uc)s->ref[ref_pos - s->ref_start +1]
14051411
: 'N';
14061412
ref_base = fd->L1[ref_call];
1407-
seq[pos-1] = c->comp_hdr->
1408-
substitution_matrix[ref_base][base];
1413+
if (pos-1 < cr->len)
1414+
seq[pos-1] = c->comp_hdr->
1415+
substitution_matrix[ref_base][base];
14091416
if (decode_md) {
14101417
BLOCK_APPEND_UINT(s->aux_blk, md_dist);
14111418
BLOCK_APPEND_CHAR(s->aux_blk, ref_call);
@@ -1484,7 +1491,8 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
14841491
if (!c->comp_hdr->codecs[DS_IN]) return -1;
14851492
r |= c->comp_hdr->codecs[DS_IN]
14861493
->decode(s, c->comp_hdr->codecs[DS_IN], blk,
1487-
&seq[pos-1], &out_sz2);
1494+
cr->len ? &seq[pos-1] : NULL,
1495+
&out_sz2);
14881496
if (r) return r;
14891497
cig_op = BAM_CINS;
14901498
cig_len += out_sz2;
@@ -1504,7 +1512,8 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
15041512
if (!c->comp_hdr->codecs[DS_BA]) return -1;
15051513
r |= c->comp_hdr->codecs[DS_BA]
15061514
->decode(s, c->comp_hdr->codecs[DS_BA], blk,
1507-
(char *)&seq[pos-1], &out_sz);
1515+
cr->len ? &seq[pos-1] : NULL,
1516+
&out_sz);
15081517
if (r) return r;
15091518
}
15101519
cig_op = BAM_CINS;
@@ -1526,7 +1535,8 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
15261535
if (!c->comp_hdr->codecs[DS_BB]) return -1;
15271536
r |= c->comp_hdr->codecs[DS_BB]
15281537
->decode(s, c->comp_hdr->codecs[DS_BB], blk,
1529-
(char *)&seq[pos-1], &len);
1538+
cr->len ? &seq[pos-1] : NULL,
1539+
&len);
15301540
if (r) return r;
15311541

15321542
if (decode_md || decode_nm) {
@@ -1602,7 +1612,8 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
16021612
if (!c->comp_hdr->codecs[DS_BA]) return -1;
16031613
r |= c->comp_hdr->codecs[DS_BA]
16041614
->decode(s, c->comp_hdr->codecs[DS_BA], blk,
1605-
(char *)&seq[pos-1], &out_sz);
1615+
cr->len ? &seq[pos-1] : NULL,
1616+
&out_sz);
16061617

16071618
if (decode_md || decode_nm) {
16081619
if (md_dist >= 0 && decode_md)
@@ -1721,17 +1732,20 @@ static int cram_decode_seq(cram_fd *fd, cram_container *c, cram_slice *s,
17211732
whinged = 1;
17221733
rlen = bfd->ref[cr->ref_id].len - ref_pos;
17231734
if (rlen > 0) {
1724-
memcpy(&seq[seq_pos-1],
1725-
&s->ref[ref_pos - s->ref_start +1], rlen);
1735+
if (seq_pos-1 + rlen < cr->len)
1736+
memcpy(&seq[seq_pos-1],
1737+
&s->ref[ref_pos - s->ref_start +1], rlen);
17261738
if ((cr->len - seq_pos + 1) - rlen > 0)
17271739
memset(&seq[seq_pos-1+rlen], 'N',
17281740
(cr->len - seq_pos + 1) - rlen);
17291741
} else {
1730-
memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1);
1742+
if (cr->len - seq_pos + 1 > 0)
1743+
memset(&seq[seq_pos-1], 'N', cr->len - seq_pos + 1);
17311744
}
17321745
} else {
1733-
memcpy(&seq[seq_pos-1], &s->ref[ref_pos - s->ref_start +1],
1734-
cr->len - seq_pos + 1);
1746+
if (cr->len - seq_pos + 1 > 0)
1747+
memcpy(&seq[seq_pos-1], &s->ref[ref_pos - s->ref_start +1],
1748+
cr->len - seq_pos + 1);
17351749
ref_pos += cr->len - seq_pos + 1;
17361750
if (md_dist >= 0)
17371751
md_dist += cr->len - seq_pos + 1;

test/aux#aux_java.cram

3.69 KB
Binary file not shown.

test/ce#5b_java.cram

6.63 KB
Binary file not shown.

test/test.pl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,14 @@ sub test_view
261261
testv "./test_view $cram > $cram.sam_";
262262
testv "./compare_sam.pl -nomd $sam $cram.sam_";
263263

264+
# Java pre-made CRAM -> SAM
265+
my $jcram = "${base}_java.cram";
266+
if (-e $jcram) {
267+
my $jsam = "${base}_java.tmp.sam";
268+
testv "./test_view -i reference=$ref $jcram > $jsam";
269+
testv "./compare_sam.pl -nomd $sam $jsam";
270+
}
271+
264272
if ($test_view_failures == 0)
265273
{
266274
passed($opts, "$sam conversions");

test/xx#large_aux_java.cram

22.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)