Skip to content

Commit b64c0e0

Browse files
committed
Refine CompressionHeader map serialization
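Document why buffering is required: each map is written in a length-prefixed format, so its full serialized size must be known before anything is emitted. Pre-size the ByteArrayOutputStream to 100 KB so the common case avoids any reallocation, and reuse a single buffer across both map blocks via reset(). Use size() and writeTo() instead of toByteArray() to avoid an extra copy, and drop the redundant (byte) casts on single-byte write(int) calls.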
1 parent cc246b0 commit b64c0e0

1 file changed

Lines changed: 18 additions & 15 deletions

File tree

src/main/java/htsjdk/samtools/cram/structure/CompressionHeader.java

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -290,21 +290,26 @@ else if (TD_tagIdsDictionary.equals(key)) {
290290
}
291291

292292
private void internalWrite(final OutputStream outputStream) throws IOException {
293+
// Each map below is written to outputStream as a length-prefixed byte
294+
// array, so we need to know the full serialized size before writing.
295+
// Buffer to a ByteArrayOutputStream first, then emit [length][bytes].
296+
// Pre-sized to 100 KB (matches the previous fixed-buffer size, so the
297+
// common case fits without any reallocation) but allowed to grow for
298+
// rich tag sets (PacBio/Ultima flow-space, ONT mod bases) where the
299+
// TD dictionary can exceed 100 KB. Reused across both blocks via reset().
300+
final ByteArrayOutputStream mapStream = new ByteArrayOutputStream(100 * 1024);
301+
293302
{ // preservation map:
294-
// Use ByteArrayOutputStream (grows as needed) rather than a fixed-size
295-
// ByteBuffer: for rich tag sets (PacBio/Ultima flow-space, ONT mod bases)
296-
// the TD dictionary alone can exceed 100 KB.
297-
final ByteArrayOutputStream mapStream = new ByteArrayOutputStream();
298303
ITF8.writeUnsignedITF8(5, mapStream);
299304

300305
mapStream.write(RN_readNamesIncluded.getBytes());
301-
mapStream.write((byte) (preserveReadNames ? 1 : 0));
306+
mapStream.write(preserveReadNames ? 1 : 0);
302307

303308
mapStream.write(AP_alignmentPositionIsDelta.getBytes());
304-
mapStream.write((byte) (APDelta ? 1 : 0));
309+
mapStream.write(APDelta ? 1 : 0);
305310

306311
mapStream.write(RR_referenceRequired.getBytes());
307-
mapStream.write((byte) (referenceRequired ? 1 : 0));
312+
mapStream.write(referenceRequired ? 1 : 0);
308313

309314
mapStream.write(SM_substitutionMatrix.getBytes());
310315
mapStream.write(substitutionMatrix.getEncodedMatrix());
@@ -314,28 +319,26 @@ private void internalWrite(final OutputStream outputStream) throws IOException {
314319
ITF8.writeUnsignedITF8(dictionaryBytes.length, mapStream);
315320
mapStream.write(dictionaryBytes);
316321

317-
final byte[] mapBytes = mapStream.toByteArray();
318-
ITF8.writeUnsignedITF8(mapBytes.length, outputStream);
319-
outputStream.write(mapBytes);
322+
ITF8.writeUnsignedITF8(mapStream.size(), outputStream);
323+
mapStream.writeTo(outputStream);
320324
}
321325

322326
encodingMap.write(outputStream);
323327

324328
{ // tag encoding map:
325-
final ByteArrayOutputStream mapStream = new ByteArrayOutputStream();
329+
mapStream.reset();
326330
ITF8.writeUnsignedITF8(tagEncodingMap.size(), mapStream);
327331
for (final Integer dataSeries : tagEncodingMap.keySet()) {
328332
ITF8.writeUnsignedITF8(dataSeries, mapStream);
329333

330334
final EncodingDescriptor params = tagEncodingMap.get(dataSeries);
331-
mapStream.write((byte) (0xFF & params.getEncodingID().getId()));
335+
mapStream.write(0xFF & params.getEncodingID().getId());
332336
ITF8.writeUnsignedITF8(params.getEncodingParameters().length, mapStream);
333337
mapStream.write(params.getEncodingParameters());
334338
}
335-
final byte[] mapBytes = mapStream.toByteArray();
336339

337-
ITF8.writeUnsignedITF8(mapBytes.length, outputStream);
338-
outputStream.write(mapBytes);
340+
ITF8.writeUnsignedITF8(mapStream.size(), outputStream);
341+
mapStream.writeTo(outputStream);
339342
}
340343
}
341344

0 commit comments

Comments
 (0)