-
Notifications
You must be signed in to change notification settings - Fork 35
Expand file tree
/
Copy pathInflaterInputStream.java
More file actions
1314 lines (1180 loc) · 59.2 KB
/
InflaterInputStream.java
File metadata and controls
1314 lines (1180 loc) · 59.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* DEFLATE library (Java)
* Copyright (c) Project Nayuki
*
* https://www.nayuki.io/page/deflate-library-java
* https://github.com/nayuki/DEFLATE-library-Java
* Modified for Eclipse Memory Analyzer for:
* cloning of inflator
* pass-through mode for multi-chunk zips
* mark/reset
* merge of output buffer and dictionary
*/
package io.nayuki.deflate;
import java.io.EOFException;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Objects;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
/**
* Decompresses a DEFLATE data stream (raw format without zlib or gzip headers or footers) into a byte stream.
*/
@SuppressFBWarnings(value = "DB_DUPLICATE_BRANCHES", justification = "vendored")
public final class InflaterInputStream extends FilterInputStream {
/*---- Fields ----*/
/* Data buffers */
// Buffer of bytes read from in.read() (the underlying input stream)
private byte[] inputBuffer; // Can have any positive length (but longer means less overhead)
private int inputBufferLength; // Number of valid prefix bytes, at least 0
private int inputBufferIndex; // Index of next byte to consume
// Buffer of bits packed from the bytes in 'inputBuffer'
private long inputBitBuffer; // 0 <= value < 2^inputBitBufferLength
private int inputBitBufferLength; // Always in the range [0, 63]
// Queued bytes to yield first when this.read() is called
private int outputBufferLength; // Number of valid prefix bytes, at least 0
private int outputBufferIndex; // Index of next byte to produce, in the range [0, outputBufferLength]
// Buffer of last 32 KiB of decoded data, for LZ77 decompression
private byte[] dictionary;
private int dictionaryIndex;
// Generally speaking, the overall data flow of this decompressor looks like this:
// in (the underlying input stream, declared in the superclass) -> in.read()
// -> inputBuffer -> packing logic in readBits()
// -> inputBitBuffer -> readBit() or equivalent
// -> various literal and length-distance symbols -> LZ77 decoding logic
// -> dictionary, sometimes also outputBuffer -> copying to the caller's array
// -> b (the array passed into this.read(byte[],int,int)).
/* Configuration */
// Indicates whether mark() should be called when the underlying
// input stream is read, and whether calling detach() is allowed.
private final boolean isDetachable;
/* State */
// The state of the decompressor:
// -4: The decompressor is in pass-through mode, indefinite uncompressed
// -3: This decompressor stream has been closed. Equivalent to in == null.
// -2: A data format exception has been thrown. Equivalent to exception != null.
// -1: Currently processing a Huffman-compressed block.
// 0: Initial state, or a block just ended.
// 1 to 65535: Currently processing an uncompressed block, number of bytes remaining.
private int state;
// A saved exception that is thrown on every read() or detach().
private IOException exception;
// Indicates whether a block header with the "bfinal" flag has been seen.
// This starts as false, should eventually become true, and never changes back to false.
// except for the Eclipse MAT changes when a new segment is attached
private boolean isLastBlock;
// Current code trees for when state == -1. When state != -1, both must be null.
private short[] literalLengthCodeTree; // When state == -1, this must be not null
private short[] literalLengthCodeTable; // Derived from literalLengthCodeTree; same nullness
private short[] distanceCodeTree; // When state == -1, this can be null or not null
private short[] distanceCodeTable; // Derived from distanceCodeTree; same nullness
/*
* For marking.
* markPos >= 0 is position of mark in buffer [dictionary]
* markPos = -1 not set
* markPos = -2 has been set, but nothing has been read
* markPos = -3 has been set but too much has been read
*/
private int markPos = -1;
/*---- Constructors ----*/
/**
* Constructs an inflater input stream over the specified underlying input stream, and with the
* specified option for detachability. The underlying stream must contain DEFLATE-compressed data with
* no headers or footers (e.g. must be unwrapped from the zlib or gzip container formats). Detachability
* allows {@link #detach()} to be called, and requires the specified input stream to support marking.
* @param in the underlying input stream of raw DEFLATE-compressed data
* @param detachable whether {@code detach()} can be called later
* @throws NullPointerException if the input stream is {@code null}
* @throws IllegalArgumentException if {@code detach == true} but {@code in.markSupported() == false}
*/
public InflaterInputStream(InputStream in, boolean detachable) {
this(in, detachable, 16 * 1024); // Use a reasonable default input buffer size
}
/**
* Constructs an inflater input stream over the specified underlying input stream, with the
* specified options for detachability and input buffer size. The underlying stream must
* contain DEFLATE-compressed data with no headers or footers (e.g. must be unwrapped from
* the zlib or gzip container formats). Detachability allows {@link #detach()} to be called,
* and requires the specified input stream to support marking.
* @param in the underlying input stream of raw DEFLATE-compressed data
* @param detachable whether {@code detach()} can be called later
* @param inBufLen the size of the internal read buffer, which must be positive
* @throws NullPointerException if the input stream is {@code null}
* @throws IllegalArgumentException if {@code inBufLen < 1}
* @throws IllegalArgumentException if {@code detach == true} but {@code in.markSupported() == false}
*/
public InflaterInputStream(InputStream in, boolean detachable, int inBufLen) {
// Handle the input stream and detachability
super(in);
if (inBufLen <= 0)
throw new IllegalArgumentException("Input buffer size must be positive");
isDetachable = detachable;
if (detachable) {
if (in.markSupported())
in.mark(0);
else
throw new IllegalArgumentException("Input stream not markable, cannot support detachment");
}
// Initialize data buffers
inputBuffer = new byte[inBufLen];
inputBufferLength = 0;
inputBufferIndex = 0;
inputBitBuffer = 0;
inputBitBufferLength = 0;
outputBufferLength = 0;
outputBufferIndex = 0;
dictionary = new byte[DICTIONARY_LENGTH];
dictionaryIndex = 0;
// Initialize state
state = 0;
exception = null;
isLastBlock = false;
literalLengthCodeTree = null;
literalLengthCodeTable = null;
distanceCodeTree = null;
distanceCodeTable = null;
}
/**
* Extra constructor added for org.eclipse.mat.hprof
*
* Only safe to use copy or original once the underlying stream
* has been positioned to the appropriate location.
* Added by Eclipse MAT.
* @param copy the original stream
*/
public InflaterInputStream(InflaterInputStream copy)
{
this(copy.in, copy.isDetachable);
if (copy.inputBuffer != null)
inputBuffer = Arrays.copyOf(copy.inputBuffer, copy.inputBuffer.length);
inputBufferLength = copy.inputBufferLength;
inputBufferIndex = copy.inputBufferIndex;
inputBitBuffer = copy.inputBitBuffer;
inputBitBufferLength = copy.inputBitBufferLength;
outputBufferLength = copy.outputBufferLength;
outputBufferIndex = copy.outputBufferIndex;
if (copy.dictionary != null)
dictionary = Arrays.copyOf(copy.dictionary, copy.dictionary.length);
else
dictionary = null;
dictionaryIndex = copy.dictionaryIndex;
markPos = copy.markPos;
// Initialize state
state = copy.state;
if (copy.exception != null)
exception = new IOException(copy.exception.toString());
isLastBlock = copy.isLastBlock;
if (copy.literalLengthCodeTree != null)
literalLengthCodeTree = Arrays.copyOf(copy.literalLengthCodeTree, copy.literalLengthCodeTree.length);
if (copy.literalLengthCodeTable != null)
literalLengthCodeTable = Arrays.copyOf(copy.literalLengthCodeTable, copy.literalLengthCodeTable.length);
if (copy.distanceCodeTree != null)
distanceCodeTree = Arrays.copyOf(copy.distanceCodeTree, copy.distanceCodeTree.length);
if (copy.distanceCodeTable != null)
distanceCodeTable = Arrays.copyOf(copy.distanceCodeTable, copy.distanceCodeTable.length);
}
/*---- Public API methods ----*/
/**
* Reads the next byte of decompressed data from this stream. If data is available
* then a number in the range [0, 255] is returned (blocking if necessary);
* otherwise −1 is returned if the end of stream is reached.
* @return the next unsigned byte of data, or −1 for the end of stream
* @throws IOException if an I/O exception occurred in the underlying input stream, the end of
* stream was encountered at an unexpected position, or the compressed data has a format error
* @throws IllegalStateException if the stream has already been closed
*/
public int read() throws IOException {
// In theory this method for reading a single byte could be implemented somewhat faster.
// We could take the logic of read(byte[],int,int) and simplify it for the special case
// of handling one byte. But if the caller chose to use this read() method instead of
// the bulk read(byte[]) method, then they have already chosen to not care about speed.
// Therefore speeding up this method would result in needless complexity. Instead,
// we chose to optimize this method for simplicity and ease of verifying correctness.
while (true) {
byte[] b = new byte[1];
switch (read(b)) {
case 1:
return (b[0] & 0xFF);
case 0:
continue;
case -1:
return -1;
default:
throw new AssertionError();
}
}
}
/**
* Reads some bytes from the decompressed data of this stream into the specified array's subrange.
* This returns the number of data bytes that were stored into the array, and is in the range
* [−1, len]. Note that 0 can be returned even if the end of stream hasn't been reached yet.
* @throws NullPointerException if the array is {@code null}
* @throws ArrayIndexOutOfBoundsException if the array subrange is out of bounds
* @throws IOException if an I/O exception occurred in the underlying input stream, the end of
* stream was encountered at an unexpected position, or the compressed data has a format error
* @throws IllegalStateException if the stream has already been closed
*/
public int read(byte[] b, int off, int len) throws IOException {
// Check arguments and state
Objects.requireNonNull(b);
if (off < 0 || off > b.length || len < 0 || b.length - off < len)
throw new ArrayIndexOutOfBoundsException();
if (in == null)
throw new IllegalStateException("Stream already closed");
if (exception != null)
throw exception;
// Special handling for empty read request
if (len == 0)
return (outputBufferLength > 0 || state != 0 || !isLastBlock) ? 0 : -1;
assert len > 0;
int result = 0; // Number of bytes filled in the array 'b'
// First move bytes (if any) from the output buffer [which is actually in the dictionary]
if (outputBufferLength > 0) {
// There could be a new mark while we are re-reading data, or reading duplicated bytes.
if (markPos == -2)
markPos = (dictionaryIndex - (outputBufferLength - outputBufferIndex)) & DICTIONARY_MASK;
int n = Math.min(outputBufferLength - outputBufferIndex, len);
// Move up to end of dictionary array
int i = (dictionaryIndex - (outputBufferLength - outputBufferIndex)) & DICTIONARY_MASK;
int n1 = Math.min(n, DICTIONARY_LENGTH - i);
System.arraycopy(dictionary, i, b, off + result, n1);
result = n1;
outputBufferIndex += n1;
// Move the remainder
if (n1 < n) {
// Wrap
n1 = n - n1;
System.arraycopy(dictionary, 0, b, off + result, n1);
result += n1;
outputBufferIndex += n1;
}
if (outputBufferIndex == outputBufferLength) {
outputBufferLength = 0;
outputBufferIndex = 0;
}
if (result == len)
return result;
}
// Now the output buffer is clear, and we have room to read at least one byte
assert outputBufferLength == 0 && outputBufferIndex == 0 && result < len;
// Get into a block if not already inside one
while (state == 0) {
if (isLastBlock)
return (result != 0) ? result : -1;
// Read and process the block header
isLastBlock = readBits(1) == 1;
switch (readBits(2)) { // Type
case 0:
alignInputToByte();
state = readBits(16); // Block length
if (state != (readBits(16) ^ 0xFFFF))
destroyAndThrow(new DataFormatException("len/nlen mismatch in uncompressed block"));
break;
case 1:
state = -1;
literalLengthCodeTree = FIXED_LITERAL_LENGTH_CODE_TREE;
literalLengthCodeTable = FIXED_LITERAL_LENGTH_CODE_TABLE;
distanceCodeTree = FIXED_DISTANCE_CODE_TREE;
distanceCodeTable = FIXED_DISTANCE_CODE_TABLE;
break;
case 2:
state = -1;
decodeHuffmanCodes();
break;
case 3:
destroyAndThrow(new DataFormatException("Reserved block type"));
break;
default:
throw new AssertionError();
}
}
// Read the current block's data into the argument array
if (1 <= state && state <= 0xFFFF) {
// Read bytes from uncompressed block
int toRead = Math.min(state, len - result);
readBytes(b, off + result, toRead);
updateMark(dictionaryIndex, toRead);
for (int i = 0; i < toRead; i++) {
dictionary[dictionaryIndex] = b[off + result];
dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
result++;
}
state -= toRead;
return result;
} else if (state == -1) // Decode symbols from Huffman-coded block
return result + readInsideHuffmanBlock(b, off + result, len - result);
else if (state == -4) {
// Modification for Eclipse MAT
int n = Math.min(inputBufferLength - inputBufferIndex + (inputBitBufferLength >> 3), len - result);
if (n > 0) {
readBytes(b, off + result, n);
result += n;
} else if (result < len) {
n = len - result;
int r = in.read(b, off + result, n);
if (r >= 0) {
result += r;
} else if (result == 0)
result = r;
}
return result;
} else
throw new AssertionError("Impossible state");
}
/**
* Notes the current position of the output, so that later the caller can
* go back to this spot by calling reset.
* This uses the dictionary array as a cache of the last bytes returned to the caller.
* The dictionary is 32768 bytes in size, however when reading compressed data
* the inflator can generate an extra 257 bytes which are not returned to the caller
* but are stored in the dictionary. There is therefore a limit of 32768 - 257 = 32211 bytes
* that can be stored.
* Strictly speaking, the InputStream contract expects any value of limit to be
* honoured, and for implementation to add extra buffering to achieve this.
* Addition for Eclipse MAT.
* @param limit the caller can read at least this many bytes before calling {@link #reset()}
* @see #reset()
*/
@Override
public void mark(int limit)
{
if (limit > DICTIONARY_LENGTH - OUTPUT_BUFFER_LENGTH)
throw new IllegalArgumentException(String.valueOf(limit));
if (state == -4)
throw new IllegalStateException(String.valueOf(state));
markPos = -2;
}
/**
* Goes back in the output to the point where {@link #mark(int)} was called.
* Addition for Eclipse MAT
* @throws IOException if it is not possible to go back to the mark point.
* The current position is then unchanged.
* @see #mark(int)
*/
@Override
public void reset() throws IOException
{
// If we are between chunks then reset doesn't make sense at it
// only operates on decompressed data.
if (state == -4)
throw new IllegalStateException(String.valueOf(state));
if (markPos >= 0) {
int toread;
if (dictionaryIndex > markPos)
toread = dictionaryIndex - markPos;
else
toread = DICTIONARY_LENGTH - markPos + dictionaryIndex;
if (outputBufferLength > toread) {
assert outputBufferIndex >= outputBufferLength - toread;
outputBufferIndex = outputBufferLength - toread;
} else {
outputBufferLength = toread;
outputBufferIndex = 0;
}
} else if (markPos == -3)
throw new IOException("too far");
else if (markPos == -2)
{
// mark() immediately followed by reset() so we don't need to do anything
// mark is still valid for a subsequent reset.
}
else if (markPos == -1)
throw new IOException("no mark");
else
throw new IllegalStateException(String.valueOf(markPos));
}
/**
* Does this stream support {@link #mark(int)} and {@link #reset()} ?
* It does.
* Addition for Eclipse MAT.
* @return true
* @see #mark(int)
* @see #reset()
*/
@Override
public boolean markSupported()
{
return true;
}
/**
* The number of bytes which can be read without blocking.
* With the underlying stream it is not clear how many bytes those
* will expand to so just rely on what is in the buffers.
* Addition for Eclipse MAT.
* @return the number of bytes which can be read without blocking.
*/
@Override
public int available() throws IOException {
long input;
int actual = (outputBufferLength - outputBufferIndex);
if (state == -4) {
// pass through mode
input = inputBufferLength - inputBufferIndex + (inputBitBufferLength >> 3);
} else {
// The amount of compressed data
input = inputBufferLength - inputBufferIndex + (inputBitBufferLength >> 3);
if (state > 0) {
// Uncompressed block
if (input > state)
input = state;
} else if (state == -1 && input >= 3) {
// With 48 bits of input we can decode at least one symbol
// However, the symbol could be end of block!
input = 0;
} else {
// We don't know how much can be read without going to the underlying stream
input = 0;
}
}
return (int) Math.min(actual + input, Integer.MAX_VALUE);
}
//@Override
public long skip(long n) throws IOException
{
if (n <= 0)
return 0;
long skipped = 0;
if (outputBufferLength > 0) {
// There could be a new mark while we are re-reading data, or reading duplicated bytes.
if (markPos == -2)
markPos = (dictionaryIndex - (outputBufferLength - outputBufferIndex)) & DICTIONARY_MASK;
if (n >= outputBufferLength - outputBufferIndex) {
skipped = outputBufferLength - outputBufferIndex;
outputBufferLength = 0;
outputBufferIndex = 0;
n -= skipped;
} else {
outputBufferIndex += n;
return n;
}
}
byte buf[] = null;
while (n > 0)
{
int toskip = (int)Math.min(n, 16384);
if (buf == null)
buf = new byte[toskip];
long r = read(buf, 0, toskip);
if (r == -1)
{
break;
}
skipped += r;
n -= r;
}
return skipped;
}
/**
* See if too much data has been read for a {@link #reset()} to not work anymore.
* Addition for Eclipse MAT.
* @param dictionaryIndex the next place to store in the dictionary
* @param count the number of bytes to add to dictionary
*/
private void updateMark(int dictionaryIndex, int count)
{
if (markPos == -1)
return;
else if (markPos == -2) {
if (count <= DICTIONARY_LENGTH)
markPos = dictionaryIndex;
else
markPos = -3;
} else if (count >= DICTIONARY_LENGTH)
markPos = -3;
else if (markPos >= dictionaryIndex)
if (dictionaryIndex + count >= markPos)
markPos = -3;
else if (((dictionaryIndex + count) & DICTIONARY_MASK) >= markPos)
markPos = -3;
}
private void endofblock() {
if (isLastBlock && !(markPos >= 0 || outputBufferLength > 0)) {
// Clear the dictionary early
dictionary = null;
dictionaryIndex = -1;
}
}
// This method exists to split up the public read(byte[],int,int) method that is rather long.
// For internal use only; must only be called by read(byte[],int,int). The input array's subrange must be valid
// (the caller checks the preconditions). The current state must be -1. This returns a number in the range [0, len].
private int readInsideHuffmanBlock(byte[] b, int off, int len) throws IOException {
int result = 0;
while (result < len) {
// Try to fill the input bit buffer (somewhat similar to logic in readBits())
if (inputBitBufferLength < 48) {
byte[] c = inputBuffer; // Shorter name
int i = inputBufferIndex; // Shorter name
int numBytes = Math.min((64 - inputBitBufferLength) >>> 3, inputBufferLength - i);
assert 0 <= numBytes && numBytes <= 8;
if (numBytes == 2) { // Only implement special cases that occur frequently in practice
inputBitBuffer |= (long)((c[i]&0xFF) | (c[i+1]&0xFF)<<8) << inputBitBufferLength;
inputBitBufferLength += 2 * 8;
inputBufferIndex += 2;
} else if (numBytes == 3) {
inputBitBuffer |= (long)((c[i]&0xFF) | (c[i+1]&0xFF)<<8 | (c[i+2]&0xFF)<<16) << inputBitBufferLength;
inputBitBufferLength += 3 * 8;
inputBufferIndex += 3;
} else if (numBytes == 4) {
inputBitBuffer |= (((c[i]&0xFF) | (c[i+1]&0xFF)<<8 | (c[i+2]&0xFF)<<16 | c[i+3]<<24) & 0xFFFFFFFFL) << inputBitBufferLength;
inputBitBufferLength += 4 * 8;
inputBufferIndex += 4;
} else { // This slower general logic is valid for 0 <= numBytes <= 8
for (int j = 0; j < numBytes; j++, inputBitBufferLength += 8, inputBufferIndex++)
inputBitBuffer |= (c[inputBufferIndex] & 0xFFL) << inputBitBufferLength;
}
}
// The worst-case number of bits consumed in one iteration:
// length (symbol (15) + extra (5)) + distance (symbol (15) + extra (13)) = 48.
// This allows us to do decoding entirely from the bit buffer, avoiding the byte buffer or actual I/O.
if (inputBitBufferLength >= 48) { // Fast path
// Decode next literal/length symbol (a customized version of decodeSymbol())
int sym;
{
int temp = literalLengthCodeTable[(int)inputBitBuffer & CODE_TABLE_MASK];
assert temp >= 0; // No need to mask off sign extension bits
int consumed = temp >>> 11;
inputBitBuffer >>>= consumed;
inputBitBufferLength -= consumed;
int node = (temp << 21) >> 21; // Sign extension from 11 bits
while (node >= 0) {
node = literalLengthCodeTree[node + ((int)inputBitBuffer & 1)];
inputBitBuffer >>>= 1;
inputBitBufferLength--;
}
sym = ~node;
}
// Handle the symbol by ranges
assert 0 <= sym && sym <= 287;
if (sym < 256) { // Literal byte
b[off + result] = (byte)sym;
updateMark(dictionaryIndex, 1);
dictionary[dictionaryIndex] = (byte)sym;
dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
result++;
} else if (sym > 256) { // Length and distance for copying
// Decode the run length (a customized version of decodeRunLength())
assert 257 <= sym && sym <= 287;
if (sym > 285)
destroyAndThrow(new DataFormatException("Reserved run length symbol: " + sym));
int run;
{
int temp = RUN_LENGTH_TABLE[sym - 257];
run = temp >>> 3;
int numExtraBits = temp & 7;
run += (int)inputBitBuffer & ((1 << numExtraBits) - 1);
inputBitBuffer >>>= numExtraBits;
inputBitBufferLength -= numExtraBits;
}
assert 3 <= run && run <= 258;
// Decode next distance symbol (a customized version of decodeSymbol())
if (distanceCodeTree == null)
destroyAndThrow(new DataFormatException("Length symbol encountered with empty distance code"));
int distSym;
{
int temp = distanceCodeTable[(int)inputBitBuffer & CODE_TABLE_MASK];
assert temp >= 0; // No need to mask off sign extension bits
int consumed = temp >>> 11;
inputBitBuffer >>>= consumed;
inputBitBufferLength -= consumed;
int node = (temp << 21) >> 21; // Sign extension from 11 bits
while (node >= 0) { // Medium path
node = distanceCodeTree[node + ((int)inputBitBuffer & 1)];
inputBitBuffer >>>= 1;
inputBitBufferLength--;
}
distSym = ~node;
}
assert 0 <= distSym && distSym <= 31;
// Decode the distance (a customized version of decodeDistance())
if (distSym > 29)
destroyAndThrow(new DataFormatException("Reserved distance symbol: " + distSym));
int dist;
{
int temp = DISTANCE_TABLE[distSym];
dist = temp >>> 4;
int numExtraBits = temp & 0xF;
dist += (int)inputBitBuffer & ((1 << numExtraBits) - 1);
inputBitBuffer >>>= numExtraBits;
inputBitBufferLength -= numExtraBits;
}
assert 1 <= dist && dist <= 32768;
assert inputBitBufferLength >= 0;
// Copy bytes to output and dictionary
int dictReadIndex = (dictionaryIndex - dist) & DICTIONARY_MASK;
if (len - result >= run) { // Nice case with less branching
updateMark(dictionaryIndex, run);
for (int i = 0; i < run; i++) {
byte bb = dictionary[dictReadIndex];
dictionary[dictionaryIndex] = bb;
b[off + result] = bb;
dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
result++;
}
} else { // General case
assert outputBufferLength == 0;
updateMark(dictionaryIndex, run);
for (int i = 0; i < run; i++) {
byte bb = dictionary[dictReadIndex];
dictionary[dictionaryIndex] = bb;
dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
if (result < len) {
b[off + result] = bb;
result++;
} else {
assert outputBufferLength < DICTIONARY_LENGTH;
assert outputBufferLength < OUTPUT_BUFFER_LENGTH;
outputBufferLength++;
}
}
}
} else { // sym == 256, end of block
literalLengthCodeTree = null;
literalLengthCodeTable = null;
distanceCodeTree = null;
distanceCodeTable = null;
state = 0;
endofblock();
break;
}
} else { // General case (always correct), when not enough bits in buffer to guarantee reading
int sym = decodeSymbol(literalLengthCodeTree);
assert 0 <= sym && sym <= 287;
if (sym < 256) { // Literal byte
b[off + result] = (byte)sym;
updateMark(dictionaryIndex, 1);
dictionary[dictionaryIndex] = (byte)sym;
dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
result++;
} else if (sym > 256) { // Length and distance for copying
int run = decodeRunLength(sym);
assert 3 <= run && run <= 258;
if (distanceCodeTree == null)
destroyAndThrow(new DataFormatException("Length symbol encountered with empty distance code"));
int distSym = decodeSymbol(distanceCodeTree);
assert 0 <= distSym && distSym <= 31;
int dist = decodeDistance(distSym);
assert 1 <= dist && dist <= 32768;
assert outputBufferLength == 0;
// Copy bytes to output and dictionary
int dictReadIndex = (dictionaryIndex - dist) & DICTIONARY_MASK;
updateMark(dictionaryIndex, run);
for (int i = 0; i < run; i++) {
byte bb = dictionary[dictReadIndex];
dictReadIndex = (dictReadIndex + 1) & DICTIONARY_MASK;
dictionary[dictionaryIndex] = bb;
dictionaryIndex = (dictionaryIndex + 1) & DICTIONARY_MASK;
if (result < len) {
b[off + result] = bb;
result++;
} else {
assert outputBufferLength < DICTIONARY_LENGTH;
assert outputBufferLength < OUTPUT_BUFFER_LENGTH;
outputBufferLength++;
}
}
} else { // sym == 256, end of block
literalLengthCodeTree = null;
literalLengthCodeTable = null;
distanceCodeTree = null;
distanceCodeTable = null;
state = 0;
endofblock();
break;
}
}
}
return result;
}
/**
* Detaches the underlying input stream from this decompressor. This puts the underlying stream
* at the position of the first byte after the data that this decompressor actually consumed.
* Calling {@code detach()} invalidates this stream object but doesn't close the underlying stream.
* <p>This method exists because for efficiency, the decompressor may read more bytes from the
* underlying stream than necessary to produce the decompressed data. If you want to continue
* reading the underlying stream exactly after the point the DEFLATE-compressed data ends,
* then it is necessary to call this detach method.</p>
* <p>This can only be called once, and is mutually exclusive with respect to calling
* {@link #close()}. It is illegal to call {@link #read()} after detaching.</p>
* @throws IllegalStateException if detach was already called or this stream has been closed
* @throws IOException if an I/O exception occurred
*/
public void detach() throws IOException {
// Modifications for Eclipse MAT
//if (!isDetachable)
// throw new IllegalStateException("Detachability not specified at construction");
if (in == null)
throw new IllegalStateException("Input stream already detached/closed");
if (exception != null)
throw exception;
if (!isDetachable)
{
if (state == -4)
throw new IllegalStateException("Input stream already detached");
endofblock();
state = -4;
return;
}
// Rewind the underlying stream, then skip over bytes that were already consumed.
// Note that a byte with some bits consumed is considered to be fully consumed.
in.reset();
int skip = inputBufferIndex - inputBitBufferLength / 8;
assert skip >= 0;
while (skip > 0) {
long n = in.skip(skip);
if (n <= 0)
throw new EOFException();
skip -= n;
}
in = null;
state = -3;
destroyState();
}
/**
* Resume decompression.
* Addition for Eclipse MAT.
*/
public void attach()
{
if (state != -4)
throw new IllegalStateException("Not detached");
// Keep the old dictionary if it is needed for reset()
// Could clear it (but no point) if (!(markPos >= 0 || outputBufferLength > 0))
if (dictionary == null) {
// The dictionary was cleared earlier
dictionary = new byte[DICTIONARY_LENGTH];
dictionaryIndex = 0;
}
isLastBlock = false;
state = 0;
}
public String toString() {
return this.getClass()+" "+state+" "+(dictionary == null ? "no buf" : dictionary);
}
/**
* Closes this input stream and the underlying stream.
* It is illegal to call {@link #read()} or {@link #detach()} after closing.
* It is idempotent to call this {@link #close()} method more than once.
* @throws IOException if an I/O exception occurred in the underlying stream
*/
public void close() throws IOException {
if (in == null)
return;
super.close();
in = null;
state = -3;
exception = null;
destroyState();
}
/*---- Huffman coding methods ----*/
// Reads the current block's dynamic Huffman code tables from from the input buffers/stream,
// processes the code lengths and computes the code trees, and ultimately sets just the variables
// {literalLengthCodeTree, literalLengthCodeTable, distanceCodeTree, distanceCodeTable}.
// This might throw an IOException for actual I/O exceptions, unexpected end of stream,
// or a description of an invalid Huffman code.
private void decodeHuffmanCodes() throws IOException {
int numLitLenCodes = readBits(5) + 257; // hlit + 257
int numDistCodes = readBits(5) + 1; // hdist + 1
// Read the code length code lengths
int numCodeLenCodes = readBits(4) + 4; // hclen + 4
byte[] codeLenCodeLen = new byte[19]; // This array is filled in a strange order
for (int i = 0; i < numCodeLenCodes; i++)
codeLenCodeLen[CODE_LENGTH_CODE_ORDER[i]] = (byte)readBits(3);
short[] codeLenCodeTree;
try {
codeLenCodeTree = codeLengthsToCodeTree(codeLenCodeLen);
} catch (DataFormatException e) {
destroyAndThrow(e);
throw new AssertionError("Unreachable");
}
// Read the main code lengths and handle runs
byte[] codeLens = new byte[numLitLenCodes + numDistCodes];
byte runVal = -1;
int runLen = 0;
for (int i = 0; i < codeLens.length; ) {
if (runLen > 0) {
assert runVal != -1;
codeLens[i] = runVal;
runLen--;
i++;
} else {
int sym = decodeSymbol(codeLenCodeTree);
assert 0 <= sym && sym <= 18;
if (sym < 16) {
runVal = codeLens[i] = (byte)sym;
i++;
} else if (sym == 16) {
if (runVal == -1)
destroyAndThrow(new DataFormatException("No code length value to copy"));
runLen = readBits(2) + 3;
} else if (sym == 17) {
runVal = 0;
runLen = readBits(3) + 3;
} else { // sym == 18
runVal = 0;
runLen = readBits(7) + 11;
}
}
}
if (runLen > 0)
destroyAndThrow(new DataFormatException("Run exceeds number of codes"));
// Create literal-length code tree
byte[] litLenCodeLen = Arrays.copyOf(codeLens, numLitLenCodes);
try {
literalLengthCodeTree = codeLengthsToCodeTree(litLenCodeLen);
} catch (DataFormatException e) {
destroyAndThrow(e);
throw new AssertionError("Unreachable");
}
literalLengthCodeTable = codeTreeToCodeTable(literalLengthCodeTree);
// Create distance code tree with some extra processing
byte[] distCodeLen = Arrays.copyOfRange(codeLens, numLitLenCodes, codeLens.length);
if (distCodeLen.length == 1 && distCodeLen[0] == 0)
distanceCodeTree = null; // Empty distance code; the block shall be all literal symbols
else {
// Get statistics for upcoming logic
int oneCount = 0;
int otherPositiveCount = 0;
for (byte x : distCodeLen) {
if (x == 1)
oneCount++;
else if (x > 1)
otherPositiveCount++;
}
// Handle the case where only one distance code is defined
if (oneCount == 1 && otherPositiveCount == 0) {
// Add a dummy invalid code to make the Huffman tree complete
distCodeLen = Arrays.copyOf(distCodeLen, 32);
distCodeLen[31] = 1;
}
try {
distanceCodeTree = codeLengthsToCodeTree(distCodeLen);
} catch (DataFormatException e) {
destroyAndThrow(e);
throw new AssertionError("Unreachable");
}
distanceCodeTable = codeTreeToCodeTable(distanceCodeTree);
}
}
/*
* Converts the given array of symbol code lengths into a canonical code tree.
* A symbol code length is either zero (absent from the tree) or a positive integer.
*
* A code tree is an array of integers, where each pair represents a node.
* Each pair is adjacent and starts on an even index. The first element of
* the pair represents the left child and the second element represents the
* right child. The root node is at index 0. If an element is non-negative,
* then it is the index of the child node in the array. Otherwise it is the
* bitwise complement of the leaf symbol. This tree is used in decodeSymbol()
* and codeTreeToCodeTable(). Not every element of the array needs to be
* used, nor do used elements need to be contiguous.
*
* For example, this Huffman tree:
* o
* / \
* o \
* / \ \
* 'a' 'b' 'c'
* is serialized as this array:
* {2, ~'c', ~'a', ~'b'}
* because the root is located at index 0 and the other internal node is
* located at index 2.
*/
private static short[] codeLengthsToCodeTree(byte[] codeLengths) throws DataFormatException {
// Allocate array for the worst case if all symbols are present
short[] result = new short[(codeLengths.length - 1) * 2];
Arrays.fill(result, CODE_TREE_UNUSED_SLOT);
result[0] = CODE_TREE_OPEN_SLOT;
result[1] = CODE_TREE_OPEN_SLOT;
int allocated = 2; // Always even in this algorithm
int maxCodeLen = 0;
for (byte cl : codeLengths) {
assert 0 <= cl && cl <= 15;
maxCodeLen = Math.max(cl, maxCodeLen);
}
if (maxCodeLen > 15)
throw new AssertionError("Maximum code length exceeds DEFLATE specification");
// Allocate Huffman tree nodes according to ascending code lengths
for (int curCodeLen = 1; curCodeLen <= maxCodeLen; curCodeLen++) {
// Loop invariant: Each OPEN child slot in the result array has depth curCodeLen
// Allocate all symbols of current code length to open slots in ascending order
int resultIndex = 0;
for (int symbol = 0; ; ) {
// Find next symbol having current code length
while (symbol < codeLengths.length && codeLengths[symbol] != curCodeLen)
symbol++;
if (symbol == codeLengths.length)
break; // No more symbols to process
// Find next open child slot
while (resultIndex < allocated && result[resultIndex] != CODE_TREE_OPEN_SLOT)
resultIndex++;
if (resultIndex == allocated) // No more slots left
throw new DataFormatException("Canonical code fails to produce full Huffman code tree");
// Put the symbol into the slot and increment
result[resultIndex] = (short)~symbol;
resultIndex++;