|
| 1 | +import blosc |
| 2 | +import pyzfp |
| 3 | +import numpy as np |
| 4 | +from contexttimer import Timer |
| 5 | +from functools import partial |
| 6 | +import pickle |
| 7 | + |
| 8 | + |
# Per-scheme option defaults; init_compression fills in any of these keys
# that the caller did not supply.
DEFAULTS = {
    None: {},
    'blosc': {'chunk_size': 1000000},
    'zfp': {'tolerance': 1e-7, 'parallel': True},
}
| 11 | + |
| 12 | + |
def init_compression(params):
    """Build the compressor/decompressor pair described by ``params``.

    ``params`` is a dict. It is copied first, so the caller's dict is left
    untouched. The ``'scheme'`` key (default ``None``) selects the
    implementation:

    * ``'custom'``: the callables are taken from the ``'compressor'`` and
      ``'decompressor'`` keys and no defaults are applied.
    * anything else: the callables are looked up in the module-level
      ``compressors`` / ``decompressors`` tables, and options missing from
      ``params`` are filled in from ``DEFAULTS[scheme]``.

    Whatever keys remain after that form the options dict.

    Returns:
        A ``(compressor, decompressor)`` tuple of ``functools.partial``
        objects, each with the options dict bound as its first positional
        argument.

    Raises:
        KeyError: if ``scheme`` is neither ``'custom'`` nor a key of the
            lookup tables.
        TypeError: if ``scheme`` is ``'custom'`` and either callable is
            missing or not callable.
    """
    options = params.copy()
    scheme = options.pop('scheme', None)
    if scheme == 'custom':
        compressor = options.pop('compressor', None)
        decompressor = options.pop('decompressor', None)
        if not (callable(compressor) and callable(decompressor)):
            raise TypeError("scheme 'custom' requires callable 'compressor' "
                            "and 'decompressor' entries")
        # DEFAULTS has no 'custom' entry, so a custom scheme gets exactly
        # the options the caller passed.
    else:
        compressor = compressors[scheme]
        decompressor = decompressors[scheme]
        for key, value in DEFAULTS[scheme].items():
            options.setdefault(key, value)
    return partial(compressor, options), partial(decompressor, options)
| 29 | + |
| 30 | + |
def no_compression_in(params, indata):
    """Wrap ``indata`` in a CompressedObject without compressing it.

    ``params`` is accepted for signature compatibility with the other
    compressors and is not used. The payload is a memoryview over the
    array's bytes; shape and dtype are recorded alongside it.
    """
    raw_view = memoryview(indata.tobytes())
    return CompressedObject(raw_view, shape=indata.shape, dtype=indata.dtype)
| 34 | + |
| 35 | + |
def no_compression_out(params, indata):
    """Rebuild an array from an uncompressed payload.

    ``params`` is accepted for signature compatibility with the other
    decompressors and is not used. ``indata`` must expose ``data``,
    ``dtype`` and ``shape`` attributes; the buffer is reinterpreted with
    that dtype and reshaped to that shape.
    """
    flat = np.frombuffer(indata.data, dtype=indata.dtype)
    return flat.reshape(indata.shape)
| 38 | + |
| 39 | + |
def blosc_compress(params, indata):
    """Compress an array with blosc, one fixed-size chunk at a time.

    The array's bytes are split into pieces of ``params['chunk_size']``
    bytes (the last piece may be shorter). Each piece is passed to
    ``blosc.compress`` with no further arguments, and the compressed pieces
    are concatenated into one payload.

    Args:
        params: options dict; ``'chunk_size'`` must be present.
        indata: array to compress; must provide ``tobytes()``, ``shape``
            and ``dtype``.

    Returns:
        A CompressedObject whose metadata holds ``'shape'``, ``'dtype'``
        and ``'chunks'``. ``'chunks'`` is the list of compressed piece
        lengths that blosc_decompress uses to split the payload again.
    """
    # tobytes() replaces tostring(), which is deprecated and was removed in
    # NumPy 2.0.
    raw = indata.tobytes()
    chunk_size = params.get('chunk_size')
    pieces = [blosc.compress(raw[start:start + chunk_size])
              for start in range(0, len(raw), chunk_size)]
    metadata = {'shape': indata.shape, 'dtype': indata.dtype,
                'chunks': [len(piece) for piece in pieces]}
    # A single join avoids re-copying the growing payload for every chunk.
    return CompressedObject(data=b''.join(pieces), metadata=metadata)
| 58 | + |
| 59 | + |
def blosc_decompress(params, indata):
    """Rebuild an array from a chunked blosc payload.

    ``indata.metadata['chunks']`` lists the compressed length of each piece
    in order. The payload ``indata.data`` is cut at those lengths, each
    piece is passed to ``blosc.decompress``, and the results are
    concatenated.

    Args:
        params: accepted for signature compatibility; not used.
        indata: object exposing ``data``, ``metadata``, ``dtype`` and
            ``shape``.

    Returns:
        The decompressed bytes reinterpreted as ``indata.dtype`` and
        reshaped to ``indata.shape``.
    """
    compressed = indata.data
    pieces = []
    offset = 0
    for length in indata.metadata['chunks']:
        pieces.append(blosc.decompress(compressed[offset:offset + length]))
        offset += length
    # A single join avoids re-copying the growing buffer for every chunk.
    flat = np.frombuffer(b''.join(pieces), dtype=indata.dtype)
    return flat.reshape(indata.shape)
| 73 | + |
| 74 | + |
class CompressedObject(object):
    """Payload plus the metadata needed to turn it back into an array.

    Construct it either with ``shape``/``dtype`` or with a ``metadata``
    dict that contains at least the ``'shape'`` and ``'dtype'`` keys;
    supplying both forms at once trips an assertion.

    Attributes set on the instance: ``data``, ``shape``, ``dtype``,
    ``metadata`` and ``pickled_metadata`` (``metadata`` serialised with
    ``pickle.dumps``).
    """

    def __init__(self, data, shape=None, dtype=None, metadata=None):
        # The two ways of describing the array are mutually exclusive.
        assert metadata is None or (shape is None and dtype is None)
        if metadata is None:
            metadata = {'shape': shape, 'dtype': dtype}
        else:
            assert 'shape' in metadata and 'dtype' in metadata
            shape, dtype = metadata['shape'], metadata['dtype']
        self.shape = shape
        self.dtype = dtype
        self.data = data
        self.metadata = metadata
        self.pickled_metadata = pickle.dumps(self.metadata)
| 89 | + |
| 90 | + |
def zfp_compress(params, indata):
    """Compress ``indata`` with pyzfp and wrap the result.

    Every entry of ``params`` is forwarded to ``pyzfp.compress`` as a
    keyword argument. The returned CompressedObject holds a memoryview of
    the compressed result together with the input's shape and dtype.
    """
    payload = pyzfp.compress(indata, **params)
    return CompressedObject(memoryview(payload), shape=indata.shape,
                            dtype=indata.dtype)
| 94 | + |
| 95 | + |
def zfp_decompress(params, indata):
    """Decompress a CompressedObject with pyzfp.

    ``indata`` must be a CompressedObject (checked with an assertion). Its
    payload, shape and dtype are passed positionally to
    ``pyzfp.decompress``, followed by every entry of ``params`` as keyword
    arguments.
    """
    assert isinstance(indata, CompressedObject)
    payload, shape, dtype = indata.data, indata.shape, indata.dtype
    return pyzfp.decompress(payload, shape, dtype, **params)
| 100 | + |
| 101 | + |
# Scheme name -> compression callable; None selects the pass-through.
compressors = {
    None: no_compression_in,
    'blosc': blosc_compress,
    'zfp': zfp_compress,
}
# Scheme name -> matching decompression callable.
decompressors = {
    None: no_compression_out,
    'blosc': blosc_decompress,
    'zfp': zfp_decompress,
}
# Built-in scheme names, in the order the compressors table declares them.
allowed_names = list(compressors)
0 commit comments