|
| 1 | +""" |
| 2 | + Speed benchmark for saving/loading numpy arrays using various compression codecs |
| 3 | +""" |
1 | 4 | import jdata as jd |
2 | 5 | import numpy as np |
3 | 6 | import time |
4 | 7 | import os |
5 | 8 |
|
# Report the installed jdata version alongside the benchmark output.
print(f"jdata version:{jd.__version__}")

# Codecs to benchmark. benchmark() writes "npy" and "npz" with NumPy's own
# save routines; every other name is passed to jdata as the "compression"
# option.
codecs = [
    "npy",
    "npz",
    "zlib",
    "lzma",
    "lz4",
    "blosc2blosclz",
    "blosc2lz4",
    "blosc2lz4hc",
    "blosc2zlib",
    "blosc2zstd",
]

# Value of the "nthread" option that benchmark() hands to jd.save / jd.load.
nthread = 8
| 13 | + |
9 | 14 |
|
def benchmark(codec, x):
    """Time one save/load round trip of ``x`` using ``codec``.

    The array is written to ``matrix_<codec><ext>`` in the current working
    directory and read back. ``"npy"`` and ``"npz"`` go through NumPy
    (``np.save`` / ``np.savez_compressed``) and use ``.npy`` / ``.npz`` as
    the extension. Any other codec is passed to ``jd.save`` as the
    ``"compression"`` option, uses the module-level ``suffix`` as extension
    and the module-level ``nthread`` as the ``"nthread"`` option.

    Args:
        codec: Codec name, e.g. ``"npy"``, ``"npz"`` or ``"zlib"``.
        x: Array to save and reload.

    Returns:
        A dict with keys ``"codec"``, ``"save"`` (seconds spent saving),
        ``"sum"`` (``y.sum()`` of the reloaded data), ``"load"`` (seconds
        spent loading, including the sum) and ``"size"`` (file size in
        bytes). The dict is also printed.
    """
    # Resolve the file name once; ``suffix`` is only consulted for the
    # jdata-backed codecs, since the NumPy formats ignore it.
    if codec in ("npy", "npz"):
        ext = "." + codec
    else:
        ext = suffix
    fname = "matrix_" + codec + ext

    # perf_counter() is monotonic and high resolution, so the measured
    # intervals cannot be skewed by wall-clock adjustments.
    t_start = time.perf_counter()
    if codec == "npy":
        np.save(fname, x)
    elif codec == "npz":
        np.savez_compressed(fname, x)
    else:
        jd.save(x, fname, {"compression": codec, "nthread": nthread})
    t_saved = time.perf_counter()
    res = {"codec": codec, "save": t_saved - t_start}

    if codec == "npy":
        y = np.load(fname)
    elif codec == "npz":
        # np.load returns an NpzFile that holds the archive open; the
        # context manager closes it once the array has been extracted.
        with np.load(fname) as archive:
            y = archive["arr_0"]
    else:
        y = jd.load(fname, {"nthread": nthread})

    # The checksum is taken before the load timestamp, so the reported
    # load time includes reducing over the reloaded data.
    res["sum"] = y.sum()
    res["load"] = time.perf_counter() - t_saved
    res["size"] = os.path.getsize(fname)
    print(res)
    return res
21 | 39 |
|
22 | 40 |
|
# Benchmark input: an identity matrix, which is highly compressible.
x = np.eye(10000)

# For a less compressible input, swap in a seeded random matrix instead:
# np.random.seed(0)
# x = np.random.rand(2000,2000)

# Run every codec once per container format. ``suffix`` has to remain a
# module-level name because benchmark() reads it when building file names.
for banner, suffix in (
    ("\n- Testing binary JSON (BJData) files (.jdb) ...", ".jdb"),
    ("\n- Testing text-based JSON files (.jdt) ...", ".jdt"),
):
    print(banner)
    res = [benchmark(codec, x) for codec in codecs]
    # print(np.array(res))
0 commit comments