Skip to content

Commit 9c74653

Browse files
committed
update speed benchmark script to test npy and npz
1 parent abbc4e5 commit 9c74653

File tree

2 files changed

+36
-8
lines changed

2 files changed

+36
-8
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ to HDF5 filters. Currently supported codecs include `zlib`, `gzip`, `lz4`, `lzma
108108
`blosc2zstd`. To apply a selected compression method, one simply sets `{'compression':'method'}` as
109109
the option to `jdata.encode` or `jdata.save` function; `jdata.load` or `jdata.decode` automatically
110110
decompresses the data based on the `_ArrayZipType_` annotation present in the data. Only `blosc2`
111-
compression methods support multi-threading. To set the thread number, one should define a `nthread`
111+
compression methods support multi-threading. To set the thread number, one should define an `nthread`
112112
value in the option (`opt`) for both encoding and decoding.
113113

114114

test/benchcodecs.py

Lines changed: 35 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,58 @@
1+
"""
2+
Speed benchmark for saving/loading numpy arrays using various compression codecs
3+
"""
14
import jdata as jd
25
import numpy as np
36
import time
47
import os
58

69
# Report which jdata release is being benchmarked.
print("jdata version:" + jd.__version__)

# Codecs to benchmark. "npy" and "npz" are written/read with numpy directly;
# every other name is passed to jdata as the "compression" option.
codecs = [
    "npy",
    "npz",
    "zlib",
    "lzma",
    "lz4",
    "blosc2blosclz",
    "blosc2lz4",
    "blosc2lz4hc",
    "blosc2zlib",
    "blosc2zstd",
]

# Thread count handed to jdata as the "nthread" option when saving and loading.
nthread = 8

13+
914

1015
def benchmark(codec, x):
    """
    Time one save/load round trip of ``x`` using ``codec`` and report the result.

    ``"npy"`` and ``"npz"`` are handled by numpy (``np.save`` and
    ``np.savez_compressed``) and written to ``matrix_<codec>.<codec>``.
    Any other codec name is passed to ``jd.save`` / ``jd.load`` as the
    ``"compression"`` option, using the module-level ``suffix`` as the file
    extension and the module-level ``nthread`` as the thread count.

    Parameters
    ----------
    codec : str
        Codec name; also used to build the output file name.
    x : numpy array
        Data to be written and read back.

    Returns
    -------
    dict
        ``codec``: the codec name; ``save``: seconds spent saving;
        ``sum``: ``y.sum()`` of the reloaded data; ``load``: seconds spent
        loading (measured after the sum, so it includes that call);
        ``size``: size of the written file in bytes. The dict is also printed.
    """
    # numpy formats carry their own extension; jdata files use the global suffix
    if codec in ("npy", "npz"):
        ext = "." + codec
    else:
        ext = suffix
    fname = "matrix_" + codec + ext

    # perf_counter is the high-resolution clock intended for measuring durations
    t0 = time.perf_counter()
    if codec == "npy":
        np.save(fname, x)
    elif codec == "npz":
        np.savez_compressed(fname, x)
    else:
        jd.save(x, fname, {"compression": codec, "nthread": nthread})
    dt = time.perf_counter() - t0  # saving time
    res = {"codec": codec, "save": dt}

    if codec == "npy":
        y = np.load(fname)
    elif codec == "npz":
        # NpzFile holds an open file handle; the context manager closes it
        with np.load(fname) as archive:
            y = archive["arr_0"]
    else:
        y = jd.load(fname, {"nthread": nthread})  # loading
    res["sum"] = y.sum()
    res["load"] = time.perf_counter() - t0 - dt  # loading time (incl. the sum above)
    res["size"] = os.path.getsize(fname)
    print(res)
    return res
2139

2240

41+
## a highly compressible matrix
x = np.eye(10000)

## a less compressible random matrix (uncomment to use instead)
# np.random.seed(0)
# x = np.random.rand(2000,2000)

# Each pass announces itself, sets the module-level `suffix` that benchmark()
# reads for jdata files, and then runs every codec once on the same matrix.
passes = (
    ("\n- Testing binary JSON (BJData) files (.jdb) ...", ".jdb"),
    ("\n- Testing text-based JSON files (.jdt) ...", ".jdt"),
)

for banner, suffix in passes:
    print(banner)
    res = [benchmark(codec, x) for codec in codecs]
    # print(np.array(res))

0 commit comments

Comments
 (0)