add numpy array encoding/decoding benchmark

fangq · fangq · commit abbc4e529af4 · 2022-08-31T00:03:35.000-04:00
diff --git a/README.md b/README.md
@@ -103,13 +103,13 @@ newdata
 ```
 
 PyJData supports multiple N-D array data compression/decompression methods (i.e. codecs), similar
-to HDF5 filters. The currently supported filters include `zlib`, `gzip`, `lz4`, `lzma`, and various
+to HDF5 filters. Currently supported codecs include `zlib`, `gzip`, `lz4`, `lzma`, `base64`, and various
 `blosc2` compression methods, including `blosc2blosclz`, `blosc2lz4`, `blosc2lz4hc`, `blosc2zlib`,
 `blosc2zstd`. To apply a selected compression method, one simply set `{'compression':'method'}` as
 the option to `jdata.encode` or `jdata.save` function; `jdata.load` or `jdata.decode` automatically
 decompress the data based on the `_ArrayZipType_` annotation present in the data. Only `blosc2`
 compression methods support multi-threading. To set the thread number, one should define a `nthread`
-value in the option for both encoding and decoding.
+value in the option (`opt`) for both encoding and decoding.
 
 
 ## Utility
diff --git a/test/benchcodecs.py b/test/benchcodecs.py
@@ -0,0 +1,30 @@
+import jdata as jd
+import numpy as np
+import time
+import os
+
+print("jdata version:" + jd.__version__)  # show which jdata version produced the numbers below
+# Codec names; each one is passed as the "compression" option to jd.save() inside benchmark().
+codecs = ["zlib", "lzma", "lz4", "blosc2blosclz", "blosc2lz4", "blosc2lz4hc", "blosc2zlib", "blosc2zstd"]
+
+def benchmark(codec, x):
+    """Time saving/loading array `x` with compression `codec`; print and return the results.
+    Returns a dict with keys "codec", "save"/"load" (seconds), "size" (file bytes) and "sum" (y.sum())."""
+    fname = "matrix_" + codec + suffix  # `suffix` is a module-level global assigned before each run
+    t0 = time.perf_counter()  # monotonic, high-resolution clock; time.time() may jump or be coarse
+    jd.save(x, fname, {"compression": codec, "nthread": 8})
+    t1 = time.perf_counter()
+    y = jd.load(fname, {"nthread": 8})
+    res = {"codec": codec, "save": t1 - t0, "load": time.perf_counter() - t1, "size": os.path.getsize(fname), "sum": y.sum()}
+    print(res)
+    return res
+
+
+x = np.eye(10000)  # 10000 x 10000 identity matrix used as the benchmark payload
+# Pass 1: benchmark() reads the module-level `suffix`, so assign it before looping over the codecs.
+suffix = '.jdb'
+res = [benchmark(codec, x) for codec in codecs]
+
+# Pass 2: same codecs, but output files get the '.jdt' suffix; `res` is overwritten with these results.
+suffix = '.jdt'
+res = [benchmark(codec, x) for codec in codecs]