Skip to content

Commit fbf070c

Browse files
committed
[feat] add jsoncache, jdlink, _DataLink_ support, load supports URL
1 parent 57f0108 commit fbf070c

File tree

8 files changed

+466
-72
lines changed

8 files changed

+466
-72
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
- Copyright: (C) Qianqian Fang (2019-2024) <q.fang at neu.edu>
66
- License: Apache License, Version 2.0
7-
- Version: 0.5.3
7+
- Version: 0.5.5
88
- URL: https://github.com/NeuroJSON/pyjdata
99

1010
![Build Status](https://github.com/NeuroJSON/pyjdata/actions/workflows/run_test.yml/badge.svg)

jdata/__init__.py

Lines changed: 20 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -33,25 +33,42 @@
3333
to restore the original data types
3434
"""
3535

36-
from .jfile import load, save, show, loadt, savet, loadb, saveb, jext
36+
from .jfile import (
37+
load,
38+
save,
39+
show,
40+
loadt,
41+
savet,
42+
loadts,
43+
loadbs,
44+
loadb,
45+
saveb,
46+
jsoncache,
47+
jdlink,
48+
jext,
49+
)
3750
from .jdata import encode, decode, jdtype, jsonfilter
3851

39-
__version__ = "0.5.3"
52+
__version__ = "0.5.5"
4053
__all__ = [
4154
"load",
4255
"save",
4356
"show",
4457
"loadt",
4558
"savet",
59+
"loadts",
60+
"loadbs",
4661
"loadb",
4762
"saveb",
4863
"encode",
4964
"decode",
65+
"jsoncache",
66+
"jdlink",
5067
"jdtype",
5168
"jsonfilter",
5269
"jext",
5370
]
54-
__license__ = """Apache license 2.0, Copyright (c) 2019-2022 Qianqian Fang"""
71+
__license__ = """Apache license 2.0, Copyright (c) 2019-2024 Qianqian Fang"""
5572

5673

5774
if __name__ == "__main__":

jdata/__main__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,9 @@ def main():
2121
# get arguments and invoke the conversion routines
2222
#
2323

24-
parser = argparse.ArgumentParser(description="Convert a text JSON/JData file to a binary JSON/JData file and vice versa.")
24+
parser = argparse.ArgumentParser(
25+
description="Convert a text JSON/JData file to a binary JSON/JData file and vice versa."
26+
)
2527

2628
parser.add_argument(
2729
"file",

jdata/jdata.py

Lines changed: 99 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
Encoding and decoding python native data structures as
33
portable JData-spec annotated dict structure
44
5-
Copyright (c) 2019-2022 Qianqian Fang <q.fang at neu.edu>
5+
Copyright (c) 2019-2024 Qianqian Fang <q.fang at neu.edu>
66
"""
77

88
__all__ = ["encode", "decode", "jdtype", "jsonfilter"]
@@ -15,6 +15,9 @@
1515
import copy
1616
import zlib
1717
import base64
18+
import os
19+
import re
20+
from .jfile import jdlink
1821

1922
##====================================================================================
2023
## global variables
@@ -131,12 +134,19 @@ def encode(d, opt={}):
131134
return newobj
132135
elif isinstance(d, np.ndarray) or np.iscomplex(d):
133136
newobj = {}
134-
newobj["_ArrayType_"] = jdtype[str(d.dtype)] if (str(d.dtype) in jdtype) else str(d.dtype)
137+
newobj["_ArrayType_"] = (
138+
jdtype[str(d.dtype)] if (str(d.dtype) in jdtype) else str(d.dtype)
139+
)
135140
if np.isscalar(d):
136141
newobj["_ArraySize_"] = 1
137142
else:
138143
newobj["_ArraySize_"] = list(d.shape)
139-
if d.dtype == np.complex64 or d.dtype == np.complex128 or d.dtype == np.csingle or d.dtype == np.cdouble:
144+
if (
145+
d.dtype == np.complex64
146+
or d.dtype == np.complex128
147+
or d.dtype == np.csingle
148+
or d.dtype == np.cdouble
149+
):
140150
newobj["_ArrayIsComplex_"] = True
141151
newobj["_ArrayData_"] = np.stack((d.ravel().real, d.ravel().imag))
142152
else:
@@ -158,15 +168,23 @@ def encode(d, opt={}):
158168
newobj["_ArrayZipData_"] = gzipper.compress(newobj["_ArrayZipData_"])
159169
elif opt["compression"] == "lzma":
160170
try:
161-
newobj["_ArrayZipData_"] = lzma.compress(newobj["_ArrayZipData_"], lzma.FORMAT_ALONE)
171+
newobj["_ArrayZipData_"] = lzma.compress(
172+
newobj["_ArrayZipData_"], lzma.FORMAT_ALONE
173+
)
162174
except Exception:
163-
print('you must install "lzma" module to compress with this format, ignoring')
175+
print(
176+
'you must install "lzma" module to compress with this format, ignoring'
177+
)
164178
pass
165179
elif opt["compression"] == "lz4":
166180
try:
167-
newobj["_ArrayZipData_"] = lz4.frame.compress(newobj["_ArrayZipData_"].tobytes())
181+
newobj["_ArrayZipData_"] = lz4.frame.compress(
182+
newobj["_ArrayZipData_"].tobytes()
183+
)
168184
except ImportError:
169-
print('you must install "lz4" module to compress with this format, ignoring')
185+
print(
186+
'you must install "lz4" module to compress with this format, ignoring'
187+
)
170188
pass
171189
elif opt["compression"].startswith("blosc2"):
172190
try:
@@ -187,9 +205,13 @@ def encode(d, opt={}):
187205
nthreads=blosc2nthread,
188206
)
189207
except ImportError:
190-
print('you must install "blosc2" module to compress with this format, ignoring')
208+
print(
209+
'you must install "blosc2" module to compress with this format, ignoring'
210+
)
191211
pass
192-
if (("base64" in opt) and (opt["base64"])) or opt["compression"] == "base64":
212+
if (("base64" in opt) and (opt["base64"])) or opt[
213+
"compression"
214+
] == "base64":
193215
newobj["_ArrayZipData_"] = base64.b64encode(newobj["_ArrayZipData_"])
194216
newobj.pop("_ArrayData_")
195217
return newobj
@@ -214,8 +236,14 @@ def decode(d, opt={}):
214236
"""
215237

216238
opt.setdefault("inplace", False)
217-
218-
if (isinstance(d, str) or type(d) == "unicode") and len(d) <= 6 and len(d) > 4 and d[-1] == "_":
239+
opt.setdefault("maxlinklevel", 0)
240+
241+
if (
242+
(isinstance(d, str) or type(d) == "unicode")
243+
and len(d) <= 6
244+
and len(d) > 4
245+
and d[-1] == "_"
246+
):
219247
if d == "_NaN_":
220248
return float("nan")
221249
elif d == "_Inf_":
@@ -233,12 +261,16 @@ def decode(d, opt={}):
233261
d["_ArraySize_"] = np.frombuffer(bytearray(d["_ArraySize_"]))
234262
if "_ArrayZipData_" in d:
235263
newobj = d["_ArrayZipData_"]
236-
if (("base64" in opt) and (opt["base64"])) or ("_ArrayZipType_" in d and d["_ArrayZipType_"] == "base64"):
264+
if (("base64" in opt) and (opt["base64"])) or (
265+
"_ArrayZipType_" in d and d["_ArrayZipType_"] == "base64"
266+
):
237267
newobj = base64.b64decode(newobj)
238268
if "_ArrayZipType_" in d and d["_ArrayZipType_"] not in _zipper:
239269
raise Exception(
240270
"JData",
241-
"compression method {} is not supported".format(d["_ArrayZipType_"]),
271+
"compression method {} is not supported".format(
272+
d["_ArrayZipType_"]
273+
),
242274
)
243275
if d["_ArrayZipType_"] == "zlib":
244276
newobj = zlib.decompress(bytes(newobj))
@@ -258,7 +290,9 @@ def decode(d, opt={}):
258290

259291
newobj = lz4.frame.decompress(bytes(newobj))
260292
except Exception:
261-
print('Warning: you must install "lz4" module to decompress a data record in this file, ignoring')
293+
print(
294+
'Warning: you must install "lz4" module to decompress a data record in this file, ignoring'
295+
)
262296
return copy.deepcopy(d) if opt["inplace"] else d
263297
elif d["_ArrayZipType_"].startswith("blosc2"):
264298
try:
@@ -267,15 +301,23 @@ def decode(d, opt={}):
267301
blosc2nthread = 1
268302
if "nthread" in opt:
269303
blosc2nthread = opt["nthread"]
270-
newobj = blosc2.decompress2(bytes(newobj), as_bytearray=False, nthreads=blosc2nthread)
304+
newobj = blosc2.decompress2(
305+
bytes(newobj), as_bytearray=False, nthreads=blosc2nthread
306+
)
271307
except Exception:
272-
print('Warning: you must install "blosc2" module to decompress a data record in this file, ignoring')
308+
print(
309+
'Warning: you must install "blosc2" module to decompress a data record in this file, ignoring'
310+
)
273311
return copy.deepcopy(d) if opt["inplace"] else d
274-
newobj = np.frombuffer(bytearray(newobj), dtype=np.dtype(d["_ArrayType_"])).reshape(d["_ArrayZipSize_"])
312+
newobj = np.frombuffer(
313+
bytearray(newobj), dtype=np.dtype(d["_ArrayType_"])
314+
).reshape(d["_ArrayZipSize_"])
275315
if "_ArrayIsComplex_" in d and newobj.shape[0] == 2:
276316
newobj = newobj[0] + 1j * newobj[1]
277317
if "_ArrayOrder_" in d and (
278-
d["_ArrayOrder_"].lower() == "c" or d["_ArrayOrder_"].lower() == "col" or d["_ArrayOrder_"].lower() == "column"
318+
d["_ArrayOrder_"].lower() == "c"
319+
or d["_ArrayOrder_"].lower() == "col"
320+
or d["_ArrayOrder_"].lower() == "column"
279321
):
280322
newobj = newobj.reshape(d["_ArraySize_"], order="F")
281323
else:
@@ -285,17 +327,25 @@ def decode(d, opt={}):
285327
return newobj
286328
elif "_ArrayData_" in d:
287329
if isinstance(d["_ArrayData_"], str):
288-
newobj = np.frombuffer(d["_ArrayData_"], dtype=np.dtype(d["_ArrayType_"]))
330+
newobj = np.frombuffer(
331+
d["_ArrayData_"], dtype=np.dtype(d["_ArrayType_"])
332+
)
289333
else:
290-
newobj = np.asarray(d["_ArrayData_"], dtype=np.dtype(d["_ArrayType_"]))
334+
newobj = np.asarray(
335+
d["_ArrayData_"], dtype=np.dtype(d["_ArrayType_"])
336+
)
291337
if "_ArrayZipSize_" in d and newobj.shape[0] == 1:
292338
if isinstance(d["_ArrayZipSize_"], str):
293-
d["_ArrayZipSize_"] = np.frombuffer(bytearray(d["_ArrayZipSize_"]))
339+
d["_ArrayZipSize_"] = np.frombuffer(
340+
bytearray(d["_ArrayZipSize_"])
341+
)
294342
newobj = newobj.reshape(d["_ArrayZipSize_"])
295343
if "_ArrayIsComplex_" in d and newobj.shape[0] == 2:
296344
newobj = newobj[0] + 1j * newobj[1]
297345
if "_ArrayOrder_" in d and (
298-
d["_ArrayOrder_"].lower() == "c" or d["_ArrayOrder_"].lower() == "col" or d["_ArrayOrder_"].lower() == "column"
346+
d["_ArrayOrder_"].lower() == "c"
347+
or d["_ArrayOrder_"].lower() == "col"
348+
or d["_ArrayOrder_"].lower() == "column"
299349
):
300350
newobj = newobj.reshape(d["_ArraySize_"], order="F")
301351
else:
@@ -308,6 +358,33 @@ def decode(d, opt={}):
308358
"JData",
309359
"one and only one of _ArrayData_ or _ArrayZipData_ is required",
310360
)
361+
elif "_DataLink_" in d:
362+
if opt["maxlinklevel"] > 0 and "_DataLink_" in data:
363+
if isinstance(data["_DataLink_"], str):
364+
datalink = data["_DataLink_"]
365+
if re.search("\:\$", datalink):
366+
ref = re.search(
367+
"^(?P<proto>[a-zA-Z]+://)*(?P<path>.+)(?P<delim>\:)()*(?P<jsonpath>(?<=:)\$\d*\.*.*)*",
368+
datalink,
369+
)
370+
else:
371+
ref = re.search(
372+
"^(?P<proto>[a-zA-Z]+://)*(?P<path>.+)(?P<delim>\:)*(?P<jsonpath>(?<=:)\$\d*\..*)*",
373+
datalink,
374+
)
375+
if ref and ref.group("path"):
376+
uripath = ref.group("proto") + ref.group("path")
377+
newobj, fname = jdlink(uripath)
378+
if os.path.exists(fname):
379+
opt["maxlinklevel"] = opt["maxlinklevel"] - 1
380+
if ref.group("jsonpath"):
381+
newobj = jsonpath(newdata, ref.group("jsonpath"))
382+
return nrewobj
383+
else:
384+
raise Exception(
385+
"JData",
386+
"_DataLink_ contains invalid URL",
387+
)
311388
return decodedict(d, opt)
312389
else:
313390
return copy.deepcopy(d) if opt["inplace"] else d

0 commit comments

Comments
 (0)