Skip to content

Commit 2647fc9

Browse files
committed
[feat] port jsonpath from JSONLab, add loadurl for REST API
1 parent fbf070c commit 2647fc9

File tree

3 files changed

+203
-5
lines changed

3 files changed

+203
-5
lines changed

jdata/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from .jfile import (
3737
load,
3838
save,
39+
loadurl,
3940
show,
4041
loadt,
4142
savet,
@@ -48,11 +49,13 @@
4849
jext,
4950
)
5051
from .jdata import encode, decode, jdtype, jsonfilter
52+
from .jpath import jsonpath
5153

5254
__version__ = "0.5.5"
5355
__all__ = [
5456
"load",
5557
"save",
58+
"loadurl",
5659
"show",
5760
"loadt",
5861
"savet",
@@ -67,6 +70,7 @@
6770
"jdtype",
6871
"jsonfilter",
6972
"jext",
73+
"jsonpath",
7074
]
7175
__license__ = """Apache license 2.0, Copyright (c) 2019-2024 Qianqian Fang"""
7276

jdata/jfile.py

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
__all__ = [
88
"load",
99
"save",
10+
"loadurl",
1011
"show",
1112
"loadt",
1213
"savet",
@@ -54,7 +55,7 @@ def load(fname, opt={}, **kwargs):
5455
"""
5556
if re.match("^https*://", fname):
5657
newdata = downloadlink(fname, opt, **kwargs)
57-
return newdata
58+
return newdata[0]
5859

5960
spl = os.path.splitext(fname)
6061
ext = spl[1].lower()
@@ -102,6 +103,24 @@ def save(data, fname, opt={}, **kwargs):
102103
)
103104

104105

106+
def loadurl(url, opt=None, **kwargs):
    """@brief Loading a JData file (binary or text) from a URL without caching locally

    @param[in] url: a REST API URL, currently only http:// and https:// are supported
    @param[in] opt: options, opt['nocache']=True by default; setting it to False lets
               downloadlink() download and locally cache the data. The dict passed in
               is copied, never modified.
    @param[in] kwargs: extra keyword arguments forwarded unchanged to downloadlink()
    @return the first element of the tuple returned by downloadlink()
    @exception Exception("JData", ...) when url does not start with http:// or https://
    """
    # Reject anything that is not an http(s) URL before touching the options.
    if not re.match(r"^https?://", url):
        raise Exception(
            "JData",
            "input to loadurl is not a valid URL",
        )

    # Work on a private copy: setting the 'nocache' default on the caller's dict
    # (or on a shared default argument) would leak into later load()/jdlink()
    # calls that reuse the same options object.
    opt = dict(opt) if opt else {}
    opt.setdefault("nocache", True)

    newdata = downloadlink(url, opt, **kwargs)
    return newdata[0]
122+
123+
105124
##====================================================================================
106125
## Loading and saving text-based JData (i.e. JSON) files
107126
##====================================================================================
@@ -370,7 +389,6 @@ def jsoncache(url, opt={}, **kwargs):
370389
if p is not None:
371390
cachepath.insert(0, p)
372391
elif dbname and docname:
373-
print([domain, dbname, docname, cachepath])
374392
cachepath = [os.path.join(x, domain, dbname, docname) for x in cachepath]
375393
if filename is not None:
376394
for i in range(len(cachepath)):
@@ -421,21 +439,32 @@ def jdlink(uripath, opt={}, **kwargs):
421439
)
422440
alloutput = [[] for _ in range(3)]
423441
for i in range(len(uripath)):
424-
newdata, fname, cachepath = downloadlink(uripath[i], opt)
442+
newdata, fname, cachepath = downloadlink(uripath[i], opt, **kwargs)
425443
alloutput[0].append(newdata)
426444
alloutput[1].append(fname)
427445
alloutput[2].append(cachepath)
428446
if len(uripath) == 1:
429447
alloutput = [x[0] for x in alloutput]
430448
newdata, fname, cachepath = tuple(alloutput)
431449
elif isinstance(uripath, str):
432-
newdata, fname, cachepath = downloadlink(uripath, opt)
450+
newdata, fname, cachepath = downloadlink(uripath, opt, **kwargs)
433451
return newdata, fname
434452

435453

436-
def downloadlink(uripath, opt={}):
454+
def downloadlink(uripath, opt={}, **kwargs):
437455
opt.setdefault("showlink", 1)
438456

457+
if "nocache" in opt and opt["nocache"]:
458+
newdata = urllib.request.urlopen(uripath).read()
459+
try:
460+
newdata = loadts(newdata, opt, **kwargs)
461+
except:
462+
try:
463+
newdata = loadbs(newdata, opt, **kwargs)
464+
except:
465+
pass
466+
return newdata, uripath, None
467+
439468
newdata = []
440469
cachepath, filename = jsoncache(uripath)
441470
if isinstance(cachepath, list) and cachepath:

jdata/jpath.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""@package docstring
2+
JSONPath implementation ported from the jsonpath MATLAB function in JSONLab
3+
4+
Copyright (c) 2019-2024 Qianqian Fang <q.fang at neu.edu>
5+
"""
6+
7+
__all__ = [
8+
"jsonpath",
9+
]
10+
11+
##====================================================================================
12+
## dependent libraries
13+
##====================================================================================
14+
15+
16+
import re
17+
import json
18+
import copy
19+
20+
21+
def jsonpath(root, jpath, opt=None):
    """@brief Query a nested Python object with a JSONPath expression

    @param[in] root: the data to be searched (getonelevel() handles dict and list levels)
    @param[in] jpath: a JSONPath string, e.g. "$.key1.key2[0]" or "$..key"; a dot that
               belongs to a key name can be escaped with a backslash or written inside
               a bracketed segment
    @param[in] opt: options dict forwarded to getonelevel(); it is copied, not modified
    @return the value found after applying every path segment in order, or None as
            soon as one segment reports no match; root itself when no segment is left
            after removing a leading "$"
    """
    # Private copy so neither the caller's dict nor a shared default is mutated
    # by the per-level resolver.
    opt = dict(opt) if opt else {}

    obj = root

    # Put a "." in front of an index/slice/wildcard bracket that directly
    # follows a name, so that it becomes its own segment: a[0] -> a.[0]
    jpath = re.sub(r"([^.\]])(\[[-0-9:\*]+\])", r"\1.\2", jpath)
    # Normalize bracketed segments: drop surrounding quotes and prefix with "."
    jpath = re.sub(r"\[[\'\"]*([^]\'\"]+)[\'\"]*\]", r".[\1]", jpath)
    # Protect backslash-escaped dots with a placeholder. The pattern must be a
    # literal backslash followed by a literal dot; r"\\." would also swallow a
    # backslash followed by any other character.
    jpath = re.sub(r"\\\.", "_0x2E_", jpath)
    # Protect dots that sit inside a bracketed segment. Each pass replaces one
    # dot per bracket, so repeat until none is left.
    dot_in_bracket = r"(\[[\'\"]*[^]\'\"]+)\.(?=[^]\'\"]+[\'\"]*\])"
    while re.search(dot_in_bracket, jpath):
        jpath = re.sub(dot_in_bracket, r"\1_0x2E_", jpath)

    # Split into segments, each keeping its leading "." or ".." marker, then
    # restore the protected dots.
    paths = re.findall(r"(\.{0,2}[^.]+)", jpath)
    paths = [re.sub("_0x2E_", ".", x) for x in paths]
    if paths and paths[0] == "$":
        paths.pop(0)

    for i, _segment in enumerate(paths):
        obj, isfound = getonelevel(obj, paths, i, opt)
        if not isfound:
            return None
    return obj
42+
43+
44+
def getonelevel(input_data, paths, pathid, opt):
    """@brief Resolve one JSONPath segment against one level of the data

    @param[in] input_data: the object the segment is applied to
    @param[in] paths: list of path segments; an element may be the segment string or a
               one-item list wrapping it
    @param[in] pathid: index in paths of the segment to resolve
    @param[in] opt: options dict (read only); when opt['inplace'] is true a deep copy of
               the match is returned, otherwise (default) the matched object itself
    @return a tuple (obj, isfound); obj is the match, unwrapped when it is a one-item
            list, and ([], False) is returned when nothing matched
    @exception ValueError when input_data is neither a list/tuple/frozenset nor a dict,
               the segment is not an index expression and it is not a ".." deep scan
    """
    # NOTE(review): returning a copy when 'inplace' is true looks inverted
    # relative to the option name - confirm the intended meaning with the author.
    inplace = opt.get("inplace", False)

    pathname = paths[pathid]
    if isinstance(pathname, list):
        pathname = pathname[0]
    deepscan = pathname.startswith("..")
    origpath = pathname
    pathname = pathname.lstrip(".")
    obj = None

    if pathname == "$":
        obj = input_data
    elif re.match(r"\$\d+", pathname):
        # NOTE(review): skipping two characters and adding 1 looks like a leftover
        # from the 1-based MATLAB original ("$0" fails in int("")) - confirm the
        # intended indexing before changing it.
        obj = input_data[int(pathname[2:]) + 1]
    elif re.match(r"^\[[\-0-9\*:]+\]$", pathname) or isinstance(
        input_data, (list, tuple, frozenset)
    ):
        arraystr = pathname[1:-1]
        # 1-based, end-inclusive bounds; None means "no explicit index given".
        start = end = None

        if ":" in arraystr:
            match = re.search(r"(?P<start>-*\d*):(?P<end>-*\d*)", arraystr)
            if match:
                start = int(match.group("start")) if match.group("start") else None
                end = int(match.group("end")) if match.group("end") else None

                if start is None:
                    start = 1
                elif start < 0:
                    start = len(input_data) + start
                else:
                    start += 1

                if end is None:
                    end = len(input_data)
                elif end < 0:
                    end = len(input_data) + end
                else:
                    end += 1
        elif re.match(r"^[-0-9:]+$", arraystr):
            firstidx = int(arraystr)
            if firstidx < 0:
                firstidx = len(input_data) + firstidx + 1
            else:
                firstidx += 1
            start = end = firstidx
        # a "*" wildcard, or a plain key name applied to a sequence, leaves the
        # bounds unset so that the whole sequence is searched below

        if start is not None and end is not None:
            obj = input_data[start - 1 : end]
        else:
            start, end = 1, len(input_data)

        if not obj and isinstance(input_data, list):
            # Nothing selected by index: apply the same segment to every
            # element in range and collect the matches.
            searchkey = ".." + pathname if deepscan else origpath
            newobj = []
            for item in input_data[start - 1 : end]:
                val, isfound = getonelevel(
                    item, paths[:pathid] + [searchkey], pathid, opt
                )
                if isfound:
                    # A scalar match must be appended: extend() raises
                    # TypeError on numbers and splits strings into characters.
                    if isinstance(val, list):
                        newobj.extend(val)
                    else:
                        newobj.append(val)
            if newobj:
                obj = newobj
        if isinstance(obj, list) and len(obj) == 1:
            obj = obj[0]

    elif isinstance(input_data, dict):
        pathname = re.sub(r"^\[(.*)\]$", r"\1", pathname)

        if pathname in input_data:
            obj = [input_data[pathname]]

        # A direct hit ends the search at this level; otherwise look for the
        # key inside every value, using a ".." deep-scan segment.
        if obj is None:
            for key in input_data:
                val, isfound = getonelevel(
                    input_data[key], paths[:pathid] + [[".." + pathname]], pathid, opt
                )
                if isfound:
                    obj = obj or []
                    if isinstance(val, list):
                        obj.extend(val)
                    else:
                        obj.append(val)

            if obj and len(obj) == 1:
                obj = obj[0]

        if isinstance(obj, list) and len(obj) == 1:
            obj = obj[0]

    elif not deepscan:
        raise ValueError(
            f'json path segment "{pathname}" can not be found in the input_data object'
        )

    isfound = obj is not None
    if not isfound:
        obj = []

    return (copy.deepcopy(obj), isfound) if inplace else (obj, isfound)

0 commit comments

Comments
 (0)