Skip to content

Commit 322ed7b

Browse files
committed
Tweak python implementation to work on both py2 & py3 + add docstrings
1 parent 017bab6 commit 322ed7b

1 file changed

Lines changed: 94 additions & 31 deletions

File tree

jsonpath.py

Lines changed: 94 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,30 @@
11
#!/usr/bin/env python3
22

33
from __future__ import print_function
4-
import re
54
import sys
5+
import re
66

7-
def main(args):
8-
output = None
9-
if args.line == True:
10-
parts = re.split("[,.:/-]", args.target)
11-
line = int(parts[0])
12-
column = 1 if len(parts) == 1 else int(parts[1])
13-
res = scan_file(args.file, line=line, column=column, verbose=args.verbose)
14-
if res:
15-
output = args.delimeter.join(str(x) for x in res)
16-
else:
17-
path = args.target.split(args.delimeter)
18-
res = scan_file(args.file, path=path, verbose=args.verbose)
19-
if res:
20-
output = ":".join(str(x) for x in res)
21-
if output:
22-
print(output)
23-
else:
24-
print(f"'{args.target}' not found", file=sys.stderr)
25-
sys.exit(2)
7+
# open() doesn't support encoding in python2, but codecs.open does
8+
if (sys.version_info < (3, 0)):
9+
from codecs import open
2610

11+
def scan_stream(stream, path=[], line=-1, column=-1, verbose=False):
12+
"""
13+
Scans the given stream (CountingStream instance) for a given path, or
14+
line/column offset.
2715
28-
def scan_file(file, path=[], line=-1, column=-1, verbose=False):
16+
If a path is specified the function will search for it, returning (line,
17+
column) of where the value is declared if found.
18+
19+
If a positive line and column is provided the function will instead return
20+
the path (as a list) at that offset.
21+
"""
2922
searching = len(path) > 0
3023

3124
if not searching and (line < 0 or column < 0):
3225
raise ValueError("Must provide a non-empty 'path' or 'line' and 'column' >=0")
3326

3427
# Parser state
35-
stream = CountingStream(file)
3628
in_key = False
3729
quoted = False
3830
key = "" # using a string array seems to perform worse
@@ -48,7 +40,13 @@ def scan_file(file, path=[], line=-1, column=-1, verbose=False):
4840
break
4941

5042
if verbose:
51-
print(f"{stream.lnum}:{stream.cnum} {char} quoted={quoted} in_key={in_key} key={key}")
43+
print("%d:%d %c quoted=%s in_key=%s key=%s",
44+
stream.lnum,
45+
stream.cnum,
46+
char,
47+
quoted,
48+
in_key,
49+
key)
5250

5351
if char == "\\":
5452
decoded = read_escape(stream)
@@ -123,6 +121,9 @@ def scan_file(file, path=[], line=-1, column=-1, verbose=False):
123121
ESCAPE_MAP = {"n":"\n","t":"\t","r":"\r","b":"\b","f":"\f"}
124122

125123
def read_escape(stream):
124+
"""
125+
Reads an escape code from stream, returning the decoded character.
126+
"""
126127
char = stream.read()
127128
mapped = ESCAPE_MAP.get(char, None)
128129
if mapped is not None:
@@ -138,16 +139,20 @@ def read_escape(stream):
138139
return unichr(r)
139140

140141

141-
class CountingStream(object):
142-
def __init__(self, file):
143-
self.file = file
142+
class CountingStream:
143+
"""
144+
Wraps a readable text stream accessible one character at a time.
145+
Keeps track of current line (lnum) and column number (cnum).
146+
"""
147+
def __init__(self):
144148
self.lnum = 1
145-
self.cnum = 0
149+
self.cnum = 1
150+
151+
def _read(self):
152+
raise TypeError("Use a CountingStream subclass implementing _read()")
146153

147154
def read(self, advance=True):
148-
# TODO: Any way to optimize using something from this?
149-
# https://stackoverflow.com/a/59013806/1527562
150-
char = self.file.read(1)
155+
char = self._read()
151156
if advance is True or (callable(advance) and advance(char)):
152157
if char == "\n":
153158
self.lnum += 1
@@ -164,13 +169,71 @@ def skip_spaces(self):
164169
break
165170
return char
166171

172+
173+
class CountingLines(CountingStream):
    """
    CountingStream for text that is available as a list of lines.

    Characters are served from `lines` using the 1-based line (lnum) and
    column (cnum) position kept by CountingStream. Once the column runs past
    the end of the current line a newline is returned, so the entries in
    `lines` do not need to carry their own line terminator.
    """
    def __init__(self, lines):
        # Explicit base-class call, same as the other CountingStream
        # subclasses; it also works if the base is an old-style class (py2).
        CountingStream.__init__(self)
        self.lines = lines
        self.line_count = len(self.lines)

    def _read(self):
        """
        Returns the character at the current position, a newline when the
        column is past the end of the current line, or an empty string once
        every line has been consumed.
        """
        # lnum is 1-based, so lnum == line_count is still the (valid) last
        # line; only a position beyond it means end of input.
        if self.lnum > self.line_count:
            return ""
        line = self.lines[self.lnum - 1]
        if self.cnum > len(line):
            return "\n"
        return line[self.cnum - 1]
189+
190+
191+
class CountingFile(CountingStream):
    """
    CountingStream backed by a file object (such as the one `open()` returns).
    """
    def __init__(self, file):
        CountingStream.__init__(self)
        # Kept as-is; characters are pulled from it on demand by _read().
        self.file = file

    def _read(self):
        """
        Reads and returns the next single character from the wrapped file.
        """
        source = self.file
        return source.read(1)
201+
202+
203+
def main(args):
    """
    Entrypoint for CLI usage.

    Reads JSON from `args.file` ("-" means standard input) and either:

    - when `args.line` is set, parses `args.target` as "line" or
      "line<sep>column" (separator is any of , . : / -) and prints the path
      at that offset joined with `args.delimeter`, or
    - otherwise splits `args.target` on `args.delimeter` into a path and
      prints the "line:column" where that path's value is declared.

    Prints a "not found" message to stderr and exits with status 2 when the
    scan yields no result.
    """
    opened = None
    if args.file == "-":
        file = sys.stdin
    else:
        file = opened = open(args.file, "r", buffering=2048, encoding="utf-8")

    try:
        stream = CountingFile(file)
        output = None
        if args.line == True:
            parts = re.split("[,.:/-]", args.target)
            line = int(parts[0])
            # Column is optional in the target and defaults to the first one.
            column = 1 if len(parts) == 1 else int(parts[1])
            res = scan_stream(stream, line=line, column=column, verbose=args.verbose)
            if res:
                output = args.delimeter.join(str(x) for x in res)
        else:
            path = args.target.split(args.delimeter)
            res = scan_stream(stream, path=path, verbose=args.verbose)
            if res:
                output = ":".join(str(x) for x in res)
    finally:
        # Close only what we opened ourselves; never close sys.stdin.
        if opened is not None:
            opened.close()

    if output:
        print(output)
    else:
        print("'%s' not found" % (args.target,), file=sys.stderr)
        sys.exit(2)
229+
230+
167231
if __name__ == "__main__":
168232
import argparse
169233
parser = argparse.ArgumentParser()
170234

171235
parser.add_argument(
172236
"file",
173-
type=argparse.FileType("r", 2048, "utf-8"),
174237
default="-",
175238
help="json to parse")
176239

0 commit comments

Comments
 (0)