11#!/usr/bin/env python3
22
33from __future__ import print_function
4- import re
54import sys
5+ import re
66
7- def main (args ):
8- output = None
9- if args .line == True :
10- parts = re .split ("[,.:/-]" , args .target )
11- line = int (parts [0 ])
12- column = 1 if len (parts ) == 1 else int (parts [1 ])
13- res = scan_file (args .file , line = line , column = column , verbose = args .verbose )
14- if res :
15- output = args .delimeter .join (str (x ) for x in res )
16- else :
17- path = args .target .split (args .delimeter )
18- res = scan_file (args .file , path = path , verbose = args .verbose )
19- if res :
20- output = ":" .join (str (x ) for x in res )
21- if output :
22- print (output )
23- else :
24- print (f"'{ args .target } ' not found" , file = sys .stderr )
25- sys .exit (2 )
7+ # open() doesn't support encoding in python2, but codecs.open does
8+ if (sys .version_info < (3 , 0 )):
9+ from codecs import open
2610
11+ def scan_stream (stream , path = [], line = - 1 , column = - 1 , verbose = False ):
12+ """
13+ Scans the given stream (CountingStream instance) for a given path, or
14+ line/column offset.
2715
28- def scan_file (file , path = [], line = - 1 , column = - 1 , verbose = False ):
16+ If a path is specified the function will search for it, returning (line,
17+ column) of where the value is declared if found.
18+
19+ If a positive line and column is provided the function will instead return
20+ the path (as a list) at that offset.
21+ """
2922 searching = len (path ) > 0
3023
3124 if not searching and (line < 0 or column < 0 ):
3225 raise ValueError ("Must provide a non-empty 'path' or 'line' and 'column' >=0" )
3326
3427 # Parser state
35- stream = CountingStream (file )
3628 in_key = False
3729 quoted = False
3830 key = "" # using a string array seems to perform worse
@@ -48,7 +40,13 @@ def scan_file(file, path=[], line=-1, column=-1, verbose=False):
4840 break
4941
5042 if verbose :
51- print (f"{ stream .lnum } :{ stream .cnum } { char } quoted={ quoted } in_key={ in_key } key={ key } " )
43+ print ("%d:%d %c quoted=%s in_key=%s key=%s" ,
44+ stream .lnum ,
45+ stream .cnum ,
46+ char ,
47+ quoted ,
48+ in_key ,
49+ key )
5250
5351 if char == "\\ " :
5452 decoded = read_escape (stream )
@@ -123,6 +121,9 @@ def scan_file(file, path=[], line=-1, column=-1, verbose=False):
123121ESCAPE_MAP = {"n" :"\n " ,"t" :"\t " ,"r" :"\r " ,"b" :"\b " ,"f" :"\f " }
124122
125123def read_escape (stream ):
124+ """
125+ Reads an escape code from stream, returning the decoded character.
126+ """
126127 char = stream .read ()
127128 mapped = ESCAPE_MAP .get (char , None )
128129 if mapped is not None :
@@ -138,16 +139,20 @@ def read_escape(stream):
138139 return unichr (r )
139140
140141
141- class CountingStream (object ):
142- def __init__ (self , file ):
143- self .file = file
142+ class CountingStream :
143+ """
144+ Wraps a readable text stream accessible one character at a time.
145+ Keeps track of current line (lnum) and column number (cnum).
146+ """
147+ def __init__ (self ):
144148 self .lnum = 1
145- self .cnum = 0
149+ self .cnum = 1
150+
151+ def _read (self ):
152+ raise TypeError ("Use a CountingStream subclass implementing _read()" )
146153
147154 def read (self , advance = True ):
148- # TODO: Any way to optimize using something from this?
149- # https://stackoverflow.com/a/59013806/1527562
150- char = self .file .read (1 )
155+ char = self ._read ()
151156 if advance is True or (callable (advance ) and advance (char )):
152157 if char == "\n " :
153158 self .lnum += 1
@@ -164,13 +169,71 @@ def skip_spaces(self):
164169 break
165170 return char
166171
172+
class CountingLines(CountingStream):
    """
    CountingStream for streams accessible via line lists.

    Each entry of `lines` is indexed as one line; a newline character is
    reported once the column moves past the end of the current entry.
    """
    def __init__(self, lines):
        CountingStream.__init__(self)
        self.lines = lines
        self.line_count = len(self.lines)

    def _read(self):
        """
        Returns the character at the current (lnum, cnum) position.

        Returns a newline when cnum is past the end of the current line, and
        an empty string once lnum is past the last line.
        """
        # lnum is 1-based, so lnum == line_count is still the last valid line;
        # only positions beyond it mean the input is exhausted.
        if self.lnum > self.line_count:
            return ""
        line = self.lines[self.lnum - 1]
        if self.cnum > len(line):
            return "\n"
        return line[self.cnum - 1]
189+
190+
class CountingFile(CountingStream):
    """
    CountingStream backed by a file object (such as one returned by `open()`).
    """
    def __init__(self, file):
        CountingStream.__init__(self)
        self.file = file

    def _read(self):
        """
        Pulls the next single character from the wrapped file object.
        """
        char = self.file.read(1)
        return char
201+
202+
def main(args):
    """
    Entrypoint for CLI usage.

    Reads from `args.file` (or stdin when it is "-"). When `args.line` is set,
    `args.target` is parsed as "line" or "line<sep>column" and the path found
    at that offset is printed, joined with `args.delimeter`. Otherwise
    `args.target` is split on `args.delimeter` into a path and the scan result
    is printed joined with ":".

    Prints a "not found" message to stderr and exits with status 2 when the
    scan yields nothing.
    """
    use_stdin = args.file == "-"
    if use_stdin:
        file = sys.stdin
    else:
        file = open(args.file, "r", buffering=2048, encoding="utf-8")
    try:
        stream = CountingFile(file)
        output = None
        if args.line:
            # The column is optional and defaults to 1.
            parts = re.split("[,.:/-]", args.target)
            line = int(parts[0])
            column = 1 if len(parts) == 1 else int(parts[1])
            res = scan_stream(stream, line=line, column=column, verbose=args.verbose)
            if res:
                output = args.delimeter.join(str(x) for x in res)
        else:
            path = args.target.split(args.delimeter)
            res = scan_stream(stream, path=path, verbose=args.verbose)
            if res:
                output = ":".join(str(x) for x in res)
    finally:
        # Close only the handle opened here; stdin is left untouched.
        if not use_stdin:
            file.close()
    if output:
        print(output)
    else:
        print("'%s' not found" % (args.target), file=sys.stderr)
        sys.exit(2)
229+
230+
167231if __name__ == "__main__" :
168232 import argparse
169233 parser = argparse .ArgumentParser ()
170234
171235 parser .add_argument (
172236 "file" ,
173- type = argparse .FileType ("r" , 2048 , "utf-8" ),
174237 default = "-" ,
175238 help = "json to parse" )
176239
0 commit comments