22Encoding and decoding python native data structures as
33portable JData-spec annotated dict structure
44
5- Copyright (c) 2019-2022 Qianqian Fang <q.fang at neu.edu>
5+ Copyright (c) 2019-2024 Qianqian Fang <q.fang at neu.edu>
66"""
77
88__all__ = ["encode" , "decode" , "jdtype" , "jsonfilter" ]
1515import copy
1616import zlib
1717import base64
18+ import os
19+ import re
20+ from .jfile import jdlink
1821
1922##====================================================================================
2023## global variables
@@ -131,12 +134,19 @@ def encode(d, opt={}):
131134 return newobj
132135 elif isinstance (d , np .ndarray ) or np .iscomplex (d ):
133136 newobj = {}
134- newobj ["_ArrayType_" ] = jdtype [str (d .dtype )] if (str (d .dtype ) in jdtype ) else str (d .dtype )
137+ newobj ["_ArrayType_" ] = (
138+ jdtype [str (d .dtype )] if (str (d .dtype ) in jdtype ) else str (d .dtype )
139+ )
135140 if np .isscalar (d ):
136141 newobj ["_ArraySize_" ] = 1
137142 else :
138143 newobj ["_ArraySize_" ] = list (d .shape )
139- if d .dtype == np .complex64 or d .dtype == np .complex128 or d .dtype == np .csingle or d .dtype == np .cdouble :
144+ if (
145+ d .dtype == np .complex64
146+ or d .dtype == np .complex128
147+ or d .dtype == np .csingle
148+ or d .dtype == np .cdouble
149+ ):
140150 newobj ["_ArrayIsComplex_" ] = True
141151 newobj ["_ArrayData_" ] = np .stack ((d .ravel ().real , d .ravel ().imag ))
142152 else :
@@ -158,15 +168,23 @@ def encode(d, opt={}):
158168 newobj ["_ArrayZipData_" ] = gzipper .compress (newobj ["_ArrayZipData_" ])
159169 elif opt ["compression" ] == "lzma" :
160170 try :
161- newobj ["_ArrayZipData_" ] = lzma .compress (newobj ["_ArrayZipData_" ], lzma .FORMAT_ALONE )
171+ newobj ["_ArrayZipData_" ] = lzma .compress (
172+ newobj ["_ArrayZipData_" ], lzma .FORMAT_ALONE
173+ )
162174 except Exception :
163- print ('you must install "lzma" module to compress with this format, ignoring' )
175+ print (
176+ 'you must install "lzma" module to compress with this format, ignoring'
177+ )
164178 pass
165179 elif opt ["compression" ] == "lz4" :
166180 try :
167- newobj ["_ArrayZipData_" ] = lz4 .frame .compress (newobj ["_ArrayZipData_" ].tobytes ())
181+ newobj ["_ArrayZipData_" ] = lz4 .frame .compress (
182+ newobj ["_ArrayZipData_" ].tobytes ()
183+ )
168184 except ImportError :
169- print ('you must install "lz4" module to compress with this format, ignoring' )
185+ print (
186+ 'you must install "lz4" module to compress with this format, ignoring'
187+ )
170188 pass
171189 elif opt ["compression" ].startswith ("blosc2" ):
172190 try :
@@ -187,9 +205,13 @@ def encode(d, opt={}):
187205 nthreads = blosc2nthread ,
188206 )
189207 except ImportError :
190- print ('you must install "blosc2" module to compress with this format, ignoring' )
208+ print (
209+ 'you must install "blosc2" module to compress with this format, ignoring'
210+ )
191211 pass
192- if (("base64" in opt ) and (opt ["base64" ])) or opt ["compression" ] == "base64" :
212+ if (("base64" in opt ) and (opt ["base64" ])) or opt [
213+ "compression"
214+ ] == "base64" :
193215 newobj ["_ArrayZipData_" ] = base64 .b64encode (newobj ["_ArrayZipData_" ])
194216 newobj .pop ("_ArrayData_" )
195217 return newobj
@@ -214,8 +236,14 @@ def decode(d, opt={}):
214236 """
215237
216238 opt .setdefault ("inplace" , False )
217-
218- if (isinstance (d , str ) or type (d ) == "unicode" ) and len (d ) <= 6 and len (d ) > 4 and d [- 1 ] == "_" :
239+ opt .setdefault ("maxlinklevel" , 0 )
240+
241+ if (
242+ (isinstance (d , str ) or type (d ) == "unicode" )
243+ and len (d ) <= 6
244+ and len (d ) > 4
245+ and d [- 1 ] == "_"
246+ ):
219247 if d == "_NaN_" :
220248 return float ("nan" )
221249 elif d == "_Inf_" :
@@ -233,12 +261,16 @@ def decode(d, opt={}):
233261 d ["_ArraySize_" ] = np .frombuffer (bytearray (d ["_ArraySize_" ]))
234262 if "_ArrayZipData_" in d :
235263 newobj = d ["_ArrayZipData_" ]
236- if (("base64" in opt ) and (opt ["base64" ])) or ("_ArrayZipType_" in d and d ["_ArrayZipType_" ] == "base64" ):
264+ if (("base64" in opt ) and (opt ["base64" ])) or (
265+ "_ArrayZipType_" in d and d ["_ArrayZipType_" ] == "base64"
266+ ):
237267 newobj = base64 .b64decode (newobj )
238268 if "_ArrayZipType_" in d and d ["_ArrayZipType_" ] not in _zipper :
239269 raise Exception (
240270 "JData" ,
241- "compression method {} is not supported" .format (d ["_ArrayZipType_" ]),
271+ "compression method {} is not supported" .format (
272+ d ["_ArrayZipType_" ]
273+ ),
242274 )
243275 if d ["_ArrayZipType_" ] == "zlib" :
244276 newobj = zlib .decompress (bytes (newobj ))
@@ -258,7 +290,9 @@ def decode(d, opt={}):
258290
259291 newobj = lz4 .frame .decompress (bytes (newobj ))
260292 except Exception :
261- print ('Warning: you must install "lz4" module to decompress a data record in this file, ignoring' )
293+ print (
294+ 'Warning: you must install "lz4" module to decompress a data record in this file, ignoring'
295+ )
262296 return copy .deepcopy (d ) if opt ["inplace" ] else d
263297 elif d ["_ArrayZipType_" ].startswith ("blosc2" ):
264298 try :
@@ -267,15 +301,23 @@ def decode(d, opt={}):
267301 blosc2nthread = 1
268302 if "nthread" in opt :
269303 blosc2nthread = opt ["nthread" ]
270- newobj = blosc2 .decompress2 (bytes (newobj ), as_bytearray = False , nthreads = blosc2nthread )
304+ newobj = blosc2 .decompress2 (
305+ bytes (newobj ), as_bytearray = False , nthreads = blosc2nthread
306+ )
271307 except Exception :
272- print ('Warning: you must install "blosc2" module to decompress a data record in this file, ignoring' )
308+ print (
309+ 'Warning: you must install "blosc2" module to decompress a data record in this file, ignoring'
310+ )
273311 return copy .deepcopy (d ) if opt ["inplace" ] else d
274- newobj = np .frombuffer (bytearray (newobj ), dtype = np .dtype (d ["_ArrayType_" ])).reshape (d ["_ArrayZipSize_" ])
312+ newobj = np .frombuffer (
313+ bytearray (newobj ), dtype = np .dtype (d ["_ArrayType_" ])
314+ ).reshape (d ["_ArrayZipSize_" ])
275315 if "_ArrayIsComplex_" in d and newobj .shape [0 ] == 2 :
276316 newobj = newobj [0 ] + 1j * newobj [1 ]
277317 if "_ArrayOrder_" in d and (
278- d ["_ArrayOrder_" ].lower () == "c" or d ["_ArrayOrder_" ].lower () == "col" or d ["_ArrayOrder_" ].lower () == "column"
318+ d ["_ArrayOrder_" ].lower () == "c"
319+ or d ["_ArrayOrder_" ].lower () == "col"
320+ or d ["_ArrayOrder_" ].lower () == "column"
279321 ):
280322 newobj = newobj .reshape (d ["_ArraySize_" ], order = "F" )
281323 else :
@@ -285,17 +327,25 @@ def decode(d, opt={}):
285327 return newobj
286328 elif "_ArrayData_" in d :
287329 if isinstance (d ["_ArrayData_" ], str ):
288- newobj = np .frombuffer (d ["_ArrayData_" ], dtype = np .dtype (d ["_ArrayType_" ]))
330+ newobj = np .frombuffer (
331+ d ["_ArrayData_" ], dtype = np .dtype (d ["_ArrayType_" ])
332+ )
289333 else :
290- newobj = np .asarray (d ["_ArrayData_" ], dtype = np .dtype (d ["_ArrayType_" ]))
334+ newobj = np .asarray (
335+ d ["_ArrayData_" ], dtype = np .dtype (d ["_ArrayType_" ])
336+ )
291337 if "_ArrayZipSize_" in d and newobj .shape [0 ] == 1 :
292338 if isinstance (d ["_ArrayZipSize_" ], str ):
293- d ["_ArrayZipSize_" ] = np .frombuffer (bytearray (d ["_ArrayZipSize_" ]))
339+ d ["_ArrayZipSize_" ] = np .frombuffer (
340+ bytearray (d ["_ArrayZipSize_" ])
341+ )
294342 newobj = newobj .reshape (d ["_ArrayZipSize_" ])
295343 if "_ArrayIsComplex_" in d and newobj .shape [0 ] == 2 :
296344 newobj = newobj [0 ] + 1j * newobj [1 ]
297345 if "_ArrayOrder_" in d and (
298- d ["_ArrayOrder_" ].lower () == "c" or d ["_ArrayOrder_" ].lower () == "col" or d ["_ArrayOrder_" ].lower () == "column"
346+ d ["_ArrayOrder_" ].lower () == "c"
347+ or d ["_ArrayOrder_" ].lower () == "col"
348+ or d ["_ArrayOrder_" ].lower () == "column"
299349 ):
300350 newobj = newobj .reshape (d ["_ArraySize_" ], order = "F" )
301351 else :
@@ -308,6 +358,33 @@ def decode(d, opt={}):
308358 "JData" ,
309359 "one and only one of _ArrayData_ or _ArrayZipData_ is required" ,
310360 )
361+ elif "_DataLink_" in d :
362+ if opt ["maxlinklevel" ] > 0 and "_DataLink_" in data :
363+ if isinstance (data ["_DataLink_" ], str ):
364+ datalink = data ["_DataLink_" ]
365+ if re .search ("\:\$" , datalink ):
366+ ref = re .search (
367+ "^(?P<proto>[a-zA-Z]+://)*(?P<path>.+)(?P<delim>\:)()*(?P<jsonpath>(?<=:)\$\d*\.*.*)*" ,
368+ datalink ,
369+ )
370+ else :
371+ ref = re .search (
372+ "^(?P<proto>[a-zA-Z]+://)*(?P<path>.+)(?P<delim>\:)*(?P<jsonpath>(?<=:)\$\d*\..*)*" ,
373+ datalink ,
374+ )
375+ if ref and ref .group ("path" ):
376+ uripath = ref .group ("proto" ) + ref .group ("path" )
377+ newobj , fname = jdlink (uripath )
378+ if os .path .exists (fname ):
379+ opt ["maxlinklevel" ] = opt ["maxlinklevel" ] - 1
380+ if ref .group ("jsonpath" ):
381+ newobj = jsonpath (newdata , ref .group ("jsonpath" ))
382+ return nrewobj
383+ else :
384+ raise Exception (
385+ "JData" ,
386+ "_DataLink_ contains invalid URL" ,
387+ )
311388 return decodedict (d , opt )
312389 else :
313390 return copy .deepcopy (d ) if opt ["inplace" ] else d
0 commit comments