Skip to content

Commit d65e2fa

Browse files
author
Kazuki Suzuki Przyborowski
committed
Create pyneofile_light.py
1 parent 4bccad5 commit d65e2fa

1 file changed

Lines changed: 296 additions & 0 deletions

File tree

pyneofile_light.py

Lines changed: 296 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,296 @@
1+
#!/usr/bin/env python
2+
# -*- coding: UTF-8 -*-
3+
# pyneofile_light.py - lightweight pack/unpack/list with Py2/3 compatibility.
4+
5+
import os, sys, io, json, time, struct
6+
7+
# True when the running interpreter is Python 2.
PY2 = sys.version_info[0] == 2

# Python 3 has no ``basestring`` builtin. Probe for it and install a tuple
# stand-in (usable as the second argument of ``isinstance``) only when the
# lookup fails.
try:
    basestring
except NameError:
    basestring = (str,)

# Likewise ``unicode`` only exists as a builtin on Python 2; alias it to
# ``str`` everywhere else.
try:
    unicode
except NameError:
    unicode = str
18+
19+
def to_text(s, encoding="utf-8", errors="strict"):
    """Coerce *s* to a text (``unicode``) string.

    Text is returned unchanged, ``bytes``/``bytearray`` values are decoded
    with *encoding* and *errors*, ``None`` becomes the empty string, and any
    other object is converted by calling ``unicode`` on it.
    """
    if isinstance(s, unicode):
        return s
    if isinstance(s, (bytes, bytearray)):
        return s.decode(encoding, errors)
    return u"" if s is None else unicode(s)
27+
28+
def to_bytes(s, encoding="utf-8", errors="strict"):
    """Coerce *s* to an immutable ``bytes`` value.

    ``bytes``/``bytearray`` input is copied into ``bytes``, ``None`` becomes
    ``b""``, and everything else is first turned into text with ``to_text``
    and then encoded with *encoding* and *errors*.
    """
    if isinstance(s, (bytes, bytearray)):
        return bytes(s)
    if s is None:
        return b""
    text = to_text(s, encoding, errors)
    return text.encode(encoding, errors)
34+
35+
def normpath(p):
    """Return *p* as text using forward slashes only.

    Every backslash is turned into ``/``. Unless the value contains ``://``
    (so URL-like strings keep their scheme separator), runs of slashes are
    collapsed to a single one. Trailing slashes are stripped, except when
    that would leave nothing (e.g. ``"/"`` stays ``"/"``).
    """
    # "\\" is ONE backslash character; the previous "\\\\" literal only
    # matched two consecutive backslashes, so a path such as r"a\b" was
    # never converted.
    s = to_text(p).replace("\\", "/")
    if "://" not in s:
        while "//" in s:
            s = s.replace("//", "/")
    return s.rstrip("/") or s
42+
43+
def ensure_dir(path):
    """Create directory *path* (and any missing parents) if it is absent.

    An empty or ``None`` *path* is ignored. Raises ``OSError`` when the
    directory cannot be created, for example because *path* exists but is
    not a directory.
    """
    if not path or os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Another thread/process may have created the directory between the
        # isdir() check and makedirs(); that is not an error. Anything else
        # (permissions, a regular file in the way, ...) is re-raised.
        if not os.path.isdir(path):
            raise
48+
49+
# Handles to the compression modules. They stay ``None`` until
# ``_init_backends`` has run; ``_lzma`` remains ``None`` when the import fails.
_zlib = _gzip = _bz2 = _lzma = None
# Set to True by ``_init_backends`` once ``lzma`` has been imported.
HAVE_LZMA = False


def _init_backends():
    """Import the compression modules on first use and cache them.

    ``zlib``, ``gzip`` and ``bz2`` are imported unconditionally. ``lzma`` is
    optional: if importing it fails, ``_lzma`` stays ``None`` and
    ``HAVE_LZMA`` is ``False``, and the import is attempted again on the
    next call.
    """
    global _zlib, _gzip, _bz2, _lzma, HAVE_LZMA
    if _zlib is None:
        import zlib
        _zlib = zlib
    if _gzip is None:
        import gzip
        _gzip = gzip
    if _bz2 is None:
        import bz2
        _bz2 = bz2
    if _lzma is not None:
        return
    try:
        import lzma
    except Exception:
        _lzma, HAVE_LZMA = None, False
    else:
        _lzma, HAVE_LZMA = lzma, True
74+
75+
# Numeric identifiers of the supported compression algorithms, in order:
# 0 = stored, 1 = zlib, 2 = gzip, 3 = bz2, 4 = xz.
COMP_NONE, COMP_ZLIB, COMP_GZIP, COMP_BZ2, COMP_XZ = range(5)
80+
81+
def compress(data, algo):
    """Compress *data* with the algorithm identified by *algo*.

    *algo* is one of the ``COMP_*`` constants. ``COMP_NONE`` returns *data*
    unchanged; the other algorithms run at level/preset 6.

    Raises ``RuntimeError`` when ``COMP_XZ`` is requested but ``lzma`` could
    not be imported, and ``ValueError`` for an unrecognised *algo*.
    """
    _init_backends()
    if algo == COMP_NONE:
        return data
    if algo == COMP_ZLIB:
        return _zlib.compress(data, 6)
    if algo == COMP_GZIP:
        buf = io.BytesIO()
        gz = _gzip.GzipFile(fileobj=buf, mode='wb', compresslevel=6)
        try:
            gz.write(data)
        finally:
            # Closing the GzipFile finishes the stream inside ``buf``; it
            # must happen before the buffer contents are read back.
            gz.close()
        return buf.getvalue()
    if algo == COMP_BZ2:
        if hasattr(_bz2, "compress"):
            return _bz2.compress(data, 6)
        # Incremental fallback: compress() may hold data back internally, so
        # flush() is required to obtain the complete stream.
        compressor = _bz2.BZ2Compressor(6)
        return compressor.compress(data) + compressor.flush()
    if algo == COMP_XZ:
        if not HAVE_LZMA:
            raise RuntimeError("xz/lzma not available on this Python")
        return _lzma.compress(data, preset=6)
    raise ValueError("Unknown compression algorithm: %r" % algo)
102+
103+
def decompress(data, algo):
    """Decompress *data* that was produced with the algorithm *algo*.

    *algo* is one of the ``COMP_*`` constants; ``COMP_NONE`` returns *data*
    unchanged. Raises ``RuntimeError`` when ``COMP_XZ`` is requested but
    ``lzma`` is unavailable, and ``ValueError`` for an unrecognised *algo*.
    """
    _init_backends()
    if algo == COMP_NONE:
        return data
    if algo == COMP_ZLIB:
        return _zlib.decompress(data)
    if algo == COMP_BZ2:
        return _bz2.decompress(data)
    if algo == COMP_GZIP:
        source = io.BytesIO(data)
        reader = _gzip.GzipFile(fileobj=source, mode='rb')
        try:
            return reader.read()
        finally:
            reader.close()
    if algo == COMP_XZ:
        if HAVE_LZMA:
            return _lzma.decompress(data)
        raise RuntimeError("xz/lzma not available on this Python")
    raise ValueError("Unknown compression algorithm: %r" % algo)
123+
124+
# Minimum total input size, in bytes, at which automatic selection moves up
# to the next algorithm.
AUTO_XZ_MIN = 2 << 20      # 2 MiB
AUTO_BZ2_MIN = 256 << 10   # 256 KiB
AUTO_ZLIB_MIN = 16 << 10   # 16 KiB
127+
128+
def pick_algo(total_size):
    """Choose a ``COMP_*`` algorithm for an input of *total_size* bytes.

    Larger inputs get stronger compression: xz (only when ``lzma`` is
    available), then bz2, then zlib; anything below ``AUTO_ZLIB_MIN`` is
    stored uncompressed.
    """
    _init_backends()
    if total_size >= AUTO_XZ_MIN and HAVE_LZMA:
        return COMP_XZ
    # Remaining tiers, checked from the largest threshold down.
    for threshold, choice in ((AUTO_BZ2_MIN, COMP_BZ2), (AUTO_ZLIB_MIN, COMP_ZLIB)):
        if total_size >= threshold:
            return choice
    return COMP_NONE
137+
138+
# Four-byte signature: pack() writes it at the very start of an archive and
# _read_header() rejects any stream that does not begin with it.
MAGIC = b"PNF1"
139+
140+
def _iter_files(sources):
    """Yield ``(name, path, size, mtime)`` for every file named by *sources*.

    A directory source is walked recursively and each file is named by its
    path relative to that directory. Any other source is stat'ed directly
    and named by its basename. ``size`` is ``st_size`` and ``mtime`` is
    ``st_mtime`` truncated to an int.
    """
    for item in sources:
        item = to_text(item)
        if not os.path.isdir(item):
            info = os.stat(item)
            label = normpath(os.path.basename(item))
            yield label, item, info.st_size, int(info.st_mtime)
            continue
        for dirpath, _subdirs, filenames in os.walk(item):
            for filename in filenames:
                full = os.path.join(dirpath, filename)
                info = os.stat(full)
                label = normpath(os.path.relpath(full, item))
                yield label, full, info.st_size, int(info.st_mtime)
155+
156+
def _collect_payload(entries):
    """Read every file in *entries*; return ``(manifest_files, payload)``.

    *entries* is a sequence of ``(name, path, size, mtime)`` tuples. The
    size recorded in the manifest is the number of bytes actually read, not
    the earlier ``stat`` value, so the manifest always matches the payload
    even if a file changed in between.
    """
    files = []
    chunks = []
    for name, path, _stat_size, mtime in entries:
        size = 0
        with open(path, "rb") as rf:
            while True:
                chunk = rf.read(1024 * 1024)
                if not chunk:
                    break
                chunks.append(chunk)
                size += len(chunk)
        files.append({"name": name, "size": size, "mtime": mtime})
    return files, b"".join(chunks)


def pack(sources, outfile, compression="auto"):
    """Pack *sources* into a PNF archive written to *outfile*.

    Archive layout: ``MAGIC``, then ``struct`` format ``">BBI"`` holding
    (flags, algorithm id, manifest length), then the JSON manifest, then the
    concatenated file contents compressed as a single blob.

    Parameters:
        sources: non-empty list/tuple of file or directory paths.
        outfile: destination path; ``"-"`` or ``None`` writes to stdout.
        compression: ``'auto'``, ``'none'``, ``'zlib'``, ``'gzip'``,
            ``'bz2'`` or ``'xz'``. ``'auto'`` picks by total payload size.

    Raises:
        ValueError: *sources* is empty or not a list/tuple, or *compression*
            is not one of the names above.
        RuntimeError: xz was requested but ``lzma`` is unavailable.
    """
    # Explicit check instead of ``assert`` so validation survives ``-O``.
    if not isinstance(sources, (list, tuple)) or not sources:
        raise ValueError("sources must be a non-empty list")
    algo_map = {
        'none': COMP_NONE, 'zlib': COMP_ZLIB, 'gzip': COMP_GZIP,
        'bz2': COMP_BZ2, 'xz': COMP_XZ, 'auto': 'auto',
    }
    if compression not in algo_map:
        raise ValueError("Unknown compression: %r" % compression)

    entries = list(_iter_files(sources))
    files, payload = _collect_payload(entries)
    algo = pick_algo(len(payload)) if compression == 'auto' else algo_map[compression]

    manifest = {"files": files}
    hdr = to_bytes(json.dumps(manifest, sort_keys=True, separators=(',', ':')))
    flags = 0 if algo == COMP_NONE else 1
    header = MAGIC + struct.pack(">BBI", flags, algo, len(hdr)) + hdr
    # Compress before touching the destination so a failure here (e.g. xz
    # unavailable) does not leave a truncated, header-only archive behind.
    body = compress(payload, algo)

    if outfile == "-" or outfile is None:
        # Python 3 exposes the binary layer as ``sys.stdout.buffer``;
        # otherwise write to ``sys.stdout`` itself.
        stream = getattr(sys.stdout, "buffer", sys.stdout)
        stream.write(header)
        stream.write(body)
        stream.flush()
        return

    ensure_dir(os.path.dirname(outfile) or ".")
    with open(outfile, "wb") as f:
        f.write(header)
        f.write(body)
203+
204+
def _read_header(stream):
    """Read and validate the archive header from binary *stream*.

    Returns ``(flags, algo, manifest)`` where *manifest* is the decoded JSON
    object. The stream is left positioned at the start of the payload.

    Raises ``ValueError`` when the magic does not match, when the stream
    ends before the header is complete, or when the manifest is not valid
    JSON or not a JSON object.
    """
    magic = stream.read(4)
    if magic != MAGIC:
        raise ValueError("Not a PNF archive or corrupt magic: %r" % magic)

    fixed_len = struct.calcsize(">BBI")
    fixed = stream.read(fixed_len)
    # A short read here would otherwise surface as struct.error.
    # NOTE(review): assumes a buffered stream, where a short read means EOF.
    if len(fixed) != fixed_len:
        raise ValueError("Truncated PNF header: expected %d bytes, got %d"
                         % (fixed_len, len(fixed)))
    flags, algo, hdrln = struct.unpack(">BBI", fixed)

    hdr = stream.read(hdrln)
    if len(hdr) != hdrln:
        raise ValueError("Truncated PNF manifest: expected %d bytes, got %d"
                         % (hdrln, len(hdr)))
    man = json.loads(to_text(hdr))
    if not isinstance(man, dict):
        raise ValueError("Corrupt PNF manifest: expected a JSON object")
    return flags, algo, man
212+
213+
def list_archive(infile):
    """Return the manifest dictionary stored in the archive *infile*.

    *infile* is a path, or ``None``/``"-"`` to read from standard input.
    Only the header is read. A file opened here is always closed again;
    standard input is left open.
    """
    from_stdin = infile in (None, "-")
    handle = sys.stdin if from_stdin else open(infile, "rb")
    # Prefer the binary layer of a text stream when it has one.
    handle = getattr(handle, "buffer", handle)
    try:
        _flags, _algo, manifest = _read_header(handle)
        return manifest
    finally:
        if not from_stdin:
            handle.close()
222+
223+
def unpack(infile, outdir):
    """Extract every file of archive *infile* below directory *outdir*.

    *infile* is a path, or ``None``/``"-"`` for standard input. *outdir* is
    created if missing (an empty value means the current directory). Each
    file is written with the size recorded in the manifest and its mtime is
    restored. A warning is written to stderr when payload bytes remain
    after the last entry.

    Raises ``ValueError`` when an entry would be written outside *outdir*
    (absolute path or ``..`` components) or when the payload is shorter
    than the manifest claims.
    """
    stream = sys.stdin if infile in (None, "-") else open(infile, "rb")
    stream = getattr(stream, "buffer", stream)
    try:
        flags, algo, man = _read_header(stream)
        payload = stream.read()
        data = decompress(payload, algo)
        cursor = 0
        root = os.path.abspath(outdir or ".")
        # Prefix with a trailing separator so "/out" does not match "/outside".
        prefix = root if root.endswith(os.sep) else root + os.sep
        ensure_dir(root)
        for entry in man.get("files", []):
            name = normpath(entry["name"])
            size = int(entry["size"])
            mtime = int(entry.get("mtime", int(time.time())))
            # Names come from the archive and are untrusted. This is a
            # lexical containment check only: it does not resolve symlinks
            # that already exist inside the output directory.
            outpath = os.path.abspath(os.path.join(root, name))
            if not outpath.startswith(prefix):
                raise ValueError("Unsafe path in archive: %r" % name)
            if size < 0 or cursor + size > len(data):
                raise ValueError(
                    "Corrupt archive: payload too short for %r" % name)
            ensure_dir(os.path.dirname(outpath))
            with open(outpath, "wb") as f:
                f.write(data[cursor:cursor+size])
            os.utime(outpath, (mtime, mtime))
            cursor += size
        if cursor != len(data):
            sys.stderr.write("Warning: payload trailing bytes: %d\n" % (len(data)-cursor))
    finally:
        if stream is not sys.stdin and stream is not getattr(sys.stdin, "buffer", sys.stdin):
            stream.close()
247+
248+
def _usage():
    """Write the command-line help text to standard output."""
    # Real newlines: the previous "\\n" escapes printed a literal
    # backslash-n, producing a single unreadable line.
    msg = (
        "pyneofile_light.py - lightweight pack/unpack/list\n\n"
        "Usage:\n"
        " pack OUTFILE [--comp auto|none|zlib|gzip|bz2|xz] SRC [SRC...]\n"
        " unpack INFILE OUTDIR\n"
        " list INFILE\n"
    )
    sys.stdout.write(msg)
257+
258+
def main(argv=None):
    """Command-line entry point; returns the process exit status.

    *argv* defaults to ``sys.argv[1:]``. Recognised commands are ``pack``,
    ``unpack`` and ``list``. Returns 0 on success (and for ``-h``/``--help``
    or no arguments) and 2 after printing the usage text for an unknown
    command or missing arguments.
    """
    def bad_usage():
        _usage()
        return 2

    args = list(sys.argv[1:] if argv is None else argv)
    if not args or args[0] in ("-h", "--help"):
        _usage()
        return 0

    cmd, rest = args[0], args[1:]

    if cmd == "pack":
        if not rest:
            return bad_usage()
        outfile = rest.pop(0)
        comp = "auto"
        # The option is only recognised directly after OUTFILE.
        if rest and rest[0] == "--comp":
            rest.pop(0)
            if not rest:
                return bad_usage()
            comp = rest.pop(0)
        if not rest:
            return bad_usage()
        pack(rest, outfile, comp)
        return 0

    if cmd == "unpack":
        if len(rest) < 2:
            return bad_usage()
        unpack(rest[0], rest[1])
        return 0

    if cmd == "list":
        if len(rest) < 1:
            return bad_usage()
        man = list_archive(rest[0])
        sys.stdout.write(json.dumps(man, indent=2, sort_keys=True) + ("\n" if not PY2 else ""))
        return 0

    return bad_usage()
294+
295+
# Run the CLI when executed as a script and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)