Skip to content

Commit 2eb24ec

Browse files
authored
Add files via upload
1 parent 50fd10c commit 2eb24ec

1 file changed

Lines changed: 94 additions & 38 deletions

File tree

pycatfile.py

Lines changed: 94 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -2066,31 +2066,48 @@ def MkTempFile(data=None,
20662066
prefix=__program_name__,
20672067
delete=True,
20682068
encoding="utf-8",
2069-
newline=None, # text mode only; in-memory objects ignore newline semantics
2069+
newline=None,
2070+
text_errors="strict",
20702071
dir=None,
20712072
suffix="",
20722073
use_spool=__use_spoolfile__,
2074+
autoswitch_spool=False,
20732075
spool_max=__spoolfile_size__,
2074-
spool_dir=__use_spooldir__):
2076+
spool_dir=__use_spooldir__,
2077+
reset_to_start=True,
2078+
memfd_name=None,
2079+
memfd_allow_sealing=False,
2080+
memfd_flags_extra=0,
2081+
on_create=None):
20752082
"""
20762083
Return a file-like handle with consistent behavior on Py2.7 and Py3.x.
20772084

20782085
Storage:
2079-
- inmem=True -> BytesIO (bytes) or StringIO (text), or memfd for bytes if available
2080-
- inmem=False, use_spool=True -> SpooledTemporaryFile (binary), optionally TextIOWrapper for text
2081-
- inmem=False, use_spool=False -> NamedTemporaryFile (binary), optionally TextIOWrapper for text
2086+
- inmem=True, usememfd=True, isbytes=True and memfd available
2087+
-> memfd-backed anonymous file (binary)
2088+
- inmem=True, otherwise
2089+
-> BytesIO (bytes) or StringIO (text)
2090+
- inmem=False, use_spool=True
2091+
-> SpooledTemporaryFile (binary), optionally TextIOWrapper for text
2092+
- inmem=False, use_spool=False
2093+
-> NamedTemporaryFile (binary), optionally TextIOWrapper for text
20822094

20832095
Text vs bytes:
20842096
- isbytes=True -> file expects bytes; 'data' must be bytes-like
2085-
- isbytes=False -> file expects text; 'data' must be text (unicode/str). Newline translation and encoding
2086-
apply only for spooled/named files (not BytesIO/StringIO).
2097+
- isbytes=False -> file expects text; 'data' must be text (unicode/str). Newline translation and
2098+
encoding apply only for spooled/named files (not BytesIO/StringIO).
20872099

20882100
Notes:
2089-
- On Windows, NamedTemporaryFile(delete=True) keeps the file open and cannot be reopened by other processes.
2090-
Use delete=False if you need to pass the path elsewhere.
2091-
- For text: in-memory StringIO ignores 'newline' (as usual).
2092-
- When available, memfd is used only for inmem=True and isbytes=True, providing an anonymous in-memory
2093-
file descriptor (Linux-only). Text in-memory still uses StringIO to preserve newline semantics.
2101+
- On Windows, NamedTemporaryFile(delete=True) keeps the file open and cannot be reopened by
2102+
other processes. Use delete=False if you need to pass the path elsewhere.
2103+
- For text: in-memory StringIO ignores 'newline' and 'text_errors' (as usual).
2104+
- When available, and if usememfd=True, memfd is used only for inmem=True and isbytes=True,
2105+
providing an anonymous in-memory file descriptor (Linux-only). Text in-memory still uses
2106+
StringIO to preserve newline semantics.
2107+
- If autoswitch_spool=True and initial data size exceeds spool_max, in-memory storage is
2108+
skipped and a spooled file is used instead (if use_spool=True).
2109+
- If on_create is not None, it is called as on_create(fp, kind) where kind is one of:
2110+
"memfd", "bytesio", "stringio", "spool", "disk".
20942111
"""
20952112

20962113
# -- sanitize simple params (avoid None surprises) --
@@ -2122,39 +2139,65 @@ def MkTempFile(data=None,
21222139
else:
21232140
init = None
21242141

2142+
# Size of init for autoswitch; only meaningful for bytes
2143+
init_len = len(init) if (init is not None and isbytes) else None
2144+
21252145
# -------- In-memory --------
21262146
if inmem:
2127-
# Use memfd only for bytes, and only where available (Linux, Python 3.8+)
2128-
if usememfd and isbytes and hasattr(os, "memfd_create"):
2129-
flags = 0
2130-
# Close-on-exec is almost always what you want for temps
2131-
if hasattr(os, "MFD_CLOEXEC"):
2132-
flags |= os.MFD_CLOEXEC
2133-
2134-
fd = os.memfd_create(prefix, flags)
2135-
# Binary read/write file-like object backed by RAM
2136-
f = os.fdopen(fd, "w+b")
2137-
2138-
if init is not None:
2139-
f.write(init)
2140-
f.seek(0)
2141-
return f
2147+
# If autoswitch is enabled and data is larger than spool_max, and
2148+
# spooling is allowed, skip the in-memory branch and fall through
2149+
# to the spool/disk logic below.
2150+
if autoswitch_spool and use_spool and init_len is not None and init_len > spool_max:
2151+
pass # fall through to spool/disk sections
2152+
else:
2153+
# Use memfd only for bytes, and only where available (Linux, Python 3.8+)
2154+
if usememfd and isbytes and hasattr(os, "memfd_create"):
2155+
name = memfd_name or prefix or "MkTempFile"
2156+
flags = 0
2157+
# Close-on-exec is almost always what you want for temps
2158+
if hasattr(os, "MFD_CLOEXEC"):
2159+
flags |= os.MFD_CLOEXEC
2160+
# Optional sealing support if requested and available
2161+
if memfd_allow_sealing and hasattr(os, "MFD_ALLOW_SEALING"):
2162+
flags |= os.MFD_ALLOW_SEALING
2163+
# Extra custom flags (e.g. hugepage flags) if caller wants them
2164+
if memfd_flags_extra:
2165+
flags |= memfd_flags_extra
2166+
2167+
fd = os.memfd_create(name, flags)
2168+
# Binary read/write file-like object backed by RAM
2169+
f = os.fdopen(fd, "w+b")
2170+
2171+
if init is not None:
2172+
f.write(init)
2173+
if reset_to_start:
2174+
f.seek(0)
2175+
2176+
if on_create is not None:
2177+
on_create(f, "memfd")
2178+
return f
2179+
2180+
# Fallback: pure Python in-memory objects
2181+
if isbytes:
2182+
f = io.BytesIO(init if init is not None else b"")
2183+
kind = "bytesio"
2184+
else:
2185+
# newline/text_errors not enforced for StringIO; matches stdlib semantics
2186+
f = io.StringIO(init if init is not None else "")
2187+
kind = "stringio"
21422188

2143-
# Fallback: pure Python in-memory objects
2144-
if isbytes:
2145-
f = io.BytesIO(init if init is not None else b"")
2146-
else:
2147-
# newline not enforced for StringIO; matches stdlib semantics
2148-
f = io.StringIO(init if init is not None else "")
2189+
if reset_to_start:
2190+
f.seek(0)
21492191

2150-
f.seek(0)
2151-
return f
2192+
if on_create is not None:
2193+
on_create(f, kind)
2194+
return f
21522195

21532196
# Helper: wrap a binary file into a text file with encoding/newline
21542197
def _wrap_text(handle):
21552198
# For both Py2 & Py3, TextIOWrapper gives consistent newline/encoding behavior
2156-
tw = io.TextIOWrapper(handle, encoding=encoding, newline=newline)
2157-
return tw
2199+
return io.TextIOWrapper(handle, encoding=encoding,
2200+
newline=newline, errors=text_errors)
21582201

21592202
# -------- Spooled (RAM then disk) --------
21602203
if use_spool:
@@ -2165,17 +2208,30 @@ def _wrap_text(handle):
21652208

21662209
if init is not None:
21672210
f.write(init)
2211+
if reset_to_start:
2212+
f.seek(0)
2213+
elif reset_to_start:
21682214
f.seek(0)
2215+
2216+
if on_create is not None:
2217+
on_create(f, "spool")
21692218
return f
21702219

21712220
# -------- On-disk temp (NamedTemporaryFile) --------
21722221
# Always create binary file; wrap for text if needed for uniform Py2/3 behavior
2173-
b = tempfile.NamedTemporaryFile(mode="w+b", prefix=prefix, suffix=suffix, dir=dir, delete=delete)
2222+
b = tempfile.NamedTemporaryFile(mode="w+b", prefix=prefix, suffix=suffix,
2223+
dir=dir, delete=delete)
21742224
f = b if isbytes else _wrap_text(b)
21752225

21762226
if init is not None:
21772227
f.write(init)
2228+
if reset_to_start:
2229+
f.seek(0)
2230+
elif reset_to_start:
21782231
f.seek(0)
2232+
2233+
if on_create is not None:
2234+
on_create(f, "disk")
21792235
return f
21802236

21812237

0 commit comments

Comments
 (0)