@@ -2885,6 +2885,7 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0
28852885 global JM_mupdf_show_errors
28862886 JM_mupdf_show_errors_old = JM_mupdf_show_errors
28872887 JM_mupdf_show_errors = 0
2888+
28882889 try:
28892890 self.is_closed = False
28902891 self.is_encrypted = False
@@ -2901,138 +2902,93 @@ def __init__(self, filename=None, stream=None, filetype=None, rect=None, width=0
29012902 self.this_is_pdf = True
29022903 return
29032904
2904- # Classic implementation temporarily sets JM_mupdf_show_errors=0 then
2905- # restores the previous value in `fz_always() {...}` before returning.
2906- #
2907-
2908- if not filename or type(filename) is str:
2909- pass
2910- elif hasattr(filename, "absolute"):
2911- filename = str(filename)
2912- elif hasattr(filename, "name"):
2913- filename = filename.name
2914- else:
2915- raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
2916-
2917- if stream is not None:
2918- if isinstance(stream, (bytes, memoryview)):
2919- self.stream = stream
2920- elif isinstance(stream, bytearray):
2921- self.stream = bytes(stream)
2922- elif isinstance(stream, io.BytesIO):
2923- self.stream = stream.getvalue()
2924- else:
2925- raise TypeError(f"bad stream: {type(stream)=}.")
2926- stream = self.stream
2927- if not (filename or filetype):
2928- filename = 'pdf'
2929- else:
2930- self.stream = None
2931-
2932- if filename and self.stream is None:
2933- from_file = True
2934- self._name = filename
2935- else:
2936- from_file = False
2937- self._name = ""
2938-
2939- if from_file:
2940- if not os.path.exists(filename):
2941- msg = f"no such file: '{filename}'"
2942- raise FileNotFoundError(msg)
2943- elif not os.path.isfile(filename):
2944- msg = f"'{filename}' is no file"
2945- raise FileDataError(msg)
2946- elif os.path.getsize(filename) == 0:
2947- raise EmptyFileError(f'Cannot open empty file: {filename=}.')
2948-
29492905 w = width
29502906 h = height
29512907 r = JM_rect_from_py(rect)
29522908 if not mupdf.fz_is_infinite_rect(r):
29532909 w = r.x1 - r.x0
29542910 h = r.y1 - r.y0
29552911
2912+ self._name = filename
2913+ self.stream = stream
2914+
29562915 if stream is not None:
2916+ if filename is not None and filetype is None:
2917+ # 2025-05-06: Use <filename> as the filetype. This is
2918+ # reversing precedence - we used to use <filename> if both
2919+ # were set.
2920+ filetype = filename
2921+ if isinstance(stream, (bytes, memoryview)):
2922+ pass
2923+ elif isinstance(stream, bytearray):
2924+ stream = bytes(stream)
2925+ elif isinstance(stream, io.BytesIO):
2926+ stream = stream.getvalue()
2927+ else:
2928+ raise TypeError(f"bad stream: {type(stream)=}.")
2929+ self.stream = stream
2930+
29572931 assert isinstance(stream, (bytes, memoryview))
29582932 if len(stream) == 0:
2933+ # MuPDF raises an exception for this but also generates
2934+ # warnings, which is not very helpful for us. So instead we
2935+ # raise a specific exception.
29592936 raise EmptyFileError('Cannot open empty stream.')
2960- c = stream
2961- #len = (size_t) PyBytes_Size(stream);
2962-
2963- if mupdf_cppyy:
2964- buffer_ = mupdf.fz_new_buffer_from_copied_data(c)
2965- data = mupdf.fz_open_buffer(buffer_)
2966- else:
2967- # Pass raw bytes data to mupdf.fz_open_memory(). This assumes
2968- # that the bytes string will not be modified; i think the
2969- # original PyMuPDF code makes the same assumption. Presumably
2970- # setting self.stream above ensures that the bytes will not be
2971- # garbage collected?
2972- data = mupdf.fz_open_memory(mupdf.python_buffer_data(c), len(c))
2973- magic = filename
2974- if not magic:
2975- magic = filetype
2976- # fixme: pymupdf does:
2977- # handler = fz_recognize_document(gctx, filetype);
2978- # if (!handler) raise ValueError( MSG_BAD_FILETYPE)
2979- # but prefer to leave fz_open_document_with_stream() to raise.
2937+
2938+ stream2 = mupdf.fz_open_memory(mupdf.python_buffer_data(stream), len(stream))
29802939 try:
2981- doc = mupdf.fz_open_document_with_stream(magic, data )
2940+ doc = mupdf.fz_open_document_with_stream(filetype if filetype else '', stream2 )
29822941 except Exception as e:
29832942 if g_exceptions_verbose > 1: exception_info()
29842943 raise FileDataError('Failed to open stream') from e
2985- else:
2986- if filename:
2987- if not filetype:
2988- try:
2989- doc = mupdf.fz_open_document(filename)
2990- except Exception as e:
2991- if g_exceptions_verbose > 1: exception_info()
2992- raise FileDataError(f'Failed to open file {filename!r}.') from e
2993- else:
2994- handler = mupdf.ll_fz_recognize_document(filetype)
2995- if handler:
2996- if handler.open:
2997- #log( f'{handler.open=}')
2998- #log( f'{dir(handler.open)=}')
2999- try:
3000- stream = mupdf.FzStream(filename)
3001- accel = mupdf.FzStream()
3002- archive = mupdf.FzArchive(None)
3003- if mupdf_version_tuple >= (1, 24, 8):
3004- doc = mupdf.ll_fz_document_handler_open(
3005- handler,
3006- stream.m_internal,
3007- accel.m_internal,
3008- archive.m_internal,
3009- None, # recognize_state
3010- )
3011- else:
3012- doc = mupdf.ll_fz_document_open_fn_call(
3013- handler.open,
3014- stream.m_internal,
3015- accel.m_internal,
3016- archive.m_internal,
3017- )
3018- except Exception as e:
3019- if g_exceptions_verbose > 1: exception_info()
3020- raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
3021- doc = mupdf.FzDocument( doc)
3022- else:
3023- assert 0
3024- else:
3025- raise ValueError( MSG_BAD_FILETYPE)
2944+
2945+ elif filename:
2946+ assert not stream
2947+ if isinstance(filename, str):
2948+ pass
2949+ elif hasattr(filename, "absolute"):
2950+ filename = str(filename)
2951+ elif hasattr(filename, "name"):
2952+ filename = filename.name
30262953 else:
3027- pdf = mupdf.PdfDocument()
3028- doc = mupdf.FzDocument(pdf)
2954+ raise TypeError(f"bad filename: {type(filename)=} {filename=}.")
2955+ self._name = filename
2956+
2957+ # Generate our own specific exceptions. This avoids MuPDF
2958+ # generating warnings etc.
2959+ if not os.path.exists(filename):
2960+ raise FileNotFoundError(f"no such file: '{filename}'")
2961+ elif not os.path.isfile(filename):
2962+ raise FileDataError(f"'{filename}' is no file")
2963+ elif os.path.getsize(filename) == 0:
2964+ raise EmptyFileError(f'Cannot open empty file: {filename=}.')
2965+
2966+ if filetype:
2967+ # Override the type implied by <filename>. MuPDF does not
2968+ # have a way to do this directly so we open via a stream.
2969+ try:
2970+ fz_stream = mupdf.fz_open_file(filename)
2971+ doc = mupdf.fz_open_document_with_stream(filetype, fz_stream)
2972+ except Exception as e:
2973+ if g_exceptions_verbose > 1: exception_info()
2974+ raise FileDataError(f'Failed to open file {filename!r} as type {filetype!r}.') from e
2975+ else:
2976+ try:
2977+ doc = mupdf.fz_open_document(filename)
2978+ except Exception as e:
2979+ if g_exceptions_verbose > 1: exception_info()
2980+ raise FileDataError(f'Failed to open file {filename!r}.') from e
2981+
2982+ else:
2983+ pdf = mupdf.PdfDocument()
2984+ doc = mupdf.FzDocument(pdf)
2985+
30292986 if w > 0 and h > 0:
30302987 mupdf.fz_layout_document(doc, w, h, fontsize)
30312988 elif mupdf.fz_is_document_reflowable(doc):
30322989 mupdf.fz_layout_document(doc, 400, 600, 11)
3033- this = doc
30342990
3035- self.this = this
2991+ self.this = doc
30362992
30372993 # fixme: not sure where self.thisown gets initialised in PyMuPDF.
30382994 #
0 commit comments