@@ -86,9 +86,38 @@ class LibreOfficeFileConverter:
8686 "ppt" : frozenset (["pdf" , "pptx" , "odp" , "html" , "png" , "jpg" ]),
8787 "odp" : frozenset (["pdf" , "pptx" , "ppt" , "html" , "png" , "jpg" ]),
8888 }
89+ """A non-exhaustive mapping of the conversion types supported by this component.
90+ See https://help.libreoffice.org/latest/en-GB/text/shared/guide/convertfilters.html for more information."""
8991
90- def __init__ (self ) -> None :
91- """Check whether soffice is installed."""
92+ def __init__ (
93+ self ,
94+ output_file_type : Literal [
95+ "doc" ,
96+ "docx" ,
97+ "odt" ,
98+ "rtf" ,
99+ "txt" ,
100+ "html" ,
101+ "xlsx" ,
102+ "xls" ,
103+ "ods" ,
104+ "csv" ,
105+ "pptx" ,
106+ "ppt" ,
107+ "odp" ,
108+ "epub" ,
109+ "png" ,
110+ "jpg" ,
111+ ]
112+ | None = None ,
113+ ) -> None :
114+ """
115+ Check whether soffice is installed.
116+
117+ :param output_file_type:
118+ Target file format to convert to. Must be a valid conversion target for
119+ each source's input type — see :attr:`SUPPORTED_TYPES` for the full mapping.
120+ """
92121 soffice_path = shutil .which ("soffice" )
93122 if soffice_path is None :
94123 msg = """LibreOffice (soffice) is required but not installed or not in PATH.
@@ -97,6 +126,7 @@ def __init__(self) -> None:
97126 raise FileNotFoundError (msg )
98127
99128 self .soffice_path = soffice_path
129+ self .output_file_type = output_file_type
100130
101131 def to_dict (self ) -> dict [str , Any ]:
102132 """
@@ -127,12 +157,12 @@ def _get_conversion_args(
127157
128158 :param source: Source file path.
129159 :param output_directory: Output directory to save converted files to.
130- :param output_file_type: Target file format extension (e.g. `` "pdf"` `).
131- :returns: Tuple of `` (output_path, soffice_args)`` where `` output_path` ` is the
132- expected path of the converted file and `` soffice_args` ` is the list of
133- arguments to pass to `` soffice` `.
134- :raises FileNotFoundError: If `` source` ` does not exist.
135- :raises OSError: If `` output_directory` ` does not exist or is not writable.
160+ :param output_file_type: Target file format extension (e.g. `"pdf"`).
161+ :returns: Tuple of `(output_path, soffice_args)` where `output_path` is the
162+ expected path of the converted file and `soffice_args` is the list of
163+ arguments to pass to `soffice`.
164+ :raises FileNotFoundError: If `source` does not exist.
165+ :raises OSError: If `output_directory` does not exist or is not writable.
136166 """
137167 source_path = Path (source )
138168 output_path = Path (output_directory )
@@ -164,12 +194,12 @@ def _validate_args(self, output_file_type: str, input_file_type: str | None = No
164194
165195 :param output_file_type: Target file format extension to convert to.
166196 :param input_file_type: Source file format extension. If provided, validates that
167- it is a supported input type and that `` output_file_type` ` is a valid conversion
197+ it is a supported input type and that `output_file_type` is a valid conversion
168198 target for it.
169- :raises ValueError: If `` input_file_type` ` is not in :attr:`SUPPORTED_TYPES`, or if
170- `` output_file_type`` is not a valid conversion target for the given `` input_file_type` `.
199+ :raises ValueError: If `input_file_type` is not in :attr:`SUPPORTED_TYPES`, or if
200+ `output_file_type` is not a valid conversion target for the given `input_file_type`.
171201 """
172- # Cannot validate conversion types if input conversions is not known - i.e., source is `` ByteStream` `
202+ # Cannot validate the conversion if the input file type is not known - i.e., source is a `ByteStream`
173203 if input_file_type is None :
174204 return
175205
@@ -206,28 +236,36 @@ def run(
206236 "epub" ,
207237 "png" ,
208238 "jpg" ,
209- ],
239+ ]
240+ | None = None ,
210241 ) -> LibreOfficeFileConverterOutput :
211242 """
212243 Convert office files to the specified output format using LibreOffice.
213244
214245 :param sources:
215- List of sources to convert. Each source can be a file path (`` str` ` or
216- `` Path`` ) or a `` ByteStream`` . For `` ByteStream` ` sources, the input file
217- type cannot be inferred from the filename, so only `` output_file_type` ` is
246+ List of sources to convert. Each source can be a file path (`str` or
247+ `Path`) or a `ByteStream`. For `ByteStream` sources, the input file
248+ type cannot be inferred from the filename, so only `output_file_type` is
218249 validated (not the source type).
219250 :param output_file_type:
220251 Target file format to convert to. Must be a valid conversion target for
221252 each source's input type — see :attr:`SUPPORTED_TYPES` for the full mapping.
253+ If set, it will override the `output_file_type` parameter provided during initialization.
222254 :returns:
223255 A dictionary with the following key:
224- - `` output`` : List of `` ByteStream` ` objects containing the converted file
225- data, in the same order as `` sources` `.
256+ - `output`: List of `ByteStream` objects containing the converted file
257+ data, in the same order as `sources`.
226258 :raises FileNotFoundError: If a source file path does not exist.
227259 :raises OSError: If the internal temporary output directory is not writable.
228260 :raises ValueError: If a source's file type is not in :attr:`SUPPORTED_TYPES`,
229- or if ``output_file_type`` is not a valid conversion target for it.
261+ or if `output_file_type` is not a valid conversion target for it,
262+ or if `output_file_type` has not been provided anywhere.
230263 """
264+ if output_file_type is None and self .output_file_type is None :
265+ msg = "output_file_type must be provided either during initialization or for this method"
266+ raise ValueError (msg )
267+ output_file_type = output_file_type or self .output_file_type
268+
231269 outputs : list [ByteStream ] = []
232270 with TemporaryDirectory () as tmpdir :
233271 for source in sources :
@@ -272,30 +310,38 @@ async def run_async(
272310 "epub" ,
273311 "png" ,
274312 "jpg" ,
275- ],
313+ ]
314+ | None = None ,
276315 ) -> LibreOfficeFileConverterOutput :
277316 """
278317 Asynchronously convert office files to the specified output format using LibreOffice.
279318
280319 This is the asynchronous version of the `run` method with the same parameters and return values.
281320
282321 :param sources:
283- List of sources to convert. Each source can be a file path (`` str` ` or
284- `` Path`` ) or a `` ByteStream`` . For `` ByteStream` ` sources, the input file
285- type cannot be inferred from the filename, so only `` output_file_type` ` is
322+ List of sources to convert. Each source can be a file path (`str` or
323+ `Path`) or a `ByteStream`. For `ByteStream` sources, the input file
324+ type cannot be inferred from the filename, so only `output_file_type` is
286325 validated (not the source type).
287326 :param output_file_type:
288327 Target file format to convert to. Must be a valid conversion target for
289328 each source's input type — see :attr:`SUPPORTED_TYPES` for the full mapping.
329+ If set, it will override the `output_file_type` parameter provided during initialization.
290330 :returns:
291331 A dictionary with the following key:
292- - `` output`` : List of `` ByteStream` ` objects containing the converted file
293- data, in the same order as `` sources` `.
332+ - `output`: List of `ByteStream` objects containing the converted file
333+ data, in the same order as `sources`.
294334 :raises FileNotFoundError: If a source file path does not exist.
295335 :raises OSError: If the internal temporary output directory is not writable.
296336 :raises ValueError: If a source's file type is not in :attr:`SUPPORTED_TYPES`,
297- or if ``output_file_type`` is not a valid conversion target for it.
337+ or if `output_file_type` is not a valid conversion target for it,
338+ or if `output_file_type` has not been provided anywhere.
298339 """
340+ if output_file_type is None and self .output_file_type is None :
341+ msg = "output_file_type must be provided either during initialization or for this method"
342+ raise ValueError (msg )
343+ output_file_type = output_file_type or self .output_file_type
344+
299345 outputs : list [ByteStream ] = []
300346 with TemporaryDirectory () as tmpdir :
301347 for source in sources :
0 commit comments