Docling fails to convert docx document

### Bug
Even though I have set OCR to False, converting a DOCX file fails with an error about a missing VLM model class (`No class found with the name 'vlm'`).

`def _initialize_converter(self) -> None:
        """Initialize the document converter with appropriate settings."""
        if self.optimize_pdf:
            pdf_pipeline_options = PdfPipelineOptions(ocr_options=EasyOcrOptions())
            pdf_pipeline_options.do_ocr = self.enable_ocr
            pdf_pipeline_options.do_table_structure = True
            pdf_pipeline_options.table_structure_options.do_cell_matching = True
            pdf_pipeline_options.do_picture_description = False
            pdf_pipeline_options.picture_description_options = None

            # Use GPU only when OCR is enabled, otherwise use CPU
            device = AcceleratorDevice.AUTO if self.enable_ocr else AcceleratorDevice.CPU
            cuda_flash = self.enable_ocr

            pdf_pipeline_options.accelerator_options = AcceleratorOptions(
                num_threads=self.pdf_threads,
                device=device,
                cuda_use_flash_attention2=cuda_flash
            )
        else:
            pdf_pipeline_options = PdfPipelineOptions()
            pdf_pipeline_options.do_picture_description = False
            pdf_pipeline_options.do_ocr = False
            pdf_pipeline_options.do_table_structure = False

        # Create converter instance with separate PDF and image options
        self.converter = DoclingConverter(
            allowed_formats=[
                InputFormat.PDF,
                InputFormat.IMAGE,
                InputFormat.DOCX,
                InputFormat.HTML,
                InputFormat.PPTX,
                InputFormat.ASCIIDOC,
                InputFormat.CSV,
                InputFormat.MD,
                InputFormat.XLSX
            ],
            format_options={
                InputFormat.PDF: PdfFormatOption(
                    pipeline_cls=StandardPdfPipeline,
                    backend=PyPdfiumDocumentBackend,
                    pipeline_options=pdf_pipeline_options
                ),
                InputFormat.DOCX: WordFormatOption(
                    pipeline_cls=SimplePipeline
                ),
                InputFormat.IMAGE: ImageFormatOption(
                    pipeline_cls=StandardPdfPipeline,
                    backend=PyPdfiumDocumentBackend,
                    pipeline_options=pdf_pipeline_options
                ),

            },
        )

    @file_operation(error_code=ErrorCodes.DOCUMENT_CONVERSION_ERROR)
    def convert_to_markdown(self, input_path: Union[str, Path]) -> str:
        """
        Convert a single file to markdown format.

        Args:
            input_path: Path to the input file or URL

        Returns:
            Markdown content as string

        Raises:
            ValueError: If the file doesn't exist, has an unsupported extension, or conversion fails
        """
        # Handle .txt and .json files directly without using docling converter
        if self._is_text_file(input_path):
            content = self._read_text_file(input_path)
            if content is None:
                raise ValueError(f"Failed to read text file: {self._safe_log_path(input_path)}")
            return content

        # For all other file types, use the docling converter
        conv_result = self.converter.convert(input_path)

        # Get markdown content
        markdown_content = conv_result.document.export_to_markdown()

        if not markdown_content:
            raise ValueError(f"Conversion returned empty content for {self._safe_log_path(input_path)}")

        return markdown_content`


...

### Steps to reproduce
  File "/app/src/objects_core/utils.py", line 34, in wrapper

    return func(*args, **kwargs)

           ^^^^^^^^^^^^^^^^^^^^^

  File "/app/src/vector_store_processor/core/docling_converter.py", line 142, in convert_to_markdown

    conv_result = self.converter.convert(input_path)

                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py", line 39, in wrapper_function

    return wrapper(*args, **kwargs)

           ^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/pydantic/_internal/_validate_call.py", line 136, in __call__

    res = self.__pydantic_validator__.validate_python(pydantic_core.ArgsKwargs(args, kwargs))

          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/document_converter.py", line 237, in convert

    return next(all_res)

           ^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/document_converter.py", line 260, in convert_all

    for conv_res in conv_res_iter:

  File "/usr/local/lib/python3.11/site-packages/docling/document_converter.py", line 332, in _convert

    for item in map(

  File "/usr/local/lib/python3.11/site-packages/docling/document_converter.py", line 379, in _process_document

    conv_res = self._execute_pipeline(in_doc, raises_on_error=raises_on_error)

               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/document_converter.py", line 400, in _execute_pipeline

    pipeline = self._get_pipeline(in_doc.format)

               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/document_converter.py", line 362, in _get_pipeline

    self.initialized_pipelines[cache_key] = pipeline_class(

                                            ^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/pipeline/simple_pipeline.py", line 24, in __init__

    super().__init__(pipeline_options)

  File "/usr/local/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 144, in __init__

    picture_description_model := self._get_picture_description_model(

                                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/pipeline/base_pipeline.py", line 170, in _get_picture_description_model

    return factory.create_instance(

           ^^^^^^^^^^^^^^^^^^^^^^^^

  File "/usr/local/lib/python3.11/site-packages/docling/models/factories/base_factory.py", line 59, in create_instance

    raise RuntimeError(self._err_msg_on_class_not_found(options.kind))

RuntimeError: No class found with the name 'vlm', known classes are:




same error
...

### Docling version
2.58.0
...

### Python version
3.11.9
...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Docling fails to convert docx document #2515

Bug

Steps to reproduce

Docling version

Python version

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Docling fails to convert docx document #2515

Description

Bug

Steps to reproduce

Docling version

Python version

Metadata

Metadata

Assignees

Labels

Type

Fields

Projects

Milestone

Relationships

Development

Issue actions