2 changes: 2 additions & 0 deletions docling/datamodel/pipeline_options.py
@@ -54,7 +54,9 @@
from docling.datamodel.vlm_model_specs import (
GRANITE_VISION_OLLAMA as granite_vision_vlm_ollama_conversion_options,
GRANITE_VISION_TRANSFORMERS as granite_vision_vlm_conversion_options,
GRANITEDOCLING as granite_docling_vlm_conversion_options,

Member: These imports are deprecated; we should not add them.

NU_EXTRACT_2B_TRANSFORMERS,
SMOLDOCLING as smoldocling_vlm_auto_conversion_options,
SMOLDOCLING_MLX as smoldocling_vlm_mlx_conversion_options,
SMOLDOCLING_TRANSFORMERS as smoldocling_vlm_conversion_options,
VlmModelType,
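
For context on the comment above, a minimal sketch of the canonical import path, assuming downstream code can depend on `docling.datamodel.vlm_model_specs` directly rather than the deprecated aliases re-exported from `pipeline_options`:

```python
# Sketch: import the specs from their canonical module instead of via the
# deprecated aliases in docling.datamodel.pipeline_options.
from docling.datamodel.vlm_model_specs import GRANITEDOCLING, SMOLDOCLING

# With this PR, both constants already resolve to the MLX or Transformers
# variant for the current hardware (see vlm_model_specs.py below).
vlm_options = GRANITEDOCLING
```
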
56 changes: 56 additions & 0 deletions docling/datamodel/vlm_model_specs.py
@@ -353,6 +353,62 @@
)


def _has_apple_silicon_mlx() -> bool:

Member: The new stage inference model should take care of automatic MLX selection when available. Where would these extra methods be needed?

"""Return True if MPS is available and mlx-vlm is installed."""
try:
import torch

has_mps = torch.backends.mps.is_built() and torch.backends.mps.is_available()
except ImportError:
has_mps = False

if not has_mps:
return False

try:
import mlx_vlm # type: ignore

return True
except ImportError:
return False


def _get_granitedocling_model():
"""Get the best GraniteDocling variant for the current hardware.

Automatically selects MLX variant on Apple Silicon if mlx-vlm is installed,
otherwise falls back to Transformers variant.
"""
if _has_apple_silicon_mlx():
_log.debug("Auto-selected GraniteDocling MLX variant (Apple Silicon)")
return GRANITEDOCLING_MLX
else:
_log.debug("Auto-selected GraniteDocling Transformers variant")
return GRANITEDOCLING_TRANSFORMERS


# Auto-selecting: picks MLX on Apple Silicon, Transformers otherwise
GRANITEDOCLING = _get_granitedocling_model()


def _get_smoldocling_model():
"""Get the best SmolDocling variant for the current hardware.

Automatically selects MLX variant on Apple Silicon if mlx-vlm is installed,
otherwise falls back to Transformers variant.
"""
if _has_apple_silicon_mlx():
_log.debug("Auto-selected SmolDocling MLX variant (Apple Silicon)")
return SMOLDOCLING_MLX
else:
_log.debug("Auto-selected SmolDocling Transformers variant")
return SMOLDOCLING_TRANSFORMERS


# Auto-selecting: picks MLX on Apple Silicon, Transformers otherwise
SMOLDOCLING = _get_smoldocling_model()


class VlmModelType(str, Enum):
SMOLDOCLING = "smoldocling"
SMOLDOCLING_VLLM = "smoldocling_vllm"
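
A quick sketch of what the auto-selection above means in practice; which branch you see depends on the host hardware, so the comments below are illustrative:

```python
from docling.datamodel import vlm_model_specs

# GRANITEDOCLING and SMOLDOCLING are module-level constants, so the
# hardware probe in _has_apple_silicon_mlx() runs once at import time,
# not per conversion.
print(vlm_model_specs.GRANITEDOCLING is vlm_model_specs.GRANITEDOCLING_MLX)
# -> True on Apple Silicon with mlx-vlm installed, False elsewhere
print(vlm_model_specs.SMOLDOCLING is vlm_model_specs.SMOLDOCLING_MLX)
# -> likewise
```
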
15 changes: 10 additions & 5 deletions docling/models/stages/table_structure/table_structure_model.py
@@ -77,11 +77,16 @@ def __init__(
TFPredictor,
)

device = decide_device(accelerator_options.device)

# Disable MPS here, until we know why it makes things slower.
if device == AcceleratorDevice.MPS.value:

Member: This was a clear choice because of performance issues. Is this now resolved?

Reply: I can confirm that I also achieved a 30% reduction in execution time when converting both a 10-page paper and a 100-page report to Markdown, just by re-enabling MPS in TableFormerV1 on my M3 Pro MacBook.

device = AcceleratorDevice.CPU.value
device = decide_device(
accelerator_options.device,
supported_devices=[
AcceleratorDevice.CPU,
AcceleratorDevice.CUDA,
AcceleratorDevice.MPS,
AcceleratorDevice.XPU,
],
)
_log.debug(f"TableStructureModel using device: {device}")

self.tm_config = c.read_config(f"{artifacts_path}/tm_config.json")
self.tm_config["model"]["save_dir"] = artifacts_path
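
The `supported_devices` argument replaces the hard-coded MPS-to-CPU fallback. A self-contained sketch of the guard it presumably implements; the real `decide_device` lives in docling's accelerator utilities and may differ in detail:

```python
from enum import Enum
from typing import Optional


class AcceleratorDevice(str, Enum):  # local stand-in for docling's enum
    AUTO = "auto"
    CPU = "cpu"
    CUDA = "cuda"
    MPS = "mps"
    XPU = "xpu"


def decide_device_sketch(
    requested: str,
    supported_devices: Optional[list[AcceleratorDevice]] = None,
) -> str:
    """Hypothetical: honor the requested device unless the model declares
    it unsupported, in which case fall back to CPU."""
    if (
        supported_devices is not None
        and requested != AcceleratorDevice.AUTO.value
        and requested not in {d.value for d in supported_devices}
    ):
        return AcceleratorDevice.CPU.value
    return requested


# MPS is honored now that it is listed as supported; previously the model
# forced CPU whenever decide_device() returned "mps".
assert decide_device_sketch("mps", [AcceleratorDevice.CPU, AcceleratorDevice.MPS]) == "mps"
assert decide_device_sketch("mps", [AcceleratorDevice.CPU]) == "cpu"
```
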
13 changes: 10 additions & 3 deletions docling/models/stages/table_structure/table_structure_model_v2.py
@@ -58,9 +58,16 @@ def __init__(
model_path = artifacts_path

# Determine device
device = decide_device(accelerator_options.device)
if device == AcceleratorDevice.MPS.value:
device = AcceleratorDevice.CPU.value
device = decide_device(
accelerator_options.device,
supported_devices=[
AcceleratorDevice.CPU,
AcceleratorDevice.CUDA,
AcceleratorDevice.MPS,
AcceleratorDevice.XPU,
],
)
_log.debug(f"TableStructureModelV2 using device: {device}")
self.device = device

# Set number of threads for CPU inference
6 changes: 2 additions & 4 deletions docs/usage/model_catalog.md
@@ -191,10 +191,8 @@ The following table shows all processing stages in Docling, their model families

| Model | Inference Engine | Supported Devices |
|-------|------------------|-------------------|
| TableFormer (fast) | docling-ibm-models | CPU, CUDA, XPU |
| TableFormer (accurate) | docling-ibm-models | CPU, CUDA, XPU |

**Note:** MPS is currently disabled for TableFormer due to performance issues.
| TableFormer (fast) | docling-ibm-models | CPU, CUDA, MPS, XPU |
| TableFormer (accurate) | docling-ibm-models | CPU, CUDA, MPS, XPU |

### Image Classifier (Picture Classifier)

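With MPS now listed for TableFormer, a usage sketch following docling's documented accelerator-options pattern (the `AcceleratorOptions` import path may vary between docling versions):

```python
from docling.datamodel.accelerator_options import AcceleratorDevice, AcceleratorOptions
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption

pipeline_options = PdfPipelineOptions()
pipeline_options.do_table_structure = True  # enables the TableFormer stage
pipeline_options.accelerator_options = AcceleratorOptions(device=AcceleratorDevice.MPS)

converter = DocumentConverter(
    format_options={InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)}
)
result = converter.convert("report.pdf")  # TableFormer now runs on MPS
```
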
2 changes: 2 additions & 0 deletions docs/usage/vision_models.md
@@ -52,8 +52,10 @@ The following table reports the models currently available out-of-the-box.

| Model instance | Model | Framework | Device | Num pages | Inference time (sec) |
| ---------------|------ | --------- | ------ | --------- | ---------------------|
| `vlm_model_specs.GRANITEDOCLING` | Auto-selects MLX or Transformers | `Auto` | MPS | 1 | - |
| `vlm_model_specs.GRANITEDOCLING_TRANSFORMERS` | [ibm-granite/granite-docling-258M](https://huggingface.co/ibm-granite/granite-docling-258M) | `Transformers/AutoModelForVision2Seq` | MPS | 1 | - |
| `vlm_model_specs.GRANITEDOCLING_MLX` | [ibm-granite/granite-docling-258M-mlx-bf16](https://huggingface.co/ibm-granite/granite-docling-258M-mlx-bf16) | `MLX`| MPS | 1 | - |
| `vlm_model_specs.SMOLDOCLING` | Auto-selects MLX or Transformers | `Auto` | MPS | 1 | - |
| `vlm_model_specs.SMOLDOCLING_TRANSFORMERS` | [ds4sd/SmolDocling-256M-preview](https://huggingface.co/ds4sd/SmolDocling-256M-preview) | `Transformers/AutoModelForVision2Seq` | MPS | 1 | 102.212 |
| `vlm_model_specs.SMOLDOCLING_MLX` | [ds4sd/SmolDocling-256M-preview-mlx-bf16](https://huggingface.co/ds4sd/SmolDocling-256M-preview-mlx-bf16) | `MLX`| MPS | 1 | 6.15453 |
| `vlm_model_specs.QWEN25_VL_3B_MLX` | [mlx-community/Qwen2.5-VL-3B-Instruct-bf16](https://huggingface.co/mlx-community/Qwen2.5-VL-3B-Instruct-bf16) | `MLX`| MPS | 1 | 23.4951 |
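
A sketch of picking the new auto-resolving spec with the VLM pipeline, mirroring the minimal VLM example elsewhere in docling's docs:

```python
from docling.datamodel import vlm_model_specs
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import VlmPipelineOptions
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline

# GRANITEDOCLING resolves to the MLX variant on Apple Silicon (with
# mlx-vlm installed) and to the Transformers variant everywhere else.
pipeline_options = VlmPipelineOptions(vlm_options=vlm_model_specs.GRANITEDOCLING)

converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(
            pipeline_cls=VlmPipeline,
            pipeline_options=pipeline_options,
        )
    }
)
doc = converter.convert("paper.pdf").document
print(doc.export_to_markdown())
```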