evalstate
diff --git a/‎docker/transformers-pytorch-amd-gpu/Dockerfile‎
Lines changed: 13 additions & 1 deletion b/‎docker/transformers-pytorch-amd-gpu/Dockerfile‎
Lines changed: 13 additions & 1 deletion
diff --git a/‎docs/source/en/_toctree.yml‎
Lines changed: 4 additions & 0 deletions b/‎docs/source/en/_toctree.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎docs/source/en/model_doc/glm_moe_dsa.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/source/en/model_doc/glm_moe_dsa.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/source/en/model_doc/hy_v3.md‎
Lines changed: 64 additions & 0 deletions b/‎docs/source/en/model_doc/hy_v3.md‎
Lines changed: 64 additions & 0 deletions
diff --git a/‎docs/source/en/model_doc/slanet.md‎
Lines changed: 80 additions & 0 deletions b/‎docs/source/en/model_doc/slanet.md‎
Lines changed: 80 additions & 0 deletions
diff --git a/‎docs/source/en/serve-cli/serving.md‎
Lines changed: 81 additions & 0 deletions b/‎docs/source/en/serve-cli/serving.md‎
Lines changed: 81 additions & 0 deletions
diff --git a/‎setup.py‎
Lines changed: 2 additions & 2 deletions b/‎setup.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/transformers/cli/serve.py‎
Lines changed: 8 additions & 0 deletions b/‎src/transformers/cli/serve.py‎
Lines changed: 8 additions & 0 deletions
@@ -4,13 +4,25 @@ LABEL maintainer="Hugging Face"
 ARG DEBIAN_FRONTEND=noninteractive
 
 RUN apt update && \
-    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs && \
+    apt install -y --no-install-recommends git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-dev python3-pip python3-dev ffmpeg git-lfs libjpeg-turbo8-dev libpng-dev zlib1g-dev && \
     apt clean && \
     rm -rf /var/lib/apt/lists/*
 
 RUN git lfs install
 
 RUN python3 -m pip install --no-cache-dir --upgrade pip numpy importlib-metadata setuptools wheel ninja pytesseract "itsdangerous<2.1.0"
+
+# Rebuild torchvision so decode_image has libjpeg and ROCm image ops stay on GPU.
+RUN python3 -m pip install --no-cache-dir "setuptools<81" pybind11
+RUN TV_VERSION=$(python3 -c "import torchvision; print(torchvision.__version__.split('+')[0])") && \
+    python3 -m pip uninstall -y torchvision && \
+    git clone --depth 1 --branch "v${TV_VERSION}" https://github.com/pytorch/vision.git /tmp/vision && \
+    cd /tmp/vision && \
+    sed -i -E 's|list\(CSRS_DIR\.glob\("([^"]+\.cpp)"\)\)|[p for p in CSRS_DIR.glob("\1") if not p.name.endswith("_hip.cpp")]|g' setup.py && \
+    FORCE_CUDA=1 TORCHVISION_USE_FFMPEG=0 TORCHVISION_USE_VIDEO_CODEC=0 \
+    python3 -m pip install --no-cache-dir --no-build-isolation -v . && \
+    cd / && rm -rf /tmp/vision
+
 RUN python3 -m pip install --no-cache-dir --no-build-isolation git+https://github.com/facebookresearch/detectron2.git
 
 ARG REF=main
 
@@ -661,6 +661,8 @@
         title: HunYuanDenseV1
       - local: model_doc/hunyuan_v1_moe
         title: HunYuanMoEV1
+      - local: model_doc/hy_v3
+        title: HYV3
       - local: model_doc/ibert
         title: I-BERT
       - local: model_doc/jais2
@@ -1365,6 +1367,8 @@
         title: SigLIP
       - local: model_doc/siglip2
         title: SigLIP2
+      - local: model_doc/slanet
+        title: SLANet
       - local: model_doc/slanext
         title: SLANeXt
       - local: model_doc/smollm3
 
@@ -16,7 +16,7 @@ limitations under the License.
 ⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be rendered properly in your Markdown viewer.
 
 -->
-*This model was released on {release_date} and added to Hugging Face Transformers on 2026-02-08.*
+*This model was released on 2026-02-17 and added to Hugging Face Transformers on 2026-02-09.*
 
 <div style="float: right;">
     <div class="flex flex-wrap space-x-1">
 
@@ -0,0 +1,64 @@
+<!--Copyright 2026 THL A29 Limited, a Tencent company and The HuggingFace Inc. team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+*This model was released on {release_date} and added to Hugging Face Transformers on 2026-04-22.*
+
+# Hy3-preview
+
+## Overview
+
+Hy3-preview is a large-scale Mixture-of-Experts (MoE) language model developed by the Tencent HunYuan team. It features a dense-MoE hybrid architecture with 192 routed experts and 1 always-active shared expert per MoE layer, achieving strong performance with efficient inference via sparse expert activation.
+
+Key architectural features:
+
+- **Dense-MoE hybrid**: The first layer uses a dense FFN; all subsequent layers use MoE with top-k routing (default k=8).
+- **Shared experts**: Each MoE layer includes 1 shared expert that processes all tokens alongside the routed experts.
+- **Sigmoid routing with expert-bias correction**: Tokens are routed via sigmoid scoring (not softmax) with a learned per-expert bias for load balancing.
+- **QK-Norm**: Per-head RMSNorm applied to query and key projections before attention for improved training stability.
+
+## Usage tips
+
+- Load with `AutoModelForCausalLM`. The model requires multiple GPUs due to its size.
+- Set `output_router_logits=True` in the config or forward call to collect per-layer MoE router logits. Note that this model does not compute an auxiliary load-balancing loss; `aux_loss` is always `None`.
+- The model supports `gradient_checkpointing` to reduce memory during fine-tuning.
+
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+model_id = "tencent/Hy3-preview"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    device_map="auto",
+)
+
+inputs = tokenizer("The future of artificial intelligence is", return_tensors="pt").to(model.device)
+outputs = model.generate(**inputs, max_new_tokens=64)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+
+## HYV3Config
+
+[[autodoc]] HYV3Config
+
+## HYV3Model
+
+[[autodoc]] HYV3Model
+    - forward
+
+## HYV3ForCausalLM
+
+[[autodoc]] HYV3ForCausalLM
+    - forward
@@ -0,0 +1,80 @@
+<!--Copyright 2026 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+
+⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
+rendered properly in your Markdown viewer.
+
+-->
+*This model was released on 2025-03-07 and added to Hugging Face Transformers on 2026-04-22.*
+
+# SLANet
+
+<div class="flex flex-wrap space-x-1">
+<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
+</div>
+
+## Overview
+
+**SLANet** and **SLANet_plus** are part of a series of dedicated lightweight models for table structure recognition, focusing on accurately recognizing table structures in documents and natural scenes. For more details about the SLANet series models, please refer to the [official documentation](https://www.paddleocr.ai/latest/en/version3.x/module_usage/table_structure_recognition.html).
+
+## Model Architecture
+
+SLANet is a table structure recognition model developed by the Baidu PaddlePaddle Vision Team. The model significantly improves the accuracy and inference speed of table structure recognition by adopting PP-LCNet, a CPU-friendly lightweight backbone network; CSP-PAN, a high-low-level feature fusion module; and SLA Head, a feature decoding module that aligns structural and positional information.
+
+## Usage
+
+### Single input inference
+
+The example below demonstrates how to recognize table structure with SLANet using `AutoModelForTableRecognition`.
+
+<hfoptions id="usage">
+<hfoption id="AutoModel">
+
+```py
+from io import BytesIO
+
+import httpx
+from PIL import Image
+from transformers import AutoImageProcessor, AutoModelForTableRecognition
+
+model_path = "PaddlePaddle/SLANet_plus_safetensors"
+model = AutoModelForTableRecognition.from_pretrained(model_path, device_map="auto")
+image_processor = AutoImageProcessor.from_pretrained(model_path)
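+image_url = "https://paddle-model-ecology.bj.bcebos.com/paddlex/imgs/demo_image/table_recognition.jpg"
+image = Image.open(BytesIO(httpx.get(image_url).content))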
+inputs = image_processor(images=image, return_tensors="pt").to(model.device)
+outputs = model(**inputs)
+
+result = image_processor.post_process_table_recognition(outputs)
+
+print(result["structure"])
+print(result["structure_score"])
+```
+
+</hfoption>
+</hfoptions>
+
+## SLANetConfig
+
+[[autodoc]] SLANetConfig
+
+## SLANetForTableRecognition
+
+[[autodoc]] SLANetForTableRecognition
+
+## SLANetBackbone
+
+[[autodoc]] SLANetBackbone
+
+## SLANetSLAHead
+
+[[autodoc]] SLANetSLAHead
+
@@ -24,6 +24,7 @@ The `transformers serve` CLI is a lightweight option for local or self-hosted se
 The `transformers serve` command spawns a local server compatible with the [OpenAI SDK](https://platform.openai.com/docs/overview). The server works with many third-party applications and supports the REST APIs below.
 
 - `/v1/chat/completions` for text, image, audio, and video requests
+- `/v1/completions` for legacy text completions from a freeform prompt
 - `/v1/responses` supports the [Responses API](https://platform.openai.com/docs/api-reference/responses)
 - `/v1/audio/transcriptions` for audio transcriptions
 - `/v1/models` lists available models for third-party integrations
@@ -959,6 +960,86 @@ The follow-up question "How many people live there?" relies on the prior context
 As of 2021, the population of Paris is approximately 2.2 million people.
 ```
 
+## v1/completions
+
+The `v1/completions` API is based on the [legacy Completions API](https://platform.openai.com/docs/api-reference/completions). Unlike `/v1/chat/completions`, it takes a freeform text `prompt` instead of chat messages and returns generated text in `choices[].text`. This is useful for base (non-instruct) models and text completion tasks where a chat template is not needed. It also supports `suffix` for fill-in-the-middle text insertion.
+
+<hfoptions id="legacy-completion">
+<hfoption id="curl">
+
+```shell
+curl -X POST http://localhost:8000/v1/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen2.5-0.5B",
+    "prompt": "The capital of France is",
+    "max_tokens": 20
+  }'
+```
+
+The command returns the following response.
+
+```json
+{
+  "id": "chatcmpl-abc123",
+  "object": "text_completion",
+  "created": 1234567890,
+  "model": "Qwen/Qwen2.5-0.5B@main",
+  "choices": [
+    {
+      "text": " Paris, and the capital of the United States is Washington, D.C.",
+      "index": 0,
+      "logprobs": null,
+      "finish_reason": "stop"
+    }
+  ],
+  "usage": {
+    "prompt_tokens": 5,
+    "completion_tokens": 16,
+    "total_tokens": 21
+  }
+}
+```
+
+</hfoption>
+<hfoption id="openai">
+
+```python
+from openai import OpenAI
+
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="<random_string>")
+
+# Non-streaming
+completion = client.completions.create(
+    model="Qwen/Qwen2.5-0.5B",
+    prompt="The capital of France is",
+    max_tokens=20,
+)
+print(completion.choices[0].text)
+```
+
+The [OpenAI](https://platform.openai.com/docs/quickstart) client returns the following.
+
+```shell
+ Paris, and the capital of the United States is Washington, D.C.
+```
+
+Streaming is also supported.
+
+```python
+stream = client.completions.create(
+    model="Qwen/Qwen2.5-0.5B",
+    prompt="The capital of France is",
+    max_tokens=20,
+    stream=True,
+)
+for chunk in stream:
+    print(chunk.choices[0].text, end="")
+```
+
+</hfoption>
+</hfoptions>
+
 ## v1/responses
 
 The [Responses API](https://platform.openai.com/docs/api-reference/responses) is OpenAI's latest API endpoint for generation. It supports stateful interactions and integrates built-in tools to extend a model's capabilities. OpenAI [recommends](https://platform.openai.com/docs/guides/migrate-to-responses) using the Responses API over the Chat Completions API for new projects.
 
@@ -124,7 +124,7 @@
     "rjieba",
     "rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1",
     "ruff==0.14.10",
-    "transformers-mlinter @ git+https://github.com/huggingface/transformers-mlinter@b9d319ce264c106f97a959d926ef42bc3c0ea4d1",
+    "transformers-mlinter==0.1.0",
     "ty==0.0.20",
     # `sacrebleu` not used in `transformers`. However, it is needed in several tests, when a test calls
     # `evaluate.load("sacrebleu")`. This metric is used in the examples that we use to test the `Trainer` with, in the
@@ -295,7 +295,7 @@ def finalize_options(self):
         pass
 
     def run(self):
-        if SUPPORTED_PYTHON_VERSIONS[0] >= PYTHON_MINOR_VERSION:
+        if SUPPORTED_PYTHON_VERSIONS[0] > PYTHON_MINOR_VERSION:
             print(
                 f"Table updated only when running 3.{SUPPORTED_PYTHON_VERSIONS[0]}.x, detected version is {sys.version}."
             )
 
@@ -83,6 +83,7 @@ def __init__(
         import uvicorn
 
         from .serving.chat_completion import ChatCompletionHandler
+        from .serving.completion import CompletionHandler
         from .serving.model_manager import ModelManager
         from .serving.response import ResponseHandler
         from .serving.server import build_server
@@ -131,6 +132,11 @@ def __init__(
             generation_state=self._generation_state,
         )
 
+        self._completion_handler = CompletionHandler(
+            model_manager=self._model_manager,
+            generation_state=self._generation_state,
+        )
+
         self._response_handler = ResponseHandler(
             model_manager=self._model_manager,
             generation_state=self._generation_state,
@@ -141,6 +147,7 @@ def __init__(
         app = build_server(
             self._model_manager,
             self._chat_handler,
+            completion_handler=self._completion_handler,
             response_handler=self._response_handler,
             transcription_handler=self._transcription_handler,
             enable_cors=enable_cors,
@@ -183,6 +190,7 @@ def kill_server(self):
 \b
 Endpoints:
     POST /v1/chat/completions — Chat completions (streaming + non-streaming).
+    POST /v1/completions      — Legacy text completions from a prompt.
     GET  /v1/models           — Lists available models.
     GET  /health              — Health check.