Skip to content

Commit 5820553

Browse files
authored
[DataProcessor] Refactor and unify text/multimodal processor pipeline (#7747)
* first commit * fix unit test * fix pre-commit * fix jinja * fix unit test * update * fix hash * fix * add unit test * update covered * add unit test * fix review * fix review * fix unit test * update
1 parent f202626 commit 5820553

26 files changed

Lines changed: 6509 additions & 39 deletions

fastdeploy/entrypoints/chat_utils.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,11 +199,10 @@ def parse_chat_messages(messages: List[ChatCompletionMessageParam]):
199199
role = message["role"]
200200
content = message["content"]
201201

202-
parsed_content = []
203202
if content is None:
204-
parsed_content = []
203+
parsed_content = content
205204
elif isinstance(content, str):
206-
parsed_content = [{"type": "text", "text": content}]
205+
parsed_content = content
207206
else:
208207
parsed_content = [parse_content_part(mm_parser, part) for part in content]
209208

fastdeploy/entrypoints/llm.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ def chat(
276276
raise RuntimeError(f"Failed to validate 'tools' parameter in chat method: {e}") from e
277277

278278
req_ids = self._add_request(
279-
prompts=[{"messages": msg} for msg in messages],
279+
prompts=messages,
280280
sampling_params=sampling_params,
281281
chat_template_kwargs=chat_template_kwargs,
282282
chat_template=chat_template,
@@ -326,11 +326,21 @@ def _add_request(
326326
"prompt": prompts[i],
327327
"request_id": request_id,
328328
}
329+
elif isinstance(prompts[i], list) and len(prompts[i]) == 0:
330+
raise ValueError(
331+
f"prompts[{i}] is an empty list. Expected a non-empty list of int (prompt_token_ids) "
332+
"or a non-empty list of dict (messages)."
333+
)
329334
elif isinstance(prompts[i], list) and isinstance(prompts[i][0], int):
330335
tasks = {
331336
"prompt_token_ids": prompts[i],
332337
"request_id": request_id,
333338
}
339+
elif isinstance(prompts[i], list) and isinstance(prompts[i][0], dict):
340+
tasks = {
341+
"messages": prompts[i],
342+
"request_id": request_id,
343+
}
334344
elif isinstance(prompts[i], dict):
335345
tasks = prompts[i]
336346
tasks["request_id"] = request_id
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Multimodal processors for FastDeploy."""
16+
17+
from fastdeploy.input.multimodal.ernie4_5_vl import Ernie4_5VLProcessor
18+
from fastdeploy.input.multimodal.mm_processor import MMProcessor
19+
from fastdeploy.input.multimodal.paddleocr_vl import PaddleOCRVLProcessor
20+
from fastdeploy.input.multimodal.qwen3_vl import Qwen3VLProcessor
21+
from fastdeploy.input.multimodal.qwen_vl import QwenVLProcessor
22+
23+
__all__ = [
24+
"MMProcessor",
25+
"QwenVLProcessor",
26+
"Qwen3VLProcessor",
27+
"Ernie4_5VLProcessor",
28+
"PaddleOCRVLProcessor",
29+
]
Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Shared image utility functions for all VL image processors."""
16+
17+
import math
18+
19+
import numpy as np
20+
21+
from fastdeploy.utils import data_processor_logger
22+
23+
__all__ = [
24+
"round_by_factor",
25+
"ceil_by_factor",
26+
"floor_by_factor",
27+
"is_scaled_image",
28+
"smart_resize",
29+
"smart_resize_qwen",
30+
"smart_resize_paddleocr",
31+
]
32+
33+
34+
def round_by_factor(number: int, factor: int) -> int:
    """Snap ``number`` to the nearest multiple of ``factor``.

    The quotient ``number / factor`` is rounded with Python's built-in
    ``round`` before being scaled back up, so exact halves follow that
    built-in's tie-breaking rule (round half to even).

    Args:
        number: Value to snap.
        factor: Step size the result must be a multiple of.

    Returns:
        The multiple of ``factor`` closest to ``number``.
    """
    steps = round(number / factor)
    return steps * factor
37+
38+
39+
def ceil_by_factor(number: int, factor: int) -> int:
    """Round ``number`` up to a multiple of ``factor``.

    Args:
        number: Value to round up.
        factor: Step size the result must be a multiple of.

    Returns:
        The smallest multiple of ``factor`` that is not below ``number``.
    """
    steps = math.ceil(number / factor)
    return steps * factor
42+
43+
44+
def floor_by_factor(number: int, factor: int) -> int:
    """Round ``number`` down to a multiple of ``factor``.

    Args:
        number: Value to round down.
        factor: Step size the result must be a multiple of.

    Returns:
        The largest multiple of ``factor`` that does not exceed ``number``.
    """
    steps = math.floor(number / factor)
    return steps * factor
47+
48+
49+
def is_scaled_image(image: np.ndarray) -> bool:
    """Report whether ``image`` already holds values inside the [0, 1] interval.

    Arrays whose dtype is ``uint8`` are reported as not scaled without
    inspecting their contents. For every other dtype the smallest element is
    compared against 0 and the largest against 1.

    Args:
        image: Pixel array to inspect.

    Returns:
        A truthy value when all elements lie within [0, 1], falsy otherwise.
    """
    if image.dtype != np.uint8:
        # The maximum is only evaluated once the minimum has passed its check.
        return np.min(image) >= 0 and np.max(image) <= 1
    return False
54+
55+
56+
def smart_resize_qwen(
    height: int,
    width: int,
    factor: int,
    min_pixels: int,
    max_pixels: int,
    max_ratio: int = 200,
) -> tuple:
    """Smart image resizing for ERNIE / Qwen2.5 / Qwen3 models.

    Computes a target ``(height, width)`` whose sides are multiples of
    ``factor`` and whose area lies within ``[min_pixels, max_pixels]``.

    When the aspect ratio exceeds ``max_ratio`` the image is not rejected:
    the shorter side is rounded to a multiple of ``factor`` (at least
    ``factor``), the longer side is capped at ``max_ratio`` times that value,
    and an info message is logged. The capped size then replaces the input
    size for the remaining steps.

    Args:
        height: Source image height in pixels; must be positive.
        width: Source image width in pixels; must be positive.
        factor: Step size both output sides must be a multiple of.
        min_pixels: Lower bound for the output area.
        max_pixels: Upper bound for the output area.
        max_ratio: Largest long-side / short-side ratio kept as is.

    Returns:
        Tuple ``(h_bar, w_bar)`` with the target height and width.

    Raises:
        ValueError: If ``height`` or ``width`` is not positive, or if the
            computed size falls outside ``[min_pixels, max_pixels]``.
    """
    # Reject degenerate sizes up front; otherwise a zero side surfaces as a
    # bare ZeroDivisionError from the ratio computation below.
    if height <= 0 or width <= 0:
        raise ValueError(f"height and width must be positive, got height: {height}, width: {width}")

    ratio = max(height, width) / min(height, width)
    if ratio > max_ratio:
        if height > width:
            new_width = max(factor, round_by_factor(width, factor))
            new_height = floor_by_factor(new_width * max_ratio, factor)
        else:
            new_height = max(factor, round_by_factor(height, factor))
            new_width = floor_by_factor(new_height * max_ratio, factor)

        data_processor_logger.info(
            f"absolute aspect ratio must be smaller than {max_ratio}, "
            f"got {ratio}, "
            f"resize to {max(new_height, new_width) / min(new_height, new_width)}"
        )
        height = new_height
        width = new_width

    h_bar = max(factor, round_by_factor(height, factor))
    w_bar = max(factor, round_by_factor(width, factor))
    if h_bar * w_bar > max_pixels:
        # Too large: shrink both sides by the same scale, rounding down.
        beta = math.sqrt((height * width) / max_pixels)
        h_bar = floor_by_factor(height / beta, factor)
        w_bar = floor_by_factor(width / beta, factor)
    elif h_bar * w_bar < min_pixels:
        # Too small: grow both sides by the same scale, rounding up.
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = ceil_by_factor(height * beta, factor)
        w_bar = ceil_by_factor(width * beta, factor)

    # Rounding to multiples of factor can still push the area out of bounds.
    if min_pixels > h_bar * w_bar or h_bar * w_bar > max_pixels:
        raise ValueError(f"encounter invalid h_bar: {h_bar}, w_bar: {w_bar}")

    return h_bar, w_bar
96+
97+
98+
def smart_resize_paddleocr(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 28 * 28 * 130,
    max_pixels: int = 28 * 28 * 1280,
    max_ratio: int = 200,
) -> tuple:
    """Smart image resizing for PaddleOCR-VL model.

    Computes a target ``(height, width)`` whose sides are multiples of
    ``factor``. A side shorter than ``factor`` is first raised to ``factor``
    while the other side is scaled by the same amount. The rounded size is
    then scaled down when its area exceeds ``max_pixels`` or scaled up when
    it is below ``min_pixels``.

    Args:
        height: Source image height in pixels; must be positive.
        width: Source image width in pixels; must be positive.
        factor: Step size both output sides must be a multiple of.
        min_pixels: Area below which the image is scaled up.
        max_pixels: Area above which the image is scaled down.
        max_ratio: Largest accepted long-side / short-side ratio.

    Returns:
        Tuple ``(h_bar, w_bar)`` with the target height and width. Each side
        is at least ``factor`` after the scale-down step, which can leave the
        area slightly above ``max_pixels`` for extremely elongated images.

    Raises:
        ValueError: If ``height`` or ``width`` is not positive, or if the
            aspect ratio exceeds ``max_ratio``.
    """
    # Reject degenerate sizes up front; otherwise a zero side surfaces as a
    # bare ZeroDivisionError from the upscaling step below.
    if height <= 0 or width <= 0:
        raise ValueError(f"height and width must be positive, got height={height}, width={width}")

    if height < factor:
        data_processor_logger.debug(f"smart_resize_paddleocr: height={height} < factor={factor}, reset height=factor")
        width = round((width * factor) / height)
        height = factor

    if width < factor:
        data_processor_logger.debug(f"smart_resize_paddleocr: width={width} < factor={factor}, reset width=factor")
        height = round((height * factor) / width)
        width = factor

    ratio = max(height, width) / min(height, width)
    if ratio > max_ratio:
        raise ValueError(f"absolute aspect ratio must be smaller than {max_ratio}, got {ratio}")

    h_bar = round(height / factor) * factor
    w_bar = round(width / factor) * factor
    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        # Flooring the short side of a very elongated image can reach 0;
        # keep at least one factor-sized step so no side collapses.
        h_bar = max(factor, math.floor(height / beta / factor) * factor)
        w_bar = max(factor, math.floor(width / beta / factor) * factor)
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = math.ceil(height * beta / factor) * factor
        w_bar = math.ceil(width * beta / factor) * factor

    return h_bar, w_bar
133+
134+
135+
def smart_resize(
    height: int,
    width: int,
    factor: int,
    min_pixels: int,
    max_pixels: int,
    max_ratio: int = 200,
    variant: str = "qwen",
) -> tuple:
    """Dispatch to the resize routine selected by ``variant``.

    ``variant == "paddleocr"`` selects ``smart_resize_paddleocr``, which is
    called without ``max_ratio``. Every other value selects
    ``smart_resize_qwen``, which receives ``max_ratio`` as well.

    Args:
        height: Source image height in pixels.
        width: Source image width in pixels.
        factor: Step size both output sides must be a multiple of.
        min_pixels: Lower area bound passed to the selected routine.
        max_pixels: Upper area bound passed to the selected routine.
        max_ratio: Aspect-ratio limit, forwarded only on the qwen path.
        variant: Name of the resize flavour to use.

    Returns:
        The ``(height, width)`` tuple produced by the selected routine.
    """
    if variant != "paddleocr":
        return smart_resize_qwen(height, width, factor, min_pixels, max_pixels, max_ratio)
    return smart_resize_paddleocr(height, width, factor, min_pixels, max_pixels)

0 commit comments

Comments
 (0)