Skip to content

Commit f161fea

Browse files
[Models] add fleet model fallback 2 (#7964)
* fd fallback fleet model clean commit * remove test * fix coverage not right * recover review * add test --------- Co-authored-by: kebo01 <kebo01@baidu.com>
1 parent 3fe8f7c commit f161fea

13 files changed

Lines changed: 2320 additions & 12 deletions

File tree

fastdeploy/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@
6060
_ResolvedTask = Literal["generate", "encode", "embed"]
6161

6262
# Model implementation backend options
63-
ModelImpl = Literal["auto", "fastdeploy", "paddleformers"]
63+
ModelImpl = Literal["auto", "fastdeploy", "paddleformers", "paddlefleet"]
6464

6565
_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
6666
"generate": [],

fastdeploy/engine/args_utils.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ class EngineArgs:
167167
'auto': Use native FastDeploy implementation when available, fallback to PaddleFormers.
168168
'fastdeploy': Use only native FastDeploy implementations.
169169
'paddleformers': Use PaddleFormers backend with FastDeploy optimizations.
170+
'paddlefleet': Use PaddleFleet backend with FastDeploy optimizations.
170171
"""
171172
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
172173
"""
@@ -678,7 +679,7 @@ def __post_init__(self):
678679
"kvcache_storage_backend is only supported when ENABLE_V1_KVCACHE_SCHEDULER=1"
679680
)
680681

681-
valid_model_impls = ["auto", "fastdeploy", "paddleformers"]
682+
valid_model_impls = ["auto", "fastdeploy", "paddleformers", "paddlefleet"]
682683
if self.model_impl not in valid_model_impls:
683684
raise NotImplementedError(
684685
f"not support model_impl: '{self.model_impl}'. " f"Must be one of: {', '.join(valid_model_impls)}"
@@ -1063,13 +1064,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
10631064
model_group.add_argument(
10641065
"--model-impl",
10651066
type=str,
1066-
choices=["auto", "fastdeploy", "paddleformers"],
1067+
choices=["auto", "fastdeploy", "paddleformers", "paddlefleet"],
10671068
default=EngineArgs.model_impl,
10681069
help=(
10691070
"Model implementation backend. "
10701071
"'auto': Use native FastDeploy when available, fallback to PaddleFormers. "
10711072
"'fastdeploy': Use only native FastDeploy implementations. "
10721073
"'paddleformers': Use PaddleFormers backend with FastDeploy optimizations."
1074+
" 'paddlefleet': Use PaddleFleet backend with FastDeploy optimizations."
10731075
),
10741076
)
10751077

fastdeploy/model_executor/graph_optimization/decorator.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,12 @@ def __init__(self, fd_config: FDConfig, **kwargs):
6060
# Not use graph optimization
6161
return
6262

63-
def __call__(self, **kwargs):
63+
def __call__(self, *args, **kwargs):
6464
"""Decorator model.__call__() func"""
6565
if not self.use_graph_opt:
66-
return self.forward(**kwargs)
66+
return self.forward(*args, **kwargs)
6767

68-
return self.graph_opt_backend(**kwargs)
68+
return self.graph_opt_backend(*args, **kwargs)
6969

7070
cls.__init__ = __init__
7171
cls.__call__ = __call__

fastdeploy/model_executor/models/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import os
2020
from pathlib import Path
2121

22-
from paddleformers.transformers import PretrainedModel
22+
from paddleformers.transformers.model_utils import PretrainedModel
2323

2424
from fastdeploy.plugins.model_register import load_model_register_plugins
2525

fastdeploy/model_executor/models/model_base.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,19 @@ def _try_resolve_paddleformers(
194194
elif model_impl == "auto" and is_fallback:
195195
# Auto mode fallback when no native implementation exists
196196
backend_arch = "PaddleFormersForCausalLM"
197+
elif model_impl == "paddlefleet":
198+
from fastdeploy.model_executor.utils import is_paddlefleet_available
199+
200+
if is_paddlefleet_available():
201+
backend_arch = "PaddleFleetForCausalLM"
202+
else:
203+
raise ImportError(
204+
"paddlefleet backend requires paddlefleet to be installed.\n"
205+
"Please install with [change cuda version if needed ]:\n"
206+
"python -m pip install paddlefleet==0.3.0.dev20260527 "
207+
"--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ "
208+
"--extra-index-url https://www.paddlepaddle.org.cn/packages/nightly/cu126/"
209+
)
197210
elif model_impl == "fastdeploy":
198211
return None
199212
else:

fastdeploy/model_executor/models/paddleformers/__init__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ModelForCasualLM,
2020
ModelRegistry,
2121
)
22+
from fastdeploy.model_executor.utils import is_paddlefleet_available
2223

2324
from .base import PaddleFormersModelBase
2425
from .causallm import CausalLMMixin
@@ -38,3 +39,19 @@ class PaddleFormersForCausalLM(CausalLMMixin, PaddleFormersModelBase, ModelForCa
3839
@classmethod
3940
def name(cls):
4041
return "PaddleFormersForCausalLM"
42+
43+
44+
if is_paddlefleet_available():
45+
from .base_fleet import PaddleFleetModelBase
46+
47+
__all__ += ["PaddleFleetForCausalLM"]
48+
49+
@ModelRegistry.register_model_class(
50+
architecture="PaddleFleetForCausalLM",
51+
module_name="paddleformers",
52+
category=ModelCategory.TEXT_GENERATION,
53+
)
54+
class PaddleFleetForCausalLM(PaddleFleetModelBase, ModelForCasualLM):
55+
@classmethod
56+
def name(cls):
57+
return "PaddleFleetForCausalLM"

0 commit comments

Comments
 (0)