Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
_ResolvedTask = Literal["generate", "encode", "embed"]

# Model implementation backend options.
# Only one assignment is kept: an earlier three-option alias would be
# overwritten immediately and never be observable.
ModelImpl = Literal["auto", "fastdeploy", "paddleformers", "paddlefleet"]

_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
"generate": [],
Expand Down
6 changes: 4 additions & 2 deletions fastdeploy/engine/args_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ class EngineArgs:
'auto': Use native FastDeploy implementation when available, fallback to PaddleFormers.
'fastdeploy': Use only native FastDeploy implementations.
'paddleformers': Use PaddleFormers backend with FastDeploy optimizations.
'paddlefleet': Use PaddleFleet backend with FastDeploy optimizations.
"""
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
"""
Expand Down Expand Up @@ -678,7 +679,7 @@ def __post_init__(self):
"kvcache_storage_backend is only supported when ENABLE_V1_KVCACHE_SCHEDULER=1"
)

valid_model_impls = ["auto", "fastdeploy", "paddleformers"]
valid_model_impls = ["auto", "fastdeploy", "paddleformers", "paddlefleet"]
if self.model_impl not in valid_model_impls:
raise NotImplementedError(
f"not support model_impl: '{self.model_impl}'. " f"Must be one of: {', '.join(valid_model_impls)}"
Expand Down Expand Up @@ -1063,13 +1064,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
# Register the --model-impl option; the accepted values mirror the
# backends described in the help text below.
model_group.add_argument(
    "--model-impl",
    type=str,
    choices=["auto", "fastdeploy", "paddleformers", "paddlefleet"],
    default=EngineArgs.model_impl,
    help=(
        # Adjacent string literals are concatenated verbatim, so every
        # fragment except the last must end with its own separator space.
        "Model implementation backend. "
        "'auto': Use native FastDeploy when available, fallback to PaddleFormers. "
        "'fastdeploy': Use only native FastDeploy implementations. "
        "'paddleformers': Use PaddleFormers backend with FastDeploy optimizations. "
        "'paddlefleet': Use PaddleFleet backend with FastDeploy optimizations."
    ),
)

Expand Down
6 changes: 3 additions & 3 deletions fastdeploy/model_executor/graph_optimization/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ def __init__(self, fd_config: FDConfig, **kwargs):
# Not use graph optimization
return

def __call__(self, *args, **kwargs):
    """Dispatch a model call to the plain forward pass or the graph-opt backend.

    Positional and keyword arguments are forwarded unchanged, so callers may
    invoke the model either way; a keyword-only signature would raise
    ``TypeError`` for positional calls.

    Args:
        *args: Positional arguments passed through to the selected callable.
        **kwargs: Keyword arguments passed through to the selected callable.

    Returns:
        Whatever ``self.forward`` returns when ``self.use_graph_opt`` is
        falsy, otherwise whatever ``self.graph_opt_backend`` returns.
    """
    if not self.use_graph_opt:
        return self.forward(*args, **kwargs)

    return self.graph_opt_backend(*args, **kwargs)
Comment thread
xiaoguoguo626807 marked this conversation as resolved.

This comment was marked as outdated.

Comment thread
xiaoguoguo626807 marked this conversation as resolved.

cls.__init__ = __init__
cls.__call__ = __call__
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/model_executor/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import os
from pathlib import Path

from paddleformers.transformers import PretrainedModel
from paddleformers.transformers.model_utils import PretrainedModel

from fastdeploy.plugins.model_register import load_model_register_plugins

Expand Down
13 changes: 13 additions & 0 deletions fastdeploy/model_executor/models/model_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,19 @@ def _try_resolve_paddleformers(
elif model_impl == "auto" and is_fallback:
# Auto mode fallback when no native implementation exists
backend_arch = "PaddleFormersForCausalLM"
elif model_impl == "paddlefleet":
from fastdeploy.model_executor.utils import is_paddlefleet_available

if is_paddlefleet_available():
backend_arch = "PaddleFleetForCausalLM"
else:
raise ImportError(
"paddlefleet backend requires paddlefleet to be installed.\n"
"Please install with [change cuda version if needed ]:\n"
"python -m pip install paddlefleet==0.3.0.dev20260527 "
"--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ "
"--extra-index-url https://www.paddlepaddle.org.cn/packages/nightly/cu126/"
)
elif model_impl == "fastdeploy":
return None
else:
Expand Down
17 changes: 17 additions & 0 deletions fastdeploy/model_executor/models/paddleformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
ModelForCasualLM,
ModelRegistry,
)
from fastdeploy.model_executor.utils import is_paddlefleet_available

from .base import PaddleFormersModelBase
from .causallm import CausalLMMixin
Expand All @@ -38,3 +39,19 @@ class PaddleFormersForCausalLM(CausalLMMixin, PaddleFormersModelBase, ModelForCa
@classmethod
def name(cls):
return "PaddleFormersForCausalLM"


# Define and register the PaddleFleet-backed causal LM only when
# is_paddlefleet_available() reports true; otherwise the class is not
# defined and its name is not added to __all__.
if is_paddlefleet_available():
    # Imported inside the guard rather than at module top
    # (presumably because base_fleet needs paddlefleet -- TODO confirm).
    from .base_fleet import PaddleFleetModelBase

    __all__ += ["PaddleFleetForCausalLM"]

    @ModelRegistry.register_model_class(
        architecture="PaddleFleetForCausalLM",
        module_name="paddleformers",
        category=ModelCategory.TEXT_GENERATION,
    )
    class PaddleFleetForCausalLM(PaddleFleetModelBase, ModelForCasualLM):
        """Text-generation model registered as architecture "PaddleFleetForCausalLM"."""

        @classmethod
        def name(cls):
            """Return "PaddleFleetForCausalLM", the same string used as the registered architecture."""
            return "PaddleFleetForCausalLM"
Loading
Loading