Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion fastdeploy/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
_ResolvedTask = Literal["generate", "encode", "embed"]

# Model implementation backend options
ModelImpl = Literal["auto", "fastdeploy", "paddleformers"]
ModelImpl = Literal["auto", "fastdeploy", "paddleformers", "paddlefleet"]

_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = {
"generate": [],
Expand Down
6 changes: 4 additions & 2 deletions fastdeploy/engine/args_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ class EngineArgs:
'auto': Use native FastDeploy implementation when available, fallback to PaddleFormers.
'fastdeploy': Use only native FastDeploy implementations.
'paddleformers': Use PaddleFormers backend with FastDeploy optimizations.
'paddlefleet': Use PaddleFleet backend.
"""
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
"""
Expand Down Expand Up @@ -641,7 +642,7 @@ def __post_init__(self):
"kvcache_storage_backend is only supported when ENABLE_V1_KVCACHE_SCHEDULER=1"
)

valid_model_impls = ["auto", "fastdeploy", "paddleformers"]
valid_model_impls = ["auto", "fastdeploy", "paddleformers", "paddlefleet"]
if self.model_impl not in valid_model_impls:
raise NotImplementedError(
f"not support model_impl: '{self.model_impl}'. " f"Must be one of: {', '.join(valid_model_impls)}"
Expand Down Expand Up @@ -979,13 +980,14 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
model_group.add_argument(
"--model-impl",
type=str,
choices=["auto", "fastdeploy", "paddleformers"],
choices=["auto", "fastdeploy", "paddleformers", "paddlefleet"],
default=EngineArgs.model_impl,
help=(
"Model implementation backend. "
"'auto': Use native FastDeploy when available, fallback to PaddleFormers. "
"'fastdeploy': Use only native FastDeploy implementations. "
"'paddleformers': Use PaddleFormers backend with FastDeploy optimizations."
"'paddlefleet': Use PaddleFleet backend."

This comment was marked as outdated.

This comment was marked as outdated.

This comment was marked as outdated.

),
)

Expand Down
6 changes: 3 additions & 3 deletions fastdeploy/model_executor/graph_optimization/decorator.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ def __init__(self, fd_config: FDConfig, **kwargs):
# Not use graph optimization
return

def __call__(self, **kwargs):
def __call__(self, *args, **kwargs):
"""Decorator model.__call__() func"""
if not self.use_graph_opt:
return self.forward(**kwargs)
return self.forward(*args, **kwargs)

return self.graph_opt_backend(**kwargs)
return self.graph_opt_backend(*args, **kwargs)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 建议 graph_opt_backend.__call__ 仅接受 **kwargs(GraphOptBackend.__call__(self, **kwargs)),此处转发 *args 会导致在 use_graph_opt=True 且调用方传入位置参数时抛出 TypeError。

当前 PaddleFleet 模型已应用 @support_graph_optimization 装饰器,若用户配置开启图优化,调用链将触发此路径。

建议修复方式:

def __call__(self, *args, **kwargs):
    """Decorator model.__call__() func"""
    if not self.use_graph_opt:
        return self.forward(*args, **kwargs)
    # graph_opt_backend 仅支持 kwargs
    return self.graph_opt_backend(**kwargs)

或者在 GraphOptBackend.__call__ 中同步支持 *args,以避免上述写法在开启图优化时静默丢弃位置参数。


cls.__init__ = __init__
cls.__call__ = __call__
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/model_executor/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import os
from pathlib import Path

from paddleformers.transformers import PretrainedModel
from paddleformers.transformers.model_utils import PretrainedModel
Comment thread
xiaoguoguo626807 marked this conversation as resolved.

from fastdeploy.plugins.model_register import load_model_register_plugins

Expand Down
13 changes: 13 additions & 0 deletions fastdeploy/model_executor/models/model_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,19 @@ def _try_resolve_paddleformers(
elif model_impl == "auto" and is_fallback:
# Auto mode fallback when no native implementation exists
backend_arch = "PaddleFormersForCausalLM"
elif model_impl == "paddlefleet":
from fastdeploy.model_executor.utils import is_paddlefleet_available

if is_paddlefleet_available():
backend_arch = "PaddleFleetForCausalLM"
else:
raise ImportError(

This comment was marked as outdated.

This comment was marked as outdated.

"paddlefleet backend requires paddlefleet to be installed.\n"

This comment was marked as outdated.

This comment was marked as outdated.

"Please install with [change cuda version if needed ]:\n"

This comment was marked as outdated.

"python -m pip install paddlefleet==0.3.0.dev20260527 "
"--extra-index-url https://www.paddlepaddle.org.cn/packages/stable/cu126/ "

This comment was marked as outdated.

"--extra-index-url https://www.paddlepaddle.org.cn/packages/nightly/cu126/"
)
elif model_impl == "fastdeploy":
return None
else:
Expand Down
17 changes: 17 additions & 0 deletions fastdeploy/model_executor/models/paddleformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
ModelForCasualLM,
ModelRegistry,
)
from fastdeploy.model_executor.utils import is_paddlefleet_available

from .base import PaddleFormersModelBase
from .causallm import CausalLMMixin
Expand All @@ -38,3 +39,19 @@ class PaddleFormersForCausalLM(CausalLMMixin, PaddleFormersModelBase, ModelForCa
@classmethod
def name(cls):
return "PaddleFormersForCausalLM"


if is_paddlefleet_available():

This comment was marked as outdated.

from .base_fleet import PaddleFleetModelBase

__all__ += ["PaddleFleetForCausalLM"]

@ModelRegistry.register_model_class(
architecture="PaddleFleetForCausalLM",
module_name="paddleformers",
category=ModelCategory.TEXT_GENERATION,
)
class PaddleFleetForCausalLM(PaddleFleetModelBase, ModelForCasualLM):
@classmethod
def name(cls):
return "PaddleFleetForCausalLM"
Loading
Loading