Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions fastdeploy/model_executor/layers/moe/ep.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,8 @@
import fastdeploy
from fastdeploy import envs
from fastdeploy.config import MoEPhase
from fastdeploy.model_executor.utils import try_import
from fastdeploy.utils import singleton

paddlefleet_ops = try_import(["paddlefleet.ops"])


def load_deep_ep() -> ModuleType:
"""
Expand All @@ -46,7 +43,10 @@ def load_deep_ep() -> ModuleType:
# Enable paddle.enable_compat before importing deep_ep (required by PFCC/PaddleFleet variants)
paddle.enable_compat(scope={"deep_ep"})
try:
import paddlefleet_ops.deep_ep as deep_ep # type: ignore
try:
import paddlefleet.ops.deep_ep as deep_ep # type: ignore
except:
import paddlefleet_ops.deep_ep as deep_ep # type: ignore

logger.info("FD use PaddleFleet/DeepEP now.")
return deep_ep
Expand Down
7 changes: 5 additions & 2 deletions fastdeploy/model_executor/layers/quantization/fp8_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from fastdeploy.model_executor.ops.gpu import per_token_group_fp8_quant


paddlefleet_ops = try_import(["paddlefleet.ops"])
paddlefleet_ops = try_import(["paddlefleet.ops", "paddlefleet_ops"])


def load_deep_gemm():
Expand All @@ -45,7 +45,10 @@ def load_deep_gemm():
try:
import logging

import paddlefleet_ops.deep_gemm as deep_gemm
try:
import paddlefleet.ops.deep_gemm as deep_gemm
except:
import paddlefleet_ops.deep_gemm as deep_gemm

logging.getLogger().handlers.clear()
logger.info("Detected sm100, use PaddleFleet DeepGEMM")
Expand Down
Loading