Skip to content

Commit 759fcc9

Browse files
committed
Revert model package exports.
Restore tensorrt_llm/_torch/models/__init__.py to its state before the last nine file commits while keeping formatting lint-clean. Signed-off-by: Athena Cai <athenac@nvidia.com>
1 parent a2cbdcf commit 759fcc9

2 files changed

Lines changed: 11 additions & 37 deletions

File tree

tensorrt_llm/_torch/models/__init__.py

Lines changed: 4 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,13 @@
55
# under transformers >= 5.5; see _torch/configs/__init__.py.
66
import tensorrt_llm._torch.configs # noqa: F401
77

8-
from .modeling_afmoe import AfmoeForCausalLM
98
from .modeling_auto import AutoModelForCausalLM
10-
from .modeling_bart import (BartForConditionalGeneration,
11-
MBartForConditionalGeneration)
129
from .modeling_bert import BertForSequenceClassification
1310
from .modeling_clip import CLIPVisionModel
1411
from .modeling_cohere2 import Cohere2ForCausalLM
1512
from .modeling_deepseekv3 import DeepseekV3ForCausalLM
1613
from .modeling_exaone4 import Exaone4ForCausalLM
17-
from .modeling_exaone4_5 import Exaone4_5_ForConditionalGeneration
14+
from .modeling_exaone_moe import ExaoneMoeForCausalLM
1815
from .modeling_gemma3 import Gemma3ForCausalLM
1916
from .modeling_gemma3vl import Gemma3VLM
2017
from .modeling_glm import Glm4MoeForCausalLM
@@ -23,12 +20,9 @@
2320
from .modeling_hunyuan_moe import HunYuanMoEV1ForCausalLM
2421
from .modeling_hyperclovax import HCXVisionForCausalLM
2522
from .modeling_kimi_k25 import KimiK25ForConditionalGeneration
26-
from .modeling_laguna import LagunaForCausalLM
2723
from .modeling_llama import LlamaForCausalLM
2824
from .modeling_llava_next import LlavaNextModel
2925
from .modeling_minimaxm2 import MiniMaxM2ForCausalLM
30-
from .modeling_minimaxm3 import (MiniMaxM3ForCausalLM,
31-
MiniMaxM3VLForConditionalGeneration)
3226
from .modeling_mistral import Mistral3VLM, MistralForCausalLM
3327
from .modeling_mixtral import MixtralForCausalLM
3428
from .modeling_nemotron import NemotronForCausalLM
@@ -50,31 +44,20 @@
5044
from .modeling_seedoss import SeedOssForCausalLM
5145
from .modeling_siglip import SiglipVisionModel
5246
from .modeling_starcoder2 import Starcoder2ForCausalLM
53-
from .modeling_step3p7 import Step3p7ForCausalLM
54-
from .modeling_step3p7vl import Step3p7VLForConditionalGeneration
55-
from .modeling_t5 import T5ForConditionalGeneration
5647
from .modeling_utils import get_model_architecture
5748
from .modeling_vila import VilaModel
5849

59-
try:
60-
from .modeling_exaone_moe import ExaoneMoeForCausalLM
61-
except ImportError:
62-
ExaoneMoeForCausalLM = None
63-
6450
# Note: for better readability, this should have the same order as the imports above
6551
__all__ = [
66-
"AfmoeForCausalLM",
6752
"AutoModelForCausalLM",
68-
"BartForConditionalGeneration",
6953
"BertForSequenceClassification",
7054
"CLIPVisionModel",
7155
"DeepseekV3ForCausalLM",
7256
"Exaone4ForCausalLM",
73-
"Exaone4_5_ForConditionalGeneration",
57+
"ExaoneMoeForCausalLM",
7458
"Gemma3ForCausalLM",
7559
"Gemma3VLM",
7660
"HCXVisionForCausalLM",
77-
"LagunaForCausalLM",
7861
"HunYuanDenseV1ForCausalLM",
7962
"HunYuanMoEV1ForCausalLM",
8063
"KimiK25ForConditionalGeneration",
@@ -95,8 +78,6 @@
9578
"Qwen2MoeForCausalLM",
9679
"SiglipVisionModel",
9780
"Starcoder2ForCausalLM",
98-
"T5ForConditionalGeneration",
99-
"MBartForConditionalGeneration",
10081
"get_model_architecture",
10182
"VilaModel",
10283
"Qwen2VLModel",
@@ -112,24 +93,16 @@
11293
"Glm4MoeForCausalLM",
11394
"Qwen3VLModel",
11495
"MiniMaxM2ForCausalLM",
115-
"MiniMaxM3ForCausalLM",
116-
"MiniMaxM3VLForConditionalGeneration",
11796
"Cohere2ForCausalLM",
118-
"Step3p7ForCausalLM",
119-
"Step3p7VLForConditionalGeneration",
12097
]
12198

122-
if ExaoneMoeForCausalLM is not None:
123-
__all__.append("ExaoneMoeForCausalLM")
124-
12599
if transformers.__version__ >= "4.45.1":
126100
from .modeling_mllama import MllamaForConditionalGeneration # noqa
127101

128102
__all__.append("MllamaForConditionalGeneration")
129103
else:
130-
print(
131-
f"Failed to import MllamaForConditionalGeneration as transformers.__version__ {transformers.__version__} < 4.45.1"
132-
)
104+
print("Failed to import MllamaForConditionalGeneration as "
105+
f"transformers.__version__ {transformers.__version__} < 4.45.1")
133106

134107
# Gemma4 requires transformers>=5.5.0 (native Gemma4 config/model classes).
135108
# Import silently on failure -- `get_model_architecture` in modeling_utils.py

tensorrt_llm/_torch/pyexecutor/kv_cache_transceiver.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
from tensorrt_llm.mapping import Mapping
1111

1212
from .llm_request import LlmRequest
13-
from .mamba_cache_manager import BaseMambaCacheManager, CppMambaHybridCacheManager
13+
from .mamba_cache_manager import (BaseMambaCacheManager,
14+
CppMambaHybridCacheManager)
1415
from .resource_manager import KVCacheManager
1516

1617
CacheTransceiverCpp = tensorrt_llm.bindings.internal.batch_manager.CacheTransceiver
@@ -97,10 +98,9 @@ def create_kv_cache_transceiver(
9798
f"enable chunked transfer.")
9899
elif (runtime == "CPP"
99100
and cache_transceiver_config.chunk_size_blocks is not None):
100-
logger.warning(
101-
"chunk_size_blocks is set but transceiver_runtime='CPP' "
102-
"explicitly disables Python auto-selection; "
103-
"chunk_size_blocks will be ignored.")
101+
logger.warning("chunk_size_blocks is set but transceiver_runtime='CPP' "
102+
"explicitly disables Python auto-selection; "
103+
"chunk_size_blocks will be ignored.")
104104

105105
# Warn when chunk_size_blocks is below the recommended floor. The Pydantic
106106
# field is PositiveInt (>=1), but values below ~16 push the per-chunk RDMA
@@ -127,7 +127,8 @@ def create_kv_cache_transceiver(
127127
f"got {cache_transceiver_config.backend}. "
128128
f"Please use transceiver_runtime='CPP' for MPI, UCX, or MOONCAKE backends."
129129
)
130-
from tensorrt_llm._torch.disaggregation.transceiver import KvCacheTransceiverV2
130+
from tensorrt_llm._torch.disaggregation.transceiver import \
131+
KvCacheTransceiverV2
131132
logger.info("Using KvCacheTransceiverV2")
132133
return KvCacheTransceiverV2(mapping, dist, kv_cache_manager,
133134
cache_transceiver_config)

0 commit comments

Comments
 (0)