Skip to content

Commit caad5ac

Browse files
committed
[None][feat] Address review: remove bundled config, document inline refs
Remove the bundled InternLM3Config from the modeling file. The AutoDeploy (AD) pipeline loads the config from the HF checkpoint via trust_remote_code=True (the same pattern as DeciLM), and the test file now loads InternLM3Config dynamically from the HF cache. The inline HF reference classes are kept because the HF modeling_internlm3.py cannot be imported on the installed transformers version (it requires LossKwargs from transformers >=4.48).

Signed-off-by: Lucas Liebenwein <lliebenwein@nvidia.com>
Signed-off-by: Lucas Liebenwein <11156568+lucaslie@users.noreply.github.com>
1 parent d217654 commit caad5ac

2 files changed

Lines changed: 21 additions & 81 deletions

File tree

tensorrt_llm/_torch/auto_deploy/models/custom/modeling_internlm3.py

Lines changed: 2 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@
1919
https://huggingface.co/internlm/internlm3-8b-instruct
2020
2121
This implementation differs from the original HuggingFace version in the following ways:
22-
* Bundled config class to work without trust_remote_code (model not in transformers)
2322
* Simplified for prefill-only inference (no KV caching)
2423
* Uses auto_deploy custom ops for export compatibility
2524
* Removed flash attention variants (uses torch_attention custom op)
2625
* Removed gradient checkpointing and training code paths
2726
* Removed attention dropout (inference only)
2827
* No repeat_kv — AD attention ops handle GQA natively
2928
29+
Config is loaded from the HF checkpoint via trust_remote_code=True (not bundled here).
30+
3031
The InternLM3 model uses GQA with SwiGLU MLP, RMSNorm, and dynamic NTK-scaled RoPE.
3132
"""
3233

@@ -35,7 +36,6 @@
3536

3637
import torch
3738
from torch import nn
38-
from transformers import AutoConfig, PretrainedConfig
3939
from transformers.activations import ACT2FN
4040
from transformers.generation import GenerationMixin
4141
from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS
@@ -45,79 +45,6 @@
4545
from tensorrt_llm._torch.auto_deploy.models.hf import AutoModelForCausalLMFactory
4646

4747

48-
class InternLM3Config(PretrainedConfig):
49-
"""Configuration class for InternLM3 model.
50-
51-
Bundled with the custom model implementation since InternLM3 is not natively
52-
registered in transformers (requires trust_remote_code).
53-
"""
54-
55-
model_type = "internlm3"
56-
57-
def __init__(
58-
self,
59-
vocab_size: int = 128512,
60-
hidden_size: int = 4096,
61-
intermediate_size: int = 11008,
62-
num_hidden_layers: int = 32,
63-
num_attention_heads: int = 32,
64-
num_key_value_heads: int = 32,
65-
hidden_act: str = "silu",
66-
max_position_embeddings: int = 32768,
67-
initializer_range: float = 0.02,
68-
rms_norm_eps: float = 1e-6,
69-
tie_word_embeddings: bool = False,
70-
rope_theta: float = 10000.0,
71-
rope_scaling: Optional[dict] = None,
72-
qkv_bias: bool = False,
73-
attention_dropout: float = 0.0,
74-
bias: bool = False,
75-
head_dim: Optional[int] = None,
76-
pad_token_id: Optional[int] = None,
77-
**kwargs,
78-
):
79-
self.vocab_size = vocab_size
80-
self.hidden_size = hidden_size
81-
self.intermediate_size = intermediate_size
82-
self.num_hidden_layers = num_hidden_layers
83-
self.num_attention_heads = num_attention_heads
84-
self.num_key_value_heads = (
85-
num_key_value_heads if num_key_value_heads is not None else num_attention_heads
86-
)
87-
self.hidden_act = hidden_act
88-
self.max_position_embeddings = max_position_embeddings
89-
self.initializer_range = initializer_range
90-
self.rms_norm_eps = rms_norm_eps
91-
self.rope_theta = rope_theta
92-
self.rope_scaling = rope_scaling
93-
self.qkv_bias = qkv_bias
94-
self.attention_dropout = attention_dropout
95-
self.bias = bias
96-
self.head_dim = (
97-
head_dim if head_dim is not None else self.hidden_size // self.num_attention_heads
98-
)
99-
100-
# Normalize rope_scaling type field
101-
if self.rope_scaling is not None and "type" in self.rope_scaling:
102-
self.rope_scaling["rope_type"] = self.rope_scaling["type"]
103-
104-
super().__init__(
105-
pad_token_id=pad_token_id,
106-
tie_word_embeddings=tie_word_embeddings,
107-
**kwargs,
108-
)
109-
110-
111-
# Register config with AutoConfig so it can be loaded from HF hub
112-
try:
113-
AutoConfig.register("internlm3", InternLM3Config, exist_ok=True)
114-
except TypeError:
115-
try:
116-
AutoConfig.register("internlm3", InternLM3Config)
117-
except ValueError:
118-
pass
119-
120-
12148
class InternLM3RMSNorm(nn.Module):
12249
"""RMS Normalization using AutoDeploy torch_rmsnorm reference op."""
12350

@@ -310,7 +237,6 @@ class InternLM3CausalLMOutput(ModelOutput):
310237
class InternLM3PreTrainedModel(PreTrainedModel):
311238
"""Base class for InternLM3 models."""
312239

313-
config_class = InternLM3Config
314240
base_model_prefix = "model"
315241
_no_split_modules = ["InternLM3DecoderLayer"]
316242
supports_gradient_checkpointing = False

tests/unittest/auto_deploy/singlegpu/models/test_internlm3_modeling.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@
2020
for export compatibility. InternLM3 uses GQA with SwiGLU MLP, RMSNorm,
2121
and dynamic NTK-scaled RoPE.
2222
23-
Since InternLM3 is not natively in the installed transformers (requires
24-
trust_remote_code), HF reference classes are defined inline for equivalence
25-
testing.
23+
HF reference classes are defined inline for equivalence testing because the
24+
HF modeling_internlm3.py (from the HF checkpoint) cannot be imported on the
25+
installed transformers version — it requires ``LossKwargs`` from
26+
``transformers.utils`` which is only available in transformers >=4.48.
27+
The config class *can* be loaded via AutoConfig with trust_remote_code.
2628
"""
2729

2830
import math
@@ -33,13 +35,13 @@
3335
from _model_test_utils import assert_rmse_close
3436
from torch import nn
3537
from torch.export import Dim
38+
from transformers import AutoConfig
3639
from transformers.activations import ACT2FN
3740
from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS
3841

3942
from tensorrt_llm._torch.auto_deploy.export import torch_export_to_gm
4043
from tensorrt_llm._torch.auto_deploy.models.custom.modeling_internlm3 import (
4144
InternLM3Attention,
42-
InternLM3Config,
4345
InternLM3DecoderLayer,
4446
InternLM3ForCausalLM,
4547
InternLM3MLP,
@@ -50,14 +52,26 @@
5052

5153
_BATCH_AND_SEQUENCE_TEST_CASES = ((2, 6), (1, 8))
5254

55+
# Load InternLM3Config from the HF checkpoint cache (trust_remote_code).
56+
# The config class is not in the installed transformers but is bundled with the HF repo.
57+
try:
58+
_hf_config = AutoConfig.from_pretrained(
59+
"internlm/internlm3-8b-instruct", trust_remote_code=True
60+
)
61+
InternLM3Config = type(_hf_config)
62+
except Exception:
63+
InternLM3Config = None
64+
5365

5466
@pytest.fixture(scope="function", autouse=True)
5567
def set_seed():
5668
torch.manual_seed(42)
5769

5870

59-
def _create_small_config() -> InternLM3Config:
71+
def _create_small_config():
6072
"""Create a small InternLM3 config for testing."""
73+
if InternLM3Config is None:
74+
pytest.skip("InternLM3Config not available (internlm/internlm3-8b-instruct not cached)")
6175
return InternLM3Config(
6276
vocab_size=1000,
6377
hidden_size=64,

0 commit comments

Comments
 (0)