Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions paddleformers/cli/utils/llm_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,14 @@ def get_lora_target_modules(model):
"model.visual.blocks.*mlp.up_proj.*",
"model.visual.blocks.*mlp.down_proj.*",
]
elif model.config.model_type == "internlm2_5":
target_modules = [
".*wqkv.*",
".*wo.*",
".*w1.*",
".*w2.*",
".*w3.*",
]
else:
raise ValueError(f"Unknown base_model_prefix: {model.config.model_type}.")
return target_modules
Expand Down
10 changes: 10 additions & 0 deletions paddleformers/datasets/template/template.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,3 +985,13 @@ def _get_gpt_oss_prefix():
chat_sep="<|assistant|>\n",
mm_plugin=get_mm_plugin(name="glm_ocr", image_token="<|image|>"),
)
# Chat template registered under the name "internlm2_5".
# User, assistant and system turns are each wrapped in
# "<|im_start|>{role}\n ... <|im_end|>\n" markers; the user slot also opens
# the following assistant turn. The prefix slot is the literal "<s>".
# NOTE(review): the assistant slot, `chat_sep` and `suffix` all carry
# "<|im_end|>\n" — confirm against the template machinery that the end marker
# is not emitted twice.
register_template(
name="internlm2_5",
format_user=StringFormatter(slots=["<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n"]),
format_assistant=StringFormatter(slots=["{{content}}<|im_end|>\n"]),
format_system=StringFormatter(slots=["<|im_start|>system\n{{content}}<|im_end|>\n"]),
format_prefix=EmptyFormatter(slots=["<s>"]),
chat_sep="<|im_end|>\n",
suffix=["<|im_end|>\n"],
enable_thinking=None,
)
12 changes: 12 additions & 0 deletions paddleformers/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,17 @@
],
"glm_ocr.processor": ["Glm46VProcessor"],
"glm_ocr.image_processor": ["Glm46VImageProcessor"],
"intern_lm2_5.configuration": ["InternLM25Config"],
"intern_lm2_5.modeling": [
"InternLM25DecoderLayer",
"InternLM25Model",
"InternLM25ForCausalLM",
"InternLM25PretrainedModel",
"InternLM25ForSequenceClassification",
"InternLM25ForQuestionAnswering",
"InternLM25ForTokenClassification",
],
"intern_lm2_5.tokenizer": ["InternLM25Tokenizer"],
}

if TYPE_CHECKING:
Expand Down Expand Up @@ -410,6 +421,7 @@
from .phi3 import *
from .gemma3_text import *
from .glm_ocr import *
from .intern_lm2_5 import *
else:
sys.modules[__name__] = _LazyModule(
__name__,
Expand Down
3 changes: 3 additions & 0 deletions paddleformers/transformers/auto/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
("glm_ocr", "GlmOcrConfig"),
("qwen3_5", "Qwen3_5Config"),
("qwen3_5_moe", "Qwen3_5MoEConfig"),
("internlm2", "InternLM2Config"),
]
)

Expand Down Expand Up @@ -89,6 +90,7 @@
("glm_ocr", "GlmOcrForConditionalGeneration"),
("qwen3_5_moe", "Qwen3_5MoEForConditionalGeneration"),
("qwen3_5", "Qwen3_5ForConditionalGeneration"),
("internlm2", "InternLM2"),
]
)

Expand All @@ -102,6 +104,7 @@
("qwen2_5_vl_text", "qwen2_5_vl"),
("qwen3_vl_text", "qwen3_vl"),
("qwen3_vl_moe_text", "qwen3_vl_moe"),
("internlm2", "intern"),
]
)

Expand Down
1 change: 1 addition & 0 deletions paddleformers/transformers/auto/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
("Gemma3", "gemma3_text"),
("Glm4vMoe", "glm4v_moe"),
("GlmOcr", "glm_ocr"),
("InternLM2", "intern"),
]
)

Expand Down
44 changes: 44 additions & 0 deletions paddleformers/transformers/intern/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
InternLM2 Common Module

This module provides unified access to both InternLM2 2.0 and 2.5 models.
It automatically routes to the correct implementation based on the model configuration.
"""

from .configuration import InternLM2Config
from .modeling import (
InternLM2ForCausalLM,
InternLM2ForQuestionAnswering,
InternLM2ForSequenceClassification,
InternLM2ForTokenClassification,
InternLM2Model,
InternLM2PretrainedModel,
)

# `InternLM2` is a second name bound to the very same class object as
# `InternLM2Model` (no subclass is created).
# NOTE(review): presumably this is the name the auto-class mappings look up
# for model type "internlm2" — confirm against the auto configuration tables.
InternLM2 = InternLM2Model

# Public API of this package: the names exported by `from ... import *`.
__all__ = [
"InternLM2Config",
"InternLM2Model",
"InternLM2",
"InternLM2PretrainedModel",
"InternLM2ForCausalLM",
"InternLM2ForSequenceClassification",
"InternLM2ForQuestionAnswering",
"InternLM2ForTokenClassification",
]
126 changes: 126 additions & 0 deletions paddleformers/transformers/intern/configuration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright (c) 2026 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
InternLM2 Common Configuration

This module provides a single configuration class shared by InternLM2 2.0 and 2.5 models.
The configuration exposes the detected version through the `is_version_2_5` property,
which inspects the `auto_map` field of the loaded configuration.
"""

from paddleformers.transformers.configuration_utils import PretrainedConfig


class InternLM2Config(PretrainedConfig):
    """
    Unified configuration for InternLM2 models (both the 2.0 and 2.5 releases).

    The class-level ``model_type`` is ``"internlm2"`` (not ``"internlm2_5"``);
    the original author notes that this must match the HuggingFace config.
    Whether a loaded configuration belongs to a 2.5 checkpoint is reported by
    the :attr:`is_version_2_5` property.

    Args:
        vocab_size, hidden_size, intermediate_size, num_hidden_layers,
        num_attention_heads, hidden_act, max_position_embeddings,
        initializer_range, rms_norm_eps, use_cache, pretraining_tp, bias,
        rope_theta:
            Stored unchanged as attributes of the same name.
        num_key_value_heads:
            Stored as given; when ``None`` it defaults to
            ``num_attention_heads``.
        rope_scaling:
            ``None`` or a dict with exactly the two keys ``"type"``
            (``"linear"`` or ``"dynamic"``) and ``"factor"`` (a number >= 1).
        attn_implementation:
            Stored as given; ``None`` is replaced by ``"eager"``.
        dtype:
            A string (``"float32"``, ``"float16"`` or ``"bfloat16"``,
            case-insensitive) that is converted to the matching paddle dtype,
            or any non-string value, which is stored as is. Unrecognised
            strings fall back to ``paddle.float32``.
        pad_token_id, bos_token_id, eos_token_id, tie_word_embeddings, **kwargs:
            Forwarded to ``PretrainedConfig.__init__``.

    Raises:
        ValueError: If ``rope_scaling`` is not ``None`` and is malformed.
    """

    model_type = "internlm2"  # Important: must match HuggingFace config
    _auto_class = "AutoConfig"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=92550,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        hidden_act="silu",
        max_position_embeddings=2048,
        initializer_range=0.02,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=0,
        bos_token_id=1,
        eos_token_id=2,
        pretraining_tp=1,
        tie_word_embeddings=False,
        bias=True,
        rope_theta=10000,
        rope_scaling=None,
        attn_implementation=None,
        dtype="bfloat16",
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.bias = bias

        # Local import: paddle is only needed here to resolve dtype names.
        import paddle

        if isinstance(dtype, str):
            dtype_map = {
                "float32": paddle.float32,
                "float16": paddle.float16,
                "bfloat16": paddle.bfloat16,
            }
            # Unrecognised names silently fall back to float32.
            self.dtype = dtype_map.get(dtype.lower(), paddle.float32)
        else:
            self.dtype = dtype

        # Without an explicit KV-head count, use one KV head per attention head.
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads
        self.num_key_value_heads = num_key_value_heads

        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.pretraining_tp = pretraining_tp
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        # Fail fast on a malformed `rope_scaling` before the base class runs.
        self._rope_scaling_validation()
        self.attn_implementation = "eager" if attn_implementation is None else attn_implementation

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

    def _rope_scaling_validation(self):
        """
        Validate ``self.rope_scaling``.

        ``None`` is accepted as "no scaling". Otherwise the value must be a
        dict with exactly two entries: ``"type"`` (``"linear"`` or
        ``"dynamic"``) and ``"factor"`` (an int or float >= 1).

        Raises:
            ValueError: If any of the above constraints is violated.
        """
        rope_scaling = self.rope_scaling
        if rope_scaling is None:
            return

        if not isinstance(rope_scaling, dict) or len(rope_scaling) != 2:
            raise ValueError(
                f"`rope_scaling` must be a dictionary with two fields, `type` and `factor`, got {rope_scaling}"
            )

        rope_scaling_type = rope_scaling.get("type")
        rope_scaling_factor = rope_scaling.get("factor")
        if rope_scaling_type is None or rope_scaling_factor is None:
            raise ValueError(f"`rope_scaling` must contain 'type' and 'factor' keys, got {rope_scaling}")
        if rope_scaling_type not in ("linear", "dynamic"):
            raise ValueError(f"`rope_scaling` type must be 'linear' or 'dynamic', got '{rope_scaling_type}'")
        # A present but non-numeric or < 1 factor would otherwise only fail
        # later, inside the rotary-embedding code; reject it here instead.
        if not isinstance(rope_scaling_factor, (int, float)) or rope_scaling_factor < 1.0:
            raise ValueError(
                f"`rope_scaling`'s factor field must be a number >= 1, got {rope_scaling_factor} "
                f"of type {type(rope_scaling_factor)}"
            )

    @property
    def is_version_2_5(self):
        """
        Report whether this configuration is treated as InternLM2.5.

        Returns:
            bool: ``True`` when the config has a non-``None`` ``auto_map``
            containing the key ``"AutoModelForSequenceClassification"``;
            ``False`` otherwise.
        """
        auto_map = getattr(self, "auto_map", None)
        return auto_map is not None and "AutoModelForSequenceClassification" in auto_map
Loading
Loading