Skip to content

Commit f731429

Browse files
[FIX] LazyTurtle tensor-key alias resolution for nested HF weight renames (#2835)
* fix test_internvl_chat.py
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
* fix _resolve_checkpoint_tensor_name()
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
* cleanup
  Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>

---------

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
1 parent f1da794 commit f731429

3 files changed

Lines changed: 42 additions & 8 deletions

File tree

gptqmodel/utils/structure.py

Lines changed: 17 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1312,17 +1312,26 @@ def _resolve_checkpoint_tensor_name(self, module_path: str, rel_name: str) -> st
13121312
full_name = self._join_tensor_name(module_path, rel_name)
13131313
candidates: list[str] = []
13141314
seen: set[str] = set()
1315+
1316+
def add_candidate(candidate_name: str) -> None:
1317+
# Keep module-tree aliases attached to the fully resolved tensor key.
1318+
# Some HF mappings rewrite nested relative names (for example
1319+
# `spatial_linear.ln` -> `spatial_linear.3`), so alias expansion has
1320+
# to happen after `module_path` and `rel_name` are joined.
1321+
if candidate_name not in seen:
1322+
seen.add(candidate_name)
1323+
candidates.append(candidate_name)
1324+
for alias in self._module_tree_name_aliases(candidate_name):
1325+
if alias in seen:
1326+
continue
1327+
seen.add(alias)
1328+
candidates.append(alias)
1329+
13151330
for candidate_path in self._candidate_module_paths(module_path, allow_empty=True):
13161331
for aliased_path in self._runtime_to_checkpoint_alias_candidates(candidate_path):
13171332
candidate_name = self._join_tensor_name(aliased_path, rel_name)
1318-
if candidate_name not in seen:
1319-
seen.add(candidate_name)
1320-
candidates.append(candidate_name)
1321-
for alias in self._module_tree_name_aliases(candidate_name):
1322-
if alias in seen:
1323-
continue
1324-
seen.add(alias)
1325-
candidates.append(alias)
1333+
for aliased_name in self._runtime_to_checkpoint_alias_candidates(candidate_name):
1334+
add_candidate(aliased_name)
13261335

13271336
for candidate in candidates:
13281337
if candidate in self._weight_map:

tests/models/test_internvl_chat.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,6 +50,7 @@ def test_internvl_chat(self):
5050

5151
inputs = model.prepare_inputs_for_conversation(messages)
5252
inputs = model.move_input_capture_example(inputs, model.device)
53+
model.model.img_context_token_id = inputs.pop("img_context_token_id")
5354
inputs.pop("eos_token_id")
5455

5556
output_text = self.generate_stable_with_limit(

tests/test_lazy_turtle_conversion_mapping.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -317,6 +317,30 @@ def test_lazy_turtle_applies_reversed_weight_renamings_with_capturing_groups(tmp
317317
assert turtle._resolve_checkpoint_tensor_name("timm_model.backbone.conv", "weight") == "backbone.conv.weight"
318318

319319

320+
def test_lazy_turtle_applies_reversed_weight_renamings_inside_module_relative_name(tmp_path):
321+
turtle = _build_lazy_turtle(
322+
tmp_path,
323+
{
324+
"model.resampler_model.spatial_linear.3.weight": torch.zeros(2),
325+
},
326+
hf_conversion_map_reversed=[
327+
SimpleNamespace(
328+
source_patterns=["spatial_linear.ln"],
329+
target_patterns=["spatial_linear.3"],
330+
operations=[],
331+
)
332+
],
333+
)
334+
335+
assert (
336+
turtle._resolve_checkpoint_tensor_name(
337+
"model.resampler_model",
338+
"spatial_linear.ln.weight",
339+
)
340+
== "model.resampler_model.spatial_linear.3.weight"
341+
)
342+
343+
320344
def test_lazy_turtle_uses_transformers_checkpoint_conversion_mapping_for_gemma3(tmp_path, monkeypatch):
321345
conversion_mapping_module = SimpleNamespace(
322346
get_checkpoint_conversion_mapping=lambda model_type: _gemma3_weight_renamings()

0 commit comments

Comments
 (0)