|
2 | 2 | # SPDX-FileCopyrightText: 2024-2025 qubitium@modelcloud.ai |
3 | 3 | # SPDX-License-Identifier: Apache-2.0 |
4 | 4 | # Contact: qubitium@modelcloud.ai, x.com/qubitium |
| 5 | +import importlib |
5 | 6 | import os |
6 | 7 | import sys |
7 | 8 | import threading |
@@ -583,7 +584,10 @@ def test_qwen3_5_moe_subset_early_stop_follows_module_tree_execution_order(): |
583 | 584 | layer = model.model.layers[0] |
584 | 585 | replace_module_with_hooked_legacy(layer) |
585 | 586 |
|
586 | | - quant_cfg = _make_quant_config() |
| 587 | + is_causal_conv1d_available = importlib.util.find_spec("causal_conv1d") is not None |
| 588 | + |
| 589 | + device = "cuda" if is_causal_conv1d_available else "cpu" |
| 590 | + quant_cfg = _make_quant_config(device) |
587 | 591 |
|
588 | 592 | class _DummyQwen3_5Model: |
589 | 593 | moe_lifecycle_hooks = Qwen3_5_MoeQModel.moe_lifecycle_hooks |
@@ -632,7 +636,7 @@ def prepare_layer_replay_kwargs(self, layer, layer_input, additional_inputs, tar |
632 | 636 | ] |
633 | 637 | assert subset_names[-1] == "mlp.experts.3.up_proj" |
634 | 638 |
|
635 | | - layer_inputs = [[torch.randn(1, 4, cfg.hidden_size)]] |
| 639 | + layer_inputs = [[torch.randn(1, 4, cfg.hidden_size).to(device)]] |
636 | 640 | full_modules = find_modules(layer) |
637 | 641 | subset = looper.create_named_modules( |
638 | 642 | module=layer, |
@@ -665,7 +669,7 @@ def prepare_layer_replay_kwargs(self, layer, layer_input, additional_inputs, tar |
665 | 669 | layer_input_kwargs=[{}], |
666 | 670 | position_ids=[None], |
667 | 671 | attention_masks=[None], |
668 | | - cur_layer_device=torch.device("cpu"), |
| 672 | + cur_layer_device=torch.device(device), |
669 | 673 | is_lm_head_module=False, |
670 | 674 | layer_descriptor="layers.0", |
671 | 675 | layer_title="subset-check", |
|
0 commit comments