Skip to content

Commit 6d25706

Browse files
Commit message: update modular files to match source
1 parent 123b776 commit 6d25706

3 files changed

Lines changed: 6 additions & 6 deletions

File tree

src/transformers/models/olmo_hybrid/modeling_olmo_hybrid.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
3939
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
4040
from ...processing_utils import Unpack
41-
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
41+
from ...utils import TransformersKwargs, agnostic, auto_docstring, can_return_tuple, logging
4242
from ...utils.generic import maybe_autocast, merge_with_config_defaults
4343
from ...utils.import_utils import is_flash_linear_attention_available
4444
from ...utils.output_capturing import capture_outputs
@@ -695,7 +695,7 @@ def __init__(self, config: OlmoHybridConfig, layer_idx: int):
695695
else FusedRMSNormGated(
696696
self.head_v_dim,
697697
eps=1e-5,
698-
device=torch.cuda.current_device(),
698+
device=agnostic.gpu.current_device(),
699699
dtype=config.dtype if config.dtype is not None else torch.get_default_dtype(),
700700
)
701701
)

src/transformers/models/qwen3_5_moe/modeling_qwen3_5_moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
4646
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
4747
from ...processing_utils import Unpack
48-
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging, torch_compilable_check
48+
from ...utils import TransformersKwargs, agnostic, auto_docstring, can_return_tuple, logging, torch_compilable_check
4949
from ...utils.generic import is_flash_attention_requested, maybe_autocast, merge_with_config_defaults
5050
from ...utils.import_utils import is_causal_conv1d_available, is_flash_linear_attention_available
5151
from ...utils.output_capturing import OutputRecorder, capture_outputs
@@ -396,7 +396,7 @@ def __init__(self, config: Qwen3_5MoeConfig, layer_idx: int):
396396
self.head_v_dim,
397397
eps=self.layer_norm_epsilon,
398398
activation=self.activation,
399-
device=torch.cuda.current_device(),
399+
device=agnostic.gpu.current_device(),
400400
dtype=config.dtype if config.dtype is not None else torch.get_default_dtype(),
401401
)
402402
)

src/transformers/models/qwen3_next/modeling_qwen3_next.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
4343
from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
4444
from ...processing_utils import Unpack
45-
from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, logging
45+
from ...utils import TransformersKwargs, agnostic, auto_docstring, can_return_tuple, logging
4646
from ...utils.generic import maybe_autocast, merge_with_config_defaults
4747
from ...utils.import_utils import is_causal_conv1d_available, is_flash_linear_attention_available
4848
from ...utils.output_capturing import OutputRecorder, capture_outputs
@@ -541,7 +541,7 @@ def __init__(self, config: Qwen3NextConfig, layer_idx: int):
541541
self.head_v_dim,
542542
eps=self.layer_norm_epsilon,
543543
activation=self.activation,
544-
device=torch.cuda.current_device(),
544+
device=agnostic.gpu.current_device(),
545545
dtype=config.dtype if config.dtype is not None else torch.get_default_dtype(),
546546
)
547547
)

0 commit comments

Comments (0)