1010import torch
1111
1212from gptqmodel .quantization .npu_linalg import npu_inverse_cholesky_factor
13- from gptqmodel .utils .torch import HAS_NPU
13+ from gptqmodel .utils .torch import HAS_NPU , last_npu_device_by_pci_bus_order
1414
1515
1616pytestmark = pytest .mark .skipif (not HAS_NPU , reason = "Ascend NPU is required" )
17+ DEFAULT_ASCEND_RT_VISIBLE_DEVICES = "7"
18+
19+
def _default_npu_test_device() -> str:
    """Return the device string used when no explicit test device is configured.

    Returns:
        The string form of whatever ``last_npu_device_by_pci_bus_order()``
        returns, or ``"npu:0"`` when that helper returns ``None``.
    """
    candidate = last_npu_device_by_pci_bus_order()
    if candidate is None:
        return "npu:0"
    return str(candidate)
23+
24+
# Device string used by the in-process tests. A non-empty
# GPTQMODEL_TEST_NPU_DEVICE override wins; otherwise the default is computed.
# Using `or` rather than a `.get()` default means the default helper is only
# called when no override is present, and an empty override is treated the
# same as an unset one instead of being handed to torch.device("").
NPU_TEST_DEVICE = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE") or _default_npu_test_device()
26+
27+
def _test_npu_device() -> torch.device:
    """Build the ``torch.device`` for ``NPU_TEST_DEVICE`` and select it.

    When ``HAS_NPU`` is true the device is also made current through
    ``torch.npu.set_device``; otherwise the device object is returned
    without touching the NPU runtime.

    Returns:
        The ``torch.device`` constructed from ``NPU_TEST_DEVICE``.
    """
    target = torch.device(NPU_TEST_DEVICE)
    if not HAS_NPU:
        return target
    torch.npu.set_device(target)
    return target
33+
34+
def _default_subprocess_env() -> dict[str, str]:
    """Build the environment mapping handed to the NPU subprocess test.

    Starts from a copy of ``os.environ``. If ``ASCEND_RT_VISIBLE_DEVICES`` is
    missing or blank, it is filled from
    ``GPTQMODEL_TEST_ASCEND_RT_VISIBLE_DEVICES`` or, failing that, from
    ``DEFAULT_ASCEND_RT_VISIBLE_DEVICES``. When the resulting device list is
    non-empty, names a single device (no comma), and
    ``GPTQMODEL_TEST_NPU_DEVICE`` is not already set, that variable is set to
    ``"npu:0"``.

    Returns:
        The prepared copy of the environment; ``os.environ`` itself is not
        modified.
    """
    child_env = os.environ.copy()
    visible = child_env.get("ASCEND_RT_VISIBLE_DEVICES", "").strip()
    if not visible:
        visible = child_env.get(
            "GPTQMODEL_TEST_ASCEND_RT_VISIBLE_DEVICES",
            DEFAULT_ASCEND_RT_VISIBLE_DEVICES,
        )
        child_env["ASCEND_RT_VISIBLE_DEVICES"] = visible

    # NOTE(review): presumably a lone visible device is re-indexed as 0 inside
    # the child process, hence "npu:0" — confirm against the Ascend runtime.
    exactly_one_device = bool(visible) and "," not in visible
    if exactly_one_device and "GPTQMODEL_TEST_NPU_DEVICE" not in child_env:
        child_env["GPTQMODEL_TEST_NPU_DEVICE"] = "npu:0"
    return child_env
1744
1845
1946def _spd_matrix (size : int , seed : int ) -> torch .Tensor :
@@ -23,7 +50,7 @@ def _spd_matrix(size: int, seed: int) -> torch.Tensor:
2350
2451
2552def test_npu_inverse_cholesky_factor_matches_cpu_reference ():
26- device = torch . device ( "npu:0" )
53+ device = _test_npu_device ( )
2754
2855 for size in (8 , 64 , 128 ):
2956 matrix_cpu = _spd_matrix (size , seed = 1000 + size )
@@ -46,7 +73,7 @@ def test_npu_inverse_cholesky_factor_matches_cpu_reference():
4673
4774
4875def test_npu_inverse_cholesky_factor_rejects_non_positive_definite_matrix ():
49- matrix = torch .tensor ([[0.0 , 1.0 ], [1.0 , 0.0 ]], dtype = torch .float32 , device = "npu:0" )
76+ matrix = torch .tensor ([[0.0 , 1.0 ], [1.0 , 0.0 ]], dtype = torch .float32 , device = _test_npu_device () )
5077
5178 with pytest .raises (torch ._C ._LinAlgError ):
5279 npu_inverse_cholesky_factor (matrix )
@@ -55,6 +82,7 @@ def test_npu_inverse_cholesky_factor_rejects_non_positive_definite_matrix():
5582def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings ():
5683 script = textwrap .dedent (
5784 """
85+ import os
5886 import torch
5987 import torch.nn as nn
6088 from gptqmodel.quantization.config import QuantizeConfig
@@ -64,15 +92,16 @@ def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings():
6492 if not HAS_NPU:
6593 raise RuntimeError("Ascend NPU is not available")
6694
67- torch.npu.set_device(0)
95+ npu_test_device = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE", "npu:0")
96+ torch.npu.set_device(npu_test_device)
6897 torch.manual_seed(0)
6998
70- module = nn.Linear(16, 16, bias=False, device="npu:0" , dtype=torch.float16)
99+ module = nn.Linear(16, 16, bias=False, device=npu_test_device , dtype=torch.float16)
71100 gptq = GPTQ(module, qcfg=QuantizeConfig(damp_percent=0.05, damp_auto_increment=0.05))
72101
73102 base = torch.randn(16, 16, dtype=torch.float32)
74103 hessian_cpu = base.matmul(base.T) + torch.eye(16, dtype=torch.float32) * 0.25
75- hessian = hessian_cpu.to(device="npu:0" )
104+ hessian = hessian_cpu.to(device=npu_test_device )
76105
77106 factor, damp = gptq.hessian_inverse(hessian)
78107 torch.npu.synchronize()
@@ -90,8 +119,7 @@ def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings():
90119 """
91120 )
92121
93- env = os .environ .copy ()
94- env .setdefault ("ASCEND_RT_VISIBLE_DEVICES" , "0" )
122+ env = _default_subprocess_env ()
95123 proc = subprocess .run (
96124 [sys .executable , "-c" , script ],
97125 cwd = os .getcwd (),
0 commit comments