From 0c2294c1119973439697119048b86aab2b0030f1 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Fri, 22 May 2026 06:36:10 +0000 Subject: [PATCH 1/2] Pin Ascend unit tests to NPU 7 --- .github/scripts/ci_tests.py | 41 ++++++++++++++++++++++++------- .github/scripts/ci_workflow.py | 7 +++++- tests/test_npu_linalg.py | 44 +++++++++++++++++++++++++++------- 3 files changed, 75 insertions(+), 17 deletions(-) diff --git a/.github/scripts/ci_tests.py b/.github/scripts/ci_tests.py index faa8bf2cb..bda63d1f0 100644 --- a/.github/scripts/ci_tests.py +++ b/.github/scripts/ci_tests.py @@ -24,6 +24,27 @@ ERROR_PATTERN = re.compile( r"nvcc fatal|error:|fatal error|ModuleNotFoundError|ImportError|AssertionError|Exception|is the correct path|No such file or directory|Repo id must be in" ) +DEFAULT_ASCEND_RT_VISIBLE_DEVICES = "7" + + +def is_ascend_npu_test(test_script: str) -> bool: + return "npu" in re.split(r"[/_.-]+", test_script.removesuffix(".py")) + + +def configure_ascend_npu_test_env(env: dict[str, str], test_script: str) -> bool: + if not is_ascend_npu_test(test_script): + return False + + visible_devices = env.get("ASCEND_RT_VISIBLE_DEVICES", "").strip() + if not visible_devices: + visible_devices = env.get("GPTQMODEL_TEST_ASCEND_RT_VISIBLE_DEVICES", DEFAULT_ASCEND_RT_VISIBLE_DEVICES) + env["ASCEND_RT_VISIBLE_DEVICES"] = visible_devices + + if "GPTQMODEL_TEST_NPU_DEVICE" not in env and visible_devices and "," not in visible_devices: + env["GPTQMODEL_TEST_NPU_DEVICE"] = "npu:0" + + env["CUDA_VISIBLE_DEVICES"] = "" + return True def kill_process_group(proc: subprocess.Popen[str]) -> None: @@ -112,6 +133,11 @@ def run_tests(args: argparse.Namespace) -> int: env["CUDA_VISIBLE_DEVICES"] = "" print("CUDA_VISIBLE_DEVICES=") + ascend_npu_test = configure_ascend_npu_test_env(env, args.test_script) + if ascend_npu_test: + print(f"ASCEND_RT_VISIBLE_DEVICES={env.get('ASCEND_RT_VISIBLE_DEVICES', '')}") + print(f"GPTQMODEL_TEST_NPU_DEVICE={env.get('GPTQMODEL_TEST_NPU_DEVICE', '')}") + if args.xpu_mode: maybe_uninstall_vllm() @@ -138,17 +164,16 @@ def run_tests(args: argparse.Namespace) -> int: start_new_session=True, ) - keepalive_endpoint = f"{normalize_base_url(args.base_url)}/keepalive" - keepalive_payload = build_job_request( - runner_name=args.runner, - run_id=args.run_id, - test_name=args.test_script, - ) - monitor_thread = None monitor_stop = None monitor_state = {"forced_exit_code": 0} - if env.get("CUDA_VISIBLE_DEVICES", ""): + if env.get("CUDA_VISIBLE_DEVICES", "") and not ascend_npu_test: + keepalive_endpoint = f"{normalize_base_url(args.base_url)}/keepalive" + keepalive_payload = build_job_request( + runner_name=args.runner, + run_id=args.run_id, + test_name=args.test_script, + ) monitor_thread, monitor_stop, monitor_state = start_keepalive_monitor( proc=proc, keepalive_endpoint=keepalive_endpoint, diff --git a/.github/scripts/ci_workflow.py b/.github/scripts/ci_workflow.py index 7e256c39f..58112f383 100644 --- a/.github/scripts/ci_workflow.py +++ b/.github/scripts/ci_workflow.py @@ -121,6 +121,10 @@ def normalize_test_name(name: str) -> str: return strip_py_suffix(name.removeprefix("tests/")) +def is_npu_test_name(name: str) -> bool: + return "npu" in re.split(r"[/_.-]+", normalize_test_name(name)) + + def test_path_from_name(test_name: str, tests_root: str | Path = "tests") -> Path: normalized = normalize_test_name(test_name) return Path(tests_root) / f"{normalized}.py" @@ -372,7 +376,8 @@ def resolve_test_runtime(test_name: str, tests_root: str | Path = "tests") -> Te normalized = normalize_test_name(test_name) test_path = test_path_from_name(normalized, tests_root=tests_root) xpu_mode = "xpu" in normalized - skip_gpu_allocation = xpu_mode or has_no_gpu_marker(test_path) + npu_mode = is_npu_test_name(normalized) + skip_gpu_allocation = xpu_mode or npu_mode or has_no_gpu_marker(test_path) return TestRuntime( test_name=normalized, test_path=str(test_path), diff --git a/tests/test_npu_linalg.py b/tests/test_npu_linalg.py index d865d0827..a6eb4a0a8 100644 --- a/tests/test_npu_linalg.py +++ b/tests/test_npu_linalg.py @@ -10,10 +10,37 @@ import torch from gptqmodel.quantization.npu_linalg import npu_inverse_cholesky_factor -from gptqmodel.utils.torch import HAS_NPU +from gptqmodel.utils.torch import HAS_NPU, last_npu_device_by_pci_bus_order pytestmark = pytest.mark.skipif(not HAS_NPU, reason="Ascend NPU is required") +DEFAULT_ASCEND_RT_VISIBLE_DEVICES = "7" + + +def _default_npu_test_device() -> str: + selected = last_npu_device_by_pci_bus_order() + return str(selected) if selected is not None else "npu:0" + + +NPU_TEST_DEVICE = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE", _default_npu_test_device()) + + +def _test_npu_device() -> torch.device: + device = torch.device(NPU_TEST_DEVICE) + if HAS_NPU: + torch.npu.set_device(device) + return device + + +def _default_subprocess_env() -> dict[str, str]: + env = os.environ.copy() + visible_devices = env.get("ASCEND_RT_VISIBLE_DEVICES", "").strip() + if not visible_devices: + visible_devices = env.get("GPTQMODEL_TEST_ASCEND_RT_VISIBLE_DEVICES", DEFAULT_ASCEND_RT_VISIBLE_DEVICES) + env["ASCEND_RT_VISIBLE_DEVICES"] = visible_devices + if "GPTQMODEL_TEST_NPU_DEVICE" not in env and visible_devices and "," not in visible_devices: + env["GPTQMODEL_TEST_NPU_DEVICE"] = "npu:0" + return env def _spd_matrix(size: int, seed: int) -> torch.Tensor: @@ -23,7 +50,7 @@ def _spd_matrix(size: int, seed: int) -> torch.Tensor: def test_npu_inverse_cholesky_factor_matches_cpu_reference(): - device = torch.device("npu:0") + device = _test_npu_device() for size in (8, 64, 128): matrix_cpu = _spd_matrix(size, seed=1000 + size) @@ -46,7 +73,7 @@ def test_npu_inverse_cholesky_factor_matches_cpu_reference(): def test_npu_inverse_cholesky_factor_rejects_non_positive_definite_matrix(): - matrix = torch.tensor([[0.0, 1.0], [1.0, 0.0]], dtype=torch.float32, device="npu:0") + matrix = torch.tensor([[0.0, 1.0], [1.0, 0.0]], dtype=torch.float32, device=_test_npu_device()) with pytest.raises(torch._C._LinAlgError): npu_inverse_cholesky_factor(matrix) @@ -55,6 +82,7 @@ def test_npu_inverse_cholesky_factor_rejects_non_positive_definite_matrix(): def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings(): script = textwrap.dedent( """ + import os import torch import torch.nn as nn from gptqmodel.quantization.config import QuantizeConfig @@ -64,15 +92,16 @@ def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings(): if not HAS_NPU: raise RuntimeError("Ascend NPU is not available") - torch.npu.set_device(0) + npu_test_device = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE", "npu:0") + torch.npu.set_device(npu_test_device) torch.manual_seed(0) - module = nn.Linear(16, 16, bias=False, device="npu:0", dtype=torch.float16) + module = nn.Linear(16, 16, bias=False, device=npu_test_device, dtype=torch.float16) gptq = GPTQ(module, qcfg=QuantizeConfig(damp_percent=0.05, damp_auto_increment=0.05)) base = torch.randn(16, 16, dtype=torch.float32) hessian_cpu = base.matmul(base.T) + torch.eye(16, dtype=torch.float32) * 0.25 - hessian = hessian_cpu.to(device="npu:0") + hessian = hessian_cpu.to(device=npu_test_device) factor, damp = gptq.hessian_inverse(hessian) torch.npu.synchronize() @@ -90,8 +119,7 @@ def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings(): """ ) - env = os.environ.copy() - env.setdefault("ASCEND_RT_VISIBLE_DEVICES", "0") + env = _default_subprocess_env() proc = subprocess.run( [sys.executable, "-c", script], cwd=os.getcwd(), From 504b69f1803fe0dc43c7a4d0d010a7fc84baa6a1 Mon Sep 17 00:00:00 2001 From: Qubitium Date: Fri, 22 May 2026 08:54:13 +0000 Subject: [PATCH 2/2] Use logical NPU 0 in Ascend tests --- .github/scripts/ci_tests.py | 4 ---- tests/test_npu_linalg.py | 29 ++++------------------------- tests/test_npu_support.py | 8 +------- 3 files changed, 5 insertions(+), 36 deletions(-) diff --git a/.github/scripts/ci_tests.py b/.github/scripts/ci_tests.py index bda63d1f0..c6ff92175 100644 --- a/.github/scripts/ci_tests.py +++ b/.github/scripts/ci_tests.py @@ -40,9 +40,6 @@ def configure_ascend_npu_test_env(env: dict[str, str], test_script: str) -> bool visible_devices = env.get("GPTQMODEL_TEST_ASCEND_RT_VISIBLE_DEVICES", DEFAULT_ASCEND_RT_VISIBLE_DEVICES) env["ASCEND_RT_VISIBLE_DEVICES"] = visible_devices - if "GPTQMODEL_TEST_NPU_DEVICE" not in env and visible_devices and "," not in visible_devices: - env["GPTQMODEL_TEST_NPU_DEVICE"] = "npu:0" - env["CUDA_VISIBLE_DEVICES"] = "" return True @@ -136,7 +133,6 @@ def run_tests(args: argparse.Namespace) -> int: ascend_npu_test = configure_ascend_npu_test_env(env, args.test_script) if ascend_npu_test: print(f"ASCEND_RT_VISIBLE_DEVICES={env.get('ASCEND_RT_VISIBLE_DEVICES', '')}") - print(f"GPTQMODEL_TEST_NPU_DEVICE={env.get('GPTQMODEL_TEST_NPU_DEVICE', '')}") if args.xpu_mode: maybe_uninstall_vllm() diff --git a/tests/test_npu_linalg.py b/tests/test_npu_linalg.py index a6eb4a0a8..9ab810812 100644 --- a/tests/test_npu_linalg.py +++ b/tests/test_npu_linalg.py @@ -10,19 +10,11 @@ import torch from gptqmodel.quantization.npu_linalg import npu_inverse_cholesky_factor -from gptqmodel.utils.torch import HAS_NPU, last_npu_device_by_pci_bus_order +from gptqmodel.utils.torch import HAS_NPU pytestmark = pytest.mark.skipif(not HAS_NPU, reason="Ascend NPU is required") -DEFAULT_ASCEND_RT_VISIBLE_DEVICES = "7" - - -def _default_npu_test_device() -> str: - selected = last_npu_device_by_pci_bus_order() - return str(selected) if selected is not None else "npu:0" - - -NPU_TEST_DEVICE = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE", _default_npu_test_device()) +NPU_TEST_DEVICE = "npu:0" def _test_npu_device() -> torch.device: @@ -32,17 +24,6 @@ def _test_npu_device() -> torch.device: return device -def _default_subprocess_env() -> dict[str, str]: - env = os.environ.copy() - visible_devices = env.get("ASCEND_RT_VISIBLE_DEVICES", "").strip() - if not visible_devices: - visible_devices = env.get("GPTQMODEL_TEST_ASCEND_RT_VISIBLE_DEVICES", DEFAULT_ASCEND_RT_VISIBLE_DEVICES) - env["ASCEND_RT_VISIBLE_DEVICES"] = visible_devices - if "GPTQMODEL_TEST_NPU_DEVICE" not in env and visible_devices and "," not in visible_devices: - env["GPTQMODEL_TEST_NPU_DEVICE"] = "npu:0" - return env - - def _spd_matrix(size: int, seed: int) -> torch.Tensor: generator = torch.Generator(device="cpu").manual_seed(seed) values = torch.randn(size, size, generator=generator, dtype=torch.float32) @@ -82,7 +63,6 @@ def test_npu_inverse_cholesky_factor_rejects_non_positive_definite_matrix(): def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings(): script = textwrap.dedent( """ - import os import torch import torch.nn as nn from gptqmodel.quantization.config import QuantizeConfig @@ -92,7 +72,7 @@ def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings(): if not HAS_NPU: raise RuntimeError("Ascend NPU is not available") - npu_test_device = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE", "npu:0") + npu_test_device = "npu:0" torch.npu.set_device(npu_test_device) torch.manual_seed(0) @@ -119,11 +99,10 @@ def test_gptq_npu_hessian_inverse_avoids_torch_npu_cpu_fallback_warnings(): """ ) - env = _default_subprocess_env() proc = subprocess.run( [sys.executable, "-c", script], cwd=os.getcwd(), - env=env, + env=os.environ.copy(), text=True, capture_output=True, timeout=60, diff --git a/tests/test_npu_support.py b/tests/test_npu_support.py index e69d705b1..5a102abe1 100644 --- a/tests/test_npu_support.py +++ b/tests/test_npu_support.py @@ -1,5 +1,4 @@ import copy -import os import sys import warnings @@ -29,12 +28,7 @@ from gptqmodel.utils.torch import HAS_NPU, last_npu_device_by_pci_bus_order -def _default_npu_test_device() -> str: - selected = last_npu_device_by_pci_bus_order() - return str(selected) if selected is not None else "npu:0" - - -NPU_TEST_DEVICE = os.environ.get("GPTQMODEL_TEST_NPU_DEVICE", _default_npu_test_device()) +NPU_TEST_DEVICE = "npu:0" NPU_CPU_FALLBACK_MARKERS = ( "not currently supported on the NPU backend", "fall back to run on the CPU",