From a0ef1ed5839de7c0a6a35b8b5b92fb34c5d59870 Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Sat, 9 May 2026 23:37:04 -0500
Subject: [PATCH 01/10] Replace numba.runtests with pytest in
 subprocess_test_runner

This is the first of many commits that migrate the test suite to
pytest. Since numba.runtests is built on unittest, switching the
subprocess test runner to invoke pytest instead seems like a good
starting point.
---
 numba_cuda/numba/cuda/tests/support.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index a895c97ed..3024fcef1 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -675,8 +675,11 @@ def subprocess_test_runner(
         executed without error. The timeout kwarg can be used to allow more time
         for longer running tests, it defaults to 60 seconds.
         """
-        parts = (test_module, test_class, test_name)
-        fully_qualified_test = ".".join(x for x in parts if x is not None)
+        fully_qualified_test = test_module
+        if test_class:
+            fully_qualified_test += f"::{test_class}"
+        if test_name:
+            fully_qualified_test += f"::{test_name}"
         flags_args = []
         if flags is not None:
             for flag, value in flags.items():
@@ -686,7 +689,8 @@ def subprocess_test_runner(
             sys.executable,
             *flags_args,
             "-m",
-            "numba.runtests",
+            "pytest",
+            "--pyargs",
             fully_qualified_test,
         ]
         env_copy = os.environ.copy()
@@ -715,7 +719,10 @@ def subprocess_test_runner(
         if no_tests_ran in status.stderr:
             self.skipTest(no_tests_ran)
         else:
-            self.assertIn("OK", status.stderr)
+            # status.stderr for successful runs comprise a string like
+            # "...Ran 1 test in 0.565s\n\nOK\n". Migrating to pytest means the
+            # error stream in successful runs are empty
+            self.assertEqual("", status.stderr)
         return status
 
     def run_test_in_subprocess(maybefunc=None, timeout=60, envvars=None):

From 3df3170e492d1b996bba5633d223b8121453318d Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Sat, 9 May 2026 23:56:47 -0500
Subject: [PATCH 02/10] Move subprocess functions outside TestCase class

Given that many of the test classes are TestCase subclasses,
turning these helper methods into module-level functions that can be
imported elsewhere seems like a good path forward for the pytest
migration.
---
 .../numba/cuda/tests/cudapy/test_extending.py |   3 +-
 numba_cuda/numba/cuda/tests/support.py        | 228 +++++++++---------
 2 files changed, 117 insertions(+), 114 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
index 70e2298bd..2ed459bb2 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
@@ -12,6 +12,7 @@
 from numba.cuda.testing import skip_on_standalone_numba_cuda
 from numba.cuda import types
 from numba.cuda import config
+from numba.cuda.tests.support import run_test_in_subprocess
 
 if config.ENABLE_CUDASIM:
     raise unittest.SkipTest("Simulator does not support extending types")
@@ -408,7 +409,7 @@ def test_func1(self):
             cfunc[1, 1](18.0, res)
         self.assertPreciseEqual(res[0], 6.0)
 
-    @TestCase.run_test_in_subprocess
+    @run_test_in_subprocess
     def test_func1_isolated(self):
         self.test_func1()
 
diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 3024fcef1..93377f998 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -217,6 +217,121 @@ def run_in_subprocess(code, flags=(), env=None, timeout=30):
         return proc.stdout, proc.stderr
 
 
+def run_test_in_subprocess(maybefunc=None, timeout=60, envvars=None):
+    """
+    Runs the decorated test in a subprocess via invoking pytest.
+    kwargs timeout and envvars are passed through to
+    subprocess_test_runner.
+    """
+
+    def wrapper(func):
+        def inner(self, *args, **kwargs):
+            if os.environ.get("SUBPROC_TEST", None) != func.__name__:
+                # Not in a subprocess test env, so stage the call to run the
+                # test in a subprocess which will set the env var.
+                class_name = self.__class__.__name__
+                subprocess_test_runner(
+                    test_module=self.__module__,
+                    test_class=class_name,
+                    test_name=func.__name__,
+                    timeout=timeout,
+                    envvars=envvars,
+                    _subproc_test_env=func.__name__,
+                )
+            else:
+                # env var is set, so we're in the subprocess, run the
+                # actual test.
+                func(self)
+
+        return inner
+
+    if isinstance(maybefunc, pytypes.FunctionType):
+        return wrapper(maybefunc)
+    else:
+        return wrapper
+
+
+def subprocess_test_runner(
+    test_module,
+    test_class=None,
+    test_name=None,
+    envvars=None,
+    timeout=60,
+    flags=None,
+    _subproc_test_env="1",
+):
+    """
+    Runs named unit test(s) as specified in the arguments as:
+    test_module::test_class::test_name. test_module must always be supplied
+    and if no further refinement is made with test_class and test_name then
+    all tests in the module will be run. The tests will be run in a
+    subprocess with environment variables specified in `envvars`.
+    If given, envvars must be a map of form:
+        environment variable name (str) -> value (str)
+    If given, flags must be a map of form:
+        flag including the `-` (str) -> value (str)
+    It is most convenient to use this method in conjunction with
+    @needs_subprocess as the decorator will cause the decorated test to be
+    skipped unless the `SUBPROC_TEST` environment variable is set to
+    the same value of ``_subproc_test_env``
+    (this special environment variable is set by this method such that the
+    specified test(s) will not be skipped in the subprocess).
+
+
+    Following execution in the subprocess this method will check the test(s)
+    executed without error. The timeout kwarg can be used to allow more time
+    for longer running tests, it defaults to 60 seconds.
+    """
+    fully_qualified_test = test_module
+    if test_class:
+        fully_qualified_test += f"::{test_class}"
+    if test_name:
+        fully_qualified_test += f"::{test_name}"
+    flags_args = []
+    if flags is not None:
+        for flag, value in flags.items():
+            flags_args.append(f"{flag}")
+            flags_args.append(f"{value}")
+    cmd = [
+        sys.executable,
+        *flags_args,
+        "-m",
+        "pytest",
+        "--pyargs",
+        fully_qualified_test,
+    ]
+    env_copy = os.environ.copy()
+    env_copy["SUBPROC_TEST"] = _subproc_test_env
+    try:
+        env_copy["COVERAGE_PROCESS_START"] = os.environ["COVERAGE_RCFILE"]
+    except KeyError:
+        pass  # ignored
+    envvars = pytypes.MappingProxyType({} if envvars is None else envvars)
+    env_copy.update(envvars)
+    status = subprocess.run(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        timeout=timeout,
+        env=env_copy,
+        universal_newlines=True,
+    )
+    streams = (
+        f"\ncaptured stdout: {status.stdout}\ncaptured stderr: {status.stderr}"
+    )
+    assert status.returncode == 0, streams
+    # Python 3.12.1 report
+    no_tests_ran = "NO TESTS RAN"
+    if no_tests_ran in status.stderr:
+        pytest.skip(no_tests_ran)
+    else:
+        # status.stderr for successful runs comprise a string like
+        # "...Ran 1 test in 0.565s\n\nOK\n". Migrating to pytest means the
+        # error stream in successful runs are empty
+        assert status.stderr == ""
+    return status
+
+
 def captured_stdout():
     """Capture the output of sys.stdout:
 
@@ -643,119 +758,6 @@ def _assertNumberEqual(first, second, delta=None):
         else:
             _assertNumberEqual(first, second, delta)
 
-    def subprocess_test_runner(
-        self,
-        test_module,
-        test_class=None,
-        test_name=None,
-        envvars=None,
-        timeout=60,
-        flags=None,
-        _subproc_test_env="1",
-    ):
-        """
-        Runs named unit test(s) as specified in the arguments as:
-        test_module.test_class.test_name. test_module must always be supplied
-        and if no further refinement is made with test_class and test_name then
-        all tests in the module will be run. The tests will be run in a
-        subprocess with environment variables specified in `envvars`.
-        If given, envvars must be a map of form:
-            environment variable name (str) -> value (str)
-        If given, flags must be a map of form:
-            flag including the `-` (str) -> value (str)
-        It is most convenient to use this method in conjunction with
-        @needs_subprocess as the decorator will cause the decorated test to be
-        skipped unless the `SUBPROC_TEST` environment variable is set to
-        the same value of ``_subproc_test_env``
-        (this special environment variable is set by this method such that the
-        specified test(s) will not be skipped in the subprocess).
-
-
-        Following execution in the subprocess this method will check the test(s)
-        executed without error. The timeout kwarg can be used to allow more time
-        for longer running tests, it defaults to 60 seconds.
-        """
-        fully_qualified_test = test_module
-        if test_class:
-            fully_qualified_test += f"::{test_class}"
-        if test_name:
-            fully_qualified_test += f"::{test_name}"
-        flags_args = []
-        if flags is not None:
-            for flag, value in flags.items():
-                flags_args.append(f"{flag}")
-                flags_args.append(f"{value}")
-        cmd = [
-            sys.executable,
-            *flags_args,
-            "-m",
-            "pytest",
-            "--pyargs",
-            fully_qualified_test,
-        ]
-        env_copy = os.environ.copy()
-        env_copy["SUBPROC_TEST"] = _subproc_test_env
-        try:
-            env_copy["COVERAGE_PROCESS_START"] = os.environ["COVERAGE_RCFILE"]
-        except KeyError:
-            pass  # ignored
-        envvars = pytypes.MappingProxyType({} if envvars is None else envvars)
-        env_copy.update(envvars)
-        status = subprocess.run(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            timeout=timeout,
-            env=env_copy,
-            universal_newlines=True,
-        )
-        streams = (
-            f"\ncaptured stdout: {status.stdout}\n"
-            f"captured stderr: {status.stderr}"
-        )
-        self.assertEqual(status.returncode, 0, streams)
-        # Python 3.12.1 report
-        no_tests_ran = "NO TESTS RAN"
-        if no_tests_ran in status.stderr:
-            self.skipTest(no_tests_ran)
-        else:
-            # status.stderr for successful runs comprise a string like
-            # "...Ran 1 test in 0.565s\n\nOK\n". Migrating to pytest means the
-            # error stream in successful runs are empty
-            self.assertEqual("", status.stderr)
-        return status
-
-    def run_test_in_subprocess(maybefunc=None, timeout=60, envvars=None):
-        """Runs the decorated test in a subprocess via invoking numba's test
-        runner. kwargs timeout and envvars are passed through to
-        subprocess_test_runner."""
-
-        def wrapper(func):
-            def inner(self, *args, **kwargs):
-                if os.environ.get("SUBPROC_TEST", None) != func.__name__:
-                    # Not in a subprocess test env, so stage the call to run the
-                    # test in a subprocess which will set the env var.
-                    class_name = self.__class__.__name__
-                    self.subprocess_test_runner(
-                        test_module=self.__module__,
-                        test_class=class_name,
-                        test_name=func.__name__,
-                        timeout=timeout,
-                        envvars=envvars,
-                        _subproc_test_env=func.__name__,
-                    )
-                else:
-                    # env var is set, so we're in the subprocess, run the
-                    # actual test.
-                    func(self)
-
-            return inner
-
-        if isinstance(maybefunc, pytypes.FunctionType):
-            return wrapper(maybefunc)
-        else:
-            return wrapper
-
     def make_dummy_type(self):
         """Use to generate a dummy type unique to this test. Returns a python
         Dummy class and a corresponding Numba type DummyType."""

From 7a9d01fe9e2546db7f75b6f9780778fd1ccb5034 Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Sun, 10 May 2026 00:05:07 -0500
Subject: [PATCH 03/10] Move arithmetic functions outside TestCase class

Move the assertPreciseEqual and assertStridesEqual methods out of the
TestCase class (along with many internal helper functions) so they
can be imported and called as plain functions.
---
 .../numba/cuda/tests/core/test_serialize.py   |   3 +-
 .../cuda/tests/cudadrv/test_array_attr.py     |  13 +-
 .../numba/cuda/tests/cudapy/cache_usecases.py |  11 +-
 .../tests/cudapy/cache_with_cpu_usecases.py   |   9 +-
 .../numba/cuda/tests/cudapy/test_analysis.py  |   8 +-
 .../numba/cuda/tests/cudapy/test_array.py     |   3 +-
 .../tests/cudapy/test_array_reductions.py     |  33 +-
 .../numba/cuda/tests/cudapy/test_caching.py   |  87 +--
 .../numba/cuda/tests/cudapy/test_complex.py   |   3 +-
 .../numba/cuda/tests/cudapy/test_datetime.py  |  21 +-
 .../numba/cuda/tests/cudapy/test_debuginfo.py |   7 +-
 .../cuda/tests/cudapy/test_dispatcher.py      |  34 +-
 .../numba/cuda/tests/cudapy/test_enums.py     |  11 +-
 .../numba/cuda/tests/cudapy/test_extending.py |   8 +-
 .../cuda/tests/cudapy/test_gufunc_scalar.py   |   3 +-
 .../cuda/tests/cudapy/test_intrinsics.py      |  15 +-
 .../numba/cuda/tests/cudapy/test_recursion.py |   5 +-
 numba_cuda/numba/cuda/tests/support.py        | 640 +++++++++---------
 18 files changed, 460 insertions(+), 454 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/core/test_serialize.py b/numba_cuda/numba/cuda/tests/core/test_serialize.py
index a2798b4a0..aee32ae27 100644
--- a/numba_cuda/numba/cuda/tests/core/test_serialize.py
+++ b/numba_cuda/numba/cuda/tests/core/test_serialize.py
@@ -21,6 +21,7 @@
 from numba.cuda.tests.support import TestCase
 from numba.cuda.cloudpickle import dumps, loads
 from numba.cuda.testing import skip_on_standalone_numba_cuda
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 @skip_on_standalone_numba_cuda
@@ -50,7 +51,7 @@ def check_result(func):
             ):
                 self.assertRaises(expected_result, func, *args)
             else:
-                self.assertPreciseEqual(func(*args), expected_result)
+                assertPreciseEqual(func(*args), expected_result)
 
         # Control
         check_result(func)
diff --git a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
index 9bcb78309..8f17bf5d2 100644
--- a/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
+++ b/numba_cuda/numba/cuda/tests/cudadrv/test_array_attr.py
@@ -4,6 +4,7 @@
 import numpy as np
 from numba import cuda
 from numba.cuda.testing import unittest, CUDATestCase, skip_on_cudasim
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 class TestArrayAttr(CUDATestCase):
@@ -52,7 +53,7 @@ def test_ravel_1d(self):
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)  # ravel returns new array
             self.assertEqual(flat.ndim, 1)
-            self.assertPreciseEqual(expect, flat)
+            assertPreciseEqual(expect, flat)
 
     @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
     def test_ravel_stride_1d(self):
@@ -77,7 +78,7 @@ def test_ravel_c(self):
         flat = dflat.copy_to_host()
         self.assertTrue(dary is not dflat)
         self.assertEqual(flat.ndim, 1)
-        self.assertPreciseEqual(expect, flat)
+        assertPreciseEqual(expect, flat)
 
         # explicit order kwarg
         for order in "CA":
@@ -87,7 +88,7 @@ def test_ravel_c(self):
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)
             self.assertEqual(flat.ndim, 1)
-            self.assertPreciseEqual(expect, flat)
+            assertPreciseEqual(expect, flat)
 
     @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
     def test_ravel_stride_c(self):
@@ -112,7 +113,7 @@ def test_ravel_f(self):
             flat = dflat.copy_to_host()
             self.assertTrue(dary is not dflat)
             self.assertEqual(flat.ndim, 1)
-            self.assertPreciseEqual(expect, flat)
+            assertPreciseEqual(expect, flat)
 
     @skip_on_cudasim("CUDA Array Interface is not supported in the simulator")
     def test_ravel_stride_f(self):
@@ -132,7 +133,7 @@ def test_reshape_c(self):
         dary = cuda.to_device(ary)
         dary_reshaped = dary.reshape(2, 5)
         got = dary_reshaped.copy_to_host()
-        self.assertPreciseEqual(expect, got)
+        assertPreciseEqual(expect, got)
 
     def test_reshape_f(self):
         ary = np.arange(10)
@@ -140,7 +141,7 @@ def test_reshape_f(self):
         dary = cuda.to_device(ary)
         dary_reshaped = dary.reshape(2, 5, order="F")
         got = dary_reshaped.copy_to_host()
-        self.assertPreciseEqual(expect, got)
+        assertPreciseEqual(expect, got)
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py b/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py
index 637f6d1c3..96c29b1fc 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/cache_usecases.py
@@ -3,6 +3,7 @@
 
 from numba import cuda
 from numba.cuda.testing import CUDATestCase
+from numba.cuda.tests.support import assertPreciseEqual
 import numpy as np
 import sys
 
@@ -214,14 +215,14 @@ class _TestModule(CUDATestCase):
     """
 
     def check_module(self, mod):
-        self.assertPreciseEqual(mod.add_usecase(2, 3), 6)
-        self.assertPreciseEqual(mod.outer_uncached(3, 2), 2)
-        self.assertPreciseEqual(mod.outer(3, 2), 2)
+        assertPreciseEqual(mod.add_usecase(2, 3), 6)
+        assertPreciseEqual(mod.outer_uncached(3, 2), 2)
+        assertPreciseEqual(mod.outer(3, 2), 2)
 
         packed_rec = mod.record_return_packed(mod.packed_arr, 1)
-        self.assertPreciseEqual(tuple(packed_rec), (2, 43.5))
+        assertPreciseEqual(tuple(packed_rec), (2, 43.5))
         aligned_rec = mod.record_return_aligned(mod.aligned_arr, 1)
-        self.assertPreciseEqual(tuple(aligned_rec), (2, 43.5))
+        assertPreciseEqual(tuple(aligned_rec), (2, 43.5))
 
         mod.simple_usecase_caller(2)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py b/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py
index ce1d9a041..d3a368def 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/cache_with_cpu_usecases.py
@@ -7,6 +7,7 @@
 from numba.cuda import HAS_NUMBA
 from numba.cuda.testing import CUDATestCase, skip_on_standalone_numba_cuda
 from numba.cuda.tests.cudapy.cache_usecases import CUDAUseCase, UseCase
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 class CPUUseCase(UseCase):
@@ -39,10 +40,10 @@ class _TestModule(CUDATestCase):
     """
 
     def check_module(self, mod):
-        self.assertPreciseEqual(mod.assign_cpu(5), 5)
-        self.assertPreciseEqual(mod.assign_cpu(5.5), 5.5)
-        self.assertPreciseEqual(mod.assign_cuda(5), 5)
-        self.assertPreciseEqual(mod.assign_cuda(5.5), 5.5)
+        assertPreciseEqual(mod.assign_cpu(5), 5)
+        assertPreciseEqual(mod.assign_cpu(5.5), 5.5)
+        assertPreciseEqual(mod.assign_cuda(5), 5)
+        assertPreciseEqual(mod.assign_cuda(5.5), 5.5)
 
 
 def self_test():
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py b/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py
index 9ae545d48..909358ac0 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py
@@ -17,7 +17,11 @@
 from numba.cuda.core import postproc, rewrites, ir_utils
 from numba.cuda.core.options import ParallelOptions
 from numba.cuda.core.inline_closurecall import InlineClosureCallPass
-from numba.cuda.tests.support import TestCase, override_config
+from numba.cuda.tests.support import (
+    assertPreciseEqual,
+    TestCase,
+    override_config,
+)
 from numba.cuda.core.analysis import (
     dead_branch_prune,
     rewrite_semantic_constants,
@@ -159,7 +163,7 @@ def run_func(self, impl, args):
         cres.py_func(*args)
         with override_config("DISABLE_PERFORMANCE_WARNINGS", 1):
             cres[1, 1](*dargs)
-        self.assertPreciseEqual(out[0], cout[0])
+        assertPreciseEqual(out[0], cout[0])
 
 
 class TestBranchPrune(TestBranchPruneBase):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array.py b/numba_cuda/numba/cuda/tests/cudapy/test_array.py
index be335d8f4..c8f8f6cea 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array.py
@@ -9,6 +9,7 @@
 from numba.cuda import config
 import pytest
 from numba.cuda.np.numpy_support import carray, farray
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 if config.ENABLE_CUDASIM:
@@ -302,7 +303,7 @@ def check(pyfunc, kernelfunc, arr, shape):
             expected = pyfunc(arr, shape)
             got = np.zeros(expected.shape, dtype=arr.dtype)
             kernel[1, 1](arr, shape, got)
-            self.assertPreciseEqual(got, expected)
+            assertPreciseEqual(got, expected)
 
         def check_only_shape(kernelfunc, arr, shape, expected_shape):
             kernel = cuda.jit(kernelfunc)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index 8038c7bc6..aaa53d17c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -7,6 +7,7 @@
 from numba.cuda.testing import skip_on_cudasim, skip_on_nvjitlink_13_1_sm_120
 from numba.cuda.misc.special import literal_unroll
 from numba.cuda import config
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 @skip_on_cudasim("doesn't work in the simulator")
@@ -48,7 +49,7 @@ def kernel(out):
         out = cuda.to_device(np.zeros(len(cases), dtype=np.bool_))
         kernel[1, 1](out)
         got = out.copy_to_host()
-        self.assertPreciseEqual(expected, got)
+        assertPreciseEqual(expected, got)
 
     def test_any_basic(self):
         cases = (
@@ -70,7 +71,7 @@ def kernel(out):
         expected = np.array([np.any(a) for a in cases], dtype=np.bool_)
         out = cuda.to_device(np.zeros(len(cases), dtype=np.bool_))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     @skip_on_nvjitlink_13_1_sm_120(
         "sum fails at link time on sm_120 + CUDA 13.1"
@@ -100,7 +101,7 @@ def kernel(out):
         expected = np.array([np.sum(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     @skip_on_nvjitlink_13_1_sm_120(
         "mean fails at link time on sm_120 + CUDA 13.1"
@@ -130,7 +131,7 @@ def kernel(out):
         expected = np.array([np.mean(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_var_basic(self):
         arrays = (
@@ -157,7 +158,7 @@ def kernel(out):
         expected = np.array([np.var(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host(), prec="double")
+        assertPreciseEqual(expected, out.copy_to_host(), prec="double")
 
     def test_std_basic(self):
         arrays = (
@@ -184,7 +185,7 @@ def kernel(out):
         expected = np.array([np.std(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_min_basic(self):
         arrays = (
@@ -211,7 +212,7 @@ def kernel(out):
         expected = np.array([np.min(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_max_basic(self):
         arrays = (
@@ -238,7 +239,7 @@ def kernel(out):
         expected = np.array([np.max(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_nanmin_basic(self):
         arrays = (
@@ -263,7 +264,7 @@ def kernel(out):
         expected = np.array([np.nanmin(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_nanmax_basic(self):
         arrays = (
@@ -288,7 +289,7 @@ def kernel(out):
         expected = np.array([np.nanmax(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     @skip_on_nvjitlink_13_1_sm_120(
         "nanmean fails at link time on sm_120 + CUDA 13.1"
@@ -316,7 +317,7 @@ def kernel(out):
         expected = np.array([np.nanmean(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     @skip_on_nvjitlink_13_1_sm_120(
         "nansum fails at link time on sm_120 + CUDA 13.1"
@@ -344,7 +345,7 @@ def kernel(out):
         expected = np.array([np.nansum(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     @skip_on_nvjitlink_13_1_sm_120(
         "nanprod fails at link time on sm_120 + CUDA 13.1"
@@ -372,7 +373,7 @@ def kernel(out):
         expected = np.array([np.nanprod(a) for a in arrays], dtype=np.float64)
         out = cuda.to_device(np.zeros(len(arrays), dtype=np.float64))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_count_nonzero_basic(self):
         cases = (
@@ -396,7 +397,7 @@ def kernel(out):
         expected = np.array([np.count_nonzero(a) for a in cases], dtype=np.intp)
         out = cuda.to_device(np.zeros(len(cases), dtype=np.intp))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_count_nonzero_bool(self):
         cases = (
@@ -415,7 +416,7 @@ def kernel(out):
         expected = np.array([np.count_nonzero(a) for a in cases], dtype=np.intp)
         out = cuda.to_device(np.zeros(len(cases), dtype=np.intp))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
 
     def test_count_nonzero_2d(self):
         cases = (
@@ -434,4 +435,4 @@ def kernel(out):
         expected = np.array([np.count_nonzero(a) for a in cases], dtype=np.intp)
         out = cuda.to_device(np.zeros(len(cases), dtype=np.intp))
         kernel[1, 1](out)
-        self.assertPreciseEqual(expected, out.copy_to_host())
+        assertPreciseEqual(expected, out.copy_to_host())
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
index 4db395ccd..2da869a4d 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
@@ -22,6 +22,7 @@
     skip_on_standalone_numba_cuda,
 )
 from numba.cuda.tests.support import (
+    assertPreciseEqual,
     TestCase,
     temp_directory,
     import_dynamic,
@@ -220,19 +221,19 @@ def test_caching(self):
         self.check_pycache(0)
 
         f = mod.add_usecase
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
         self.check_pycache(2)  # 1 index, 1 data
-        self.assertPreciseEqual(f(2.5, 3), 6.5)
+        assertPreciseEqual(f(2.5, 3), 6.5)
         self.check_pycache(3)  # 1 index, 2 data
         self.check_hits(f.func, 0, 2)
 
         f = mod.record_return_aligned
         rec = f(mod.aligned_arr, 1)
-        self.assertPreciseEqual(tuple(rec), (2, 43.5))
+        assertPreciseEqual(tuple(rec), (2, 43.5))
 
         f = mod.record_return_packed
         rec = f(mod.packed_arr, 1)
-        self.assertPreciseEqual(tuple(rec), (2, 43.5))
+        assertPreciseEqual(tuple(rec), (2, 43.5))
         self.check_pycache(6)  # 2 index, 4 data
         self.check_hits(f.func, 0, 2)
 
@@ -243,7 +244,7 @@ def test_no_caching(self):
         mod = self.import_module()
 
         f = mod.add_nocache_usecase
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
         self.check_pycache(0)
 
     def test_many_locals(self):
@@ -265,13 +266,13 @@ def test_closure(self):
             warnings.simplefilter("error", NumbaWarning)
 
             f = mod.closure1
-            self.assertPreciseEqual(f(3), 6)  # 3 + 3 = 6
+            assertPreciseEqual(f(3), 6)  # 3 + 3 = 6
             f = mod.closure2
-            self.assertPreciseEqual(f(3), 8)  # 3 + 5 = 8
+            assertPreciseEqual(f(3), 8)  # 3 + 5 = 8
             f = mod.closure3
-            self.assertPreciseEqual(f(3), 10)  # 3 + 7 = 10
+            assertPreciseEqual(f(3), 10)  # 3 + 7 = 10
             f = mod.closure4
-            self.assertPreciseEqual(f(3), 12)  # 3 + 9 = 12
+            assertPreciseEqual(f(3), 12)  # 3 + 9 = 12
             self.check_pycache(5)  # 1 nbi, 4 nbc
 
     def test_cache_reuse(self):
@@ -304,7 +305,7 @@ def test_cache_reuse(self):
     def test_cache_invalidate(self):
         mod = self.import_module()
         f = mod.add_usecase
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
 
         # This should change the functions' results
         with open(self.modfile, "a") as f:
@@ -312,33 +313,33 @@ def test_cache_invalidate(self):
 
         mod = self.import_module()
         f = mod.add_usecase
-        self.assertPreciseEqual(f(2, 3), 15)
+        assertPreciseEqual(f(2, 3), 15)
 
     def test_recompile(self):
         # Explicit call to recompile() should overwrite the cache
         mod = self.import_module()
         f = mod.add_usecase
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
 
         mod = self.import_module()
         f = mod.add_usecase
         mod.Z = 10
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
         f.func.recompile()
-        self.assertPreciseEqual(f(2, 3), 15)
+        assertPreciseEqual(f(2, 3), 15)
 
         # Freshly recompiled version is re-used from other imports
         mod = self.import_module()
         f = mod.add_usecase
-        self.assertPreciseEqual(f(2, 3), 15)
+        assertPreciseEqual(f(2, 3), 15)
 
     def test_same_names(self):
         # Function with the same names should still disambiguate
         mod = self.import_module()
         f = mod.renamed_function1
-        self.assertPreciseEqual(f(2), 4)
+        assertPreciseEqual(f(2), 4)
         f = mod.renamed_function2
-        self.assertPreciseEqual(f(2), 8)
+        assertPreciseEqual(f(2), 8)
 
     def _test_pycache_fallback(self):
         """
@@ -353,14 +354,14 @@ def _test_pycache_fallback(self):
             shutil.rmtree, f.func.stats.cache_path, ignore_errors=True
         )
 
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
         # It's a cache miss since the file was copied to a new temp location
         self.check_hits(f.func, 0, 1)
 
         # Test re-use
         mod2 = self.import_module()
         f = mod2.add_usecase
-        self.assertPreciseEqual(f(2, 3), 6)
+        assertPreciseEqual(f(2, 3), 6)
         self.check_hits(f.func, 1, 0)
 
         # The __pycache__ is empty (otherwise the test's preconditions
@@ -675,17 +676,17 @@ def test_cpu_and_cuda_targets(self):
 
         f_cpu = mod.assign_cpu
         f_cuda = mod.assign_cuda
-        self.assertPreciseEqual(f_cpu(5), 5)
+        assertPreciseEqual(f_cpu(5), 5)
         self.check_pycache(2)  # 1 index, 1 data
-        self.assertPreciseEqual(f_cuda(5), 5)
+        assertPreciseEqual(f_cuda(5), 5)
         self.check_pycache(3)  # 1 index, 2 data
 
         self.check_hits(f_cpu.func, 0, 1)
         self.check_hits(f_cuda.func, 0, 1)
 
-        self.assertPreciseEqual(f_cpu(5.5), 5.5)
+        assertPreciseEqual(f_cpu(5.5), 5.5)
         self.check_pycache(4)  # 1 index, 3 data
-        self.assertPreciseEqual(f_cuda(5.5), 5.5)
+        assertPreciseEqual(f_cuda(5.5), 5.5)
         self.check_pycache(5)  # 1 index, 4 data
 
         self.check_hits(f_cpu.func, 0, 2)
@@ -770,19 +771,19 @@ def test_cache(self):
         # Step 1. Populate the cache with the first GPU
         with gpus[0]:
             f = mod.add_usecase
-            self.assertPreciseEqual(f(2, 3), 6)
+            assertPreciseEqual(f(2, 3), 6)
             self.check_pycache(2)  # 1 index, 1 data
-            self.assertPreciseEqual(f(2.5, 3), 6.5)
+            assertPreciseEqual(f(2.5, 3), 6.5)
             self.check_pycache(3)  # 1 index, 2 data
             self.check_hits(f.func, 0, 2)
 
             f = mod.record_return_aligned
             rec = f(mod.aligned_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
             f = mod.record_return_packed
             rec = f(mod.packed_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
             self.check_pycache(6)  # 2 index, 4 data
             self.check_hits(f.func, 0, 2)
 
@@ -790,19 +791,19 @@ def test_cache(self):
         # doesn't further populate the cache.
         with gpus[1]:
             f = mod.add_usecase
-            self.assertPreciseEqual(f(2, 3), 6)
+            assertPreciseEqual(f(2, 3), 6)
             self.check_pycache(6)  # cache unchanged
-            self.assertPreciseEqual(f(2.5, 3), 6.5)
+            assertPreciseEqual(f(2.5, 3), 6.5)
             self.check_pycache(6)  # cache unchanged
             self.check_hits(f.func, 0, 2)
 
             f = mod.record_return_aligned
             rec = f(mod.aligned_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
             f = mod.record_return_packed
             rec = f(mod.packed_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
             self.check_pycache(6)  # cache unchanged
             self.check_hits(f.func, 0, 2)
 
@@ -813,19 +814,19 @@ def test_cache(self):
 
         with gpus[1]:
             f = mod2.add_usecase
-            self.assertPreciseEqual(f(2, 3), 6)
+            assertPreciseEqual(f(2, 3), 6)
             self.check_pycache(7)  # 2 index, 5 data
-            self.assertPreciseEqual(f(2.5, 3), 6.5)
+            assertPreciseEqual(f(2.5, 3), 6.5)
             self.check_pycache(8)  # 2 index, 6 data
             self.check_hits(f.func, 0, 2)
 
             f = mod2.record_return_aligned
             rec = f(mod.aligned_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
             f = mod2.record_return_packed
             rec = f(mod.packed_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
             self.check_pycache(10)  # 2 index, 8 data
             self.check_hits(f.func, 0, 2)
 
@@ -842,31 +843,31 @@ def test_cache(self):
         # during Step 3.
         with gpus[1]:
             f = mod3.add_usecase
-            self.assertPreciseEqual(f(2, 3), 6)
-            self.assertPreciseEqual(f(2.5, 3), 6.5)
+            assertPreciseEqual(f(2, 3), 6)
+            assertPreciseEqual(f(2.5, 3), 6.5)
 
             f = mod3.record_return_aligned
             rec = f(mod.aligned_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
             f = mod3.record_return_packed
             rec = f(mod.packed_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
         # Step 5. Run with GPU 0 using the module from Step 4, to force PTX
         # generation from cached NVVM IR.
         with gpus[0]:
             f = mod3.add_usecase
-            self.assertPreciseEqual(f(2, 3), 6)
-            self.assertPreciseEqual(f(2.5, 3), 6.5)
+            assertPreciseEqual(f(2, 3), 6)
+            assertPreciseEqual(f(2.5, 3), 6.5)
 
             f = mod3.record_return_aligned
             rec = f(mod.aligned_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
             f = mod3.record_return_packed
             rec = f(mod.packed_arr, 1)
-            self.assertPreciseEqual(tuple(rec), (2, 43.5))
+            assertPreciseEqual(tuple(rec), (2, 43.5))
 
 
 def child_initializer():
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_complex.py b/numba_cuda/numba/cuda/tests/cudapy/test_complex.py
index 8348ca96e..9ec3596b0 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_complex.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_complex.py
@@ -41,6 +41,7 @@
     tanh_usecase,
 )
 from numba.cuda.np import numpy_support
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 def compile_scalar_func(pyfunc, argtypes, restype):
@@ -155,7 +156,7 @@ def run_func(self, pyfunc, sigs, values, ulps=1, ignore_sign_on_zero=False):
             got_list = cudafunc(ok_values)
             for got, expected, args in zip(got_list, expected_list, ok_values):
                 msg = "for input %r with prec %r" % (args, prec)
-                self.assertPreciseEqual(
+                assertPreciseEqual(
                     got,
                     expected,
                     prec=prec,
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
index bcdbcb70a..0431f6475 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_datetime.py
@@ -1,14 +1,15 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
+import unittest
+
 import numpy as np
+import pytest
 
 from numba import cuda, vectorize, guvectorize
 from numba.cuda.np.numpy_support import from_dtype
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
-import unittest
-
-import pytest
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 class TestCudaDateTime(CUDATestCase):
@@ -24,7 +25,7 @@ def foo(start, end, delta):
 
         foo[1, 32](arr1, arr2, delta)
 
-        self.assertPreciseEqual(delta, arr2 - arr1)
+        assertPreciseEqual(delta, arr2 - arr1)
 
     def test_scalar_datetime_kernel(self):
         @cuda.jit
@@ -43,7 +44,7 @@ def foo(dates, target, delta, matches, outdelta):
         where = matches.nonzero()
 
         self.assertEqual(list(where), [5])
-        self.assertPreciseEqual(outdelta, arr1 - delta)
+        assertPreciseEqual(outdelta, arr1 - delta)
 
     @skip_on_cudasim("ufunc API unsupported in the simulator")
     def test_ufunc(self):
@@ -58,7 +59,7 @@ def timediff(start, end):
 
         delta = timediff(arr1, arr2)
 
-        self.assertPreciseEqual(delta, arr2 - arr1)
+        assertPreciseEqual(delta, arr2 - arr1)
 
     @skip_on_cudasim("API unsupported in the simulator")
     def test_datetime_cupy_inputs(self):
@@ -82,7 +83,7 @@ def assign(out, arr):
         out = cp.empty(arr.size, dtype="float64").view("datetime64[D]")
         assign[1, 1](out, arr)
 
-        self.assertPreciseEqual(arr.get(), out.get())
+        assertPreciseEqual(arr.get(), out.get())
 
     @skip_on_cudasim("ufunc API unsupported in the simulator")
     def test_gufunc(self):
@@ -102,14 +103,14 @@ def timediff(start, end, out):
 
         delta = timediff(arr1, arr2)
 
-        self.assertPreciseEqual(delta, arr2 - arr1)
+        assertPreciseEqual(delta, arr2 - arr1)
 
     @skip_on_cudasim("no .copy_to_host() in the simulator")
     def test_datetime_view_as_int64(self):
         arr = np.arange("2005-02", "2006-02", dtype="datetime64[D]")
         darr = cuda.to_device(arr)
         viewed = darr.view(np.int64)
-        self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
+        assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
         self.assertEqual(viewed.gpu_data, darr.gpu_data)
 
     @skip_on_cudasim("no .copy_to_host() in the simulator")
@@ -119,7 +120,7 @@ def test_timedelta_view_as_int64(self):
         self.assertEqual(arr.dtype, np.dtype("timedelta64[D]"))
         darr = cuda.to_device(arr)
         viewed = darr.view(np.int64)
-        self.assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
+        assertPreciseEqual(arr.view(np.int64), viewed.copy_to_host())
         self.assertEqual(viewed.gpu_data, darr.gpu_data)
 
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
index 39fb17c7a..f2e381628 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_debuginfo.py
@@ -16,7 +16,10 @@
 import unittest
 import warnings
 from numba.cuda.core.errors import NumbaDebugInfoWarning
-from numba.cuda.tests.support import ignore_internal_warnings
+from numba.cuda.tests.support import (
+    assertPreciseEqual,
+    ignore_internal_warnings,
+)
 import numpy as np
 import inspect
 
@@ -806,7 +809,7 @@ def foo(x, y):
             result.copy_to_host()
 
         result_host = math.sin(np.pi) + math.cos(np.pi)
-        self.assertPreciseEqual(result[0], result_host)
+        assertPreciseEqual(result[0], result_host)
 
         ir_content = foo.inspect_llvm()[foo.signatures[0]]
         self.assertFileCheckMatches(ir_content, foo.__doc__)
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
index 06efe7f4d..51d9ccdd5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_dispatcher.py
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
+import math
+
 from cuda.core._utils.cuda_utils import CUDAError
 import numpy as np
 import threading
@@ -24,7 +26,7 @@
     unittest,
     CUDATestCase,
 )
-import math
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 def add(x, y):
@@ -270,7 +272,7 @@ def test_coerce_input_types(self):
         r = np.zeros(1, dtype=np.int32)
 
         c_add[1, 1](r, 123, 456)
-        self.assertPreciseEqual(r[0], add(123, 456))
+        assertPreciseEqual(r[0], add(123, 456))
 
     @skip_on_cudasim("Simulator ignores signature")
     @unittest.expectedFailure
@@ -287,7 +289,7 @@ def test_coerce_input_types_unsafe(self):
         r = np.zeros(1, dtype=np.int32)
 
         c_add[1, 1](r, 12.3, 45.6)
-        self.assertPreciseEqual(r[0], add(12, 45))
+        assertPreciseEqual(r[0], add(12, 45))
 
     @skip_on_cudasim("Simulator ignores signature")
     def test_coerce_input_types_unsafe_complex(self):
@@ -360,11 +362,11 @@ def _test_explicit_signatures(self, sigs):
         # Exact signature matches
         r = np.zeros(1, dtype=np.int64)
         f[1, 1](r, 1, 2)
-        self.assertPreciseEqual(r[0], 3)
+        assertPreciseEqual(r[0], 3)
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, 1.5, 2.5)
-        self.assertPreciseEqual(r[0], 4.0)
+        assertPreciseEqual(r[0], 4.0)
 
         if config.ENABLE_CUDASIM:
             # Pass - we can't check for no conversion on the simulator.
@@ -433,11 +435,11 @@ def test_explicit_signatures_same_type_class(self):
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, np.float32(1), np.float32(2**-25))
-        self.assertPreciseEqual(r[0], 1.0)
+        assertPreciseEqual(r[0], 1.0)
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, 1, 2**-25)
-        self.assertPreciseEqual(r[0], 1.0000000298023224)
+        assertPreciseEqual(r[0], 1.0000000298023224)
 
     @skip_on_cudasim("No overload resolution in the simulator")
     def test_explicit_signatures_ambiguous_resolution(self):
@@ -480,7 +482,7 @@ def test_explicit_signatures_unsafe(self):
 
         # Approximate match (unsafe conversion)
         f[1, 1](r, 1.5, 2.5)
-        self.assertPreciseEqual(r[0], 3)
+        assertPreciseEqual(r[0], 3)
         self.assertEqual(len(f.overloads), 1, f.overloads)
 
         sigs = [
@@ -491,7 +493,7 @@ def test_explicit_signatures_unsafe(self):
         r = np.zeros(1, dtype=np.float64)
         # Approximate match (int32 -> float64 is a safe conversion)
         f[1, 1](r, np.int32(1), 2.5)
-        self.assertPreciseEqual(r[0], 3.5)
+        assertPreciseEqual(r[0], 3.5)
 
     def add_device_usecase(self, sigs):
         # Generate a kernel that calls the add device function compiled with a
@@ -513,11 +515,11 @@ def test_explicit_signatures_device(self):
         # Exact signature matches
         r = np.zeros(1, dtype=np.int64)
         f[1, 1](r, 1, 2)
-        self.assertPreciseEqual(r[0], 3)
+        assertPreciseEqual(r[0], 3)
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, 1.5, 2.5)
-        self.assertPreciseEqual(r[0], 4.0)
+        assertPreciseEqual(r[0], 4.0)
 
         if config.ENABLE_CUDASIM:
             # Pass - we can't check for no conversion on the simulator.
@@ -544,11 +546,11 @@ def test_explicit_signatures_device_same_type_class(self):
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, np.float32(1), np.float32(2**-25))
-        self.assertPreciseEqual(r[0], 1.0)
+        assertPreciseEqual(r[0], 1.0)
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, 1, 2**-25)
-        self.assertPreciseEqual(r[0], 1.0000000298023224)
+        assertPreciseEqual(r[0], 1.0000000298023224)
 
     def test_explicit_signatures_device_ambiguous(self):
         # Ambiguity between the two best overloads resolves. This is somewhat
@@ -561,7 +563,7 @@ def test_explicit_signatures_device_ambiguous(self):
 
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, 1.5, 2.5)
-        self.assertPreciseEqual(r[0], 4.0)
+        assertPreciseEqual(r[0], 4.0)
 
     @skip_on_cudasim("CUDA Simulator does not force casting")
     def test_explicit_signatures_device_unsafe(self):
@@ -575,7 +577,7 @@ def test_explicit_signatures_device_unsafe(self):
         # Approximate match (unsafe conversion)
         r = np.zeros(1, dtype=np.int64)
         f[1, 1](r, 1.5, 2.5)
-        self.assertPreciseEqual(r[0], 3)
+        assertPreciseEqual(r[0], 3)
         self.assertEqual(len(f.overloads), 1, f.overloads)
 
         sigs = ["(int64, int64)", "(float64, float64)"]
@@ -584,7 +586,7 @@ def test_explicit_signatures_device_unsafe(self):
         # Approximate match (int32 -> float64 is a safe conversion)
         r = np.zeros(1, dtype=np.float64)
         f[1, 1](r, np.int32(1), 2.5)
-        self.assertPreciseEqual(r[0], 3.5)
+        assertPreciseEqual(r[0], 3.5)
 
     def test_dispatcher_docstring(self):
         # Ensure that CUDA-jitting a function preserves its docstring. See
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_enums.py b/numba_cuda/numba/cuda/tests/cudapy/test_enums.py
index 8077536e2..e1553f07e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_enums.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_enums.py
@@ -27,6 +27,7 @@
     RequestError,
     IntEnumWithNegatives,
 )
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 class EnumTest(CUDATestCase):
@@ -51,7 +52,7 @@ def f(a, b, out):
             expected = got.copy()
             cuda_f[1, 1](a, b, got)
             f(a, b, expected)
-            self.assertPreciseEqual(expected, got)
+            assertPreciseEqual(expected, got)
 
     def test_getattr_getitem(self):
         def f(out):
@@ -64,7 +65,7 @@ def f(out):
         expected = got.copy()
         cuda_f[1, 1](got)
         f(expected)
-        self.assertPreciseEqual(expected, got)
+        assertPreciseEqual(expected, got)
 
     @skip_on_standalone_numba_cuda
     def test_return_from_device_func(self):
@@ -81,7 +82,7 @@ def f(pred, out):
         expected = got.copy()
         f(True, expected)
         cuda_f[1, 1](True, got)
-        self.assertPreciseEqual(expected, got)
+        assertPreciseEqual(expected, got)
 
     def test_int_coerce(self):
         def f(x, out):
@@ -97,7 +98,7 @@ def f(x, out):
             expected = got.copy()
             cuda_f[1, 1](x, got)
             f(x, expected)
-            self.assertPreciseEqual(expected, got)
+            assertPreciseEqual(expected, got)
 
     def test_int_cast(self):
         def f(x, out):
@@ -127,7 +128,7 @@ def f(x):
         arr = np.array([2, 404, 500, 404], dtype=np.int64)
         expected = np.array([f(x) for x in arr], dtype=np.int64)
         got = cuda_func(arr)
-        self.assertPreciseEqual(expected, got)
+        assertPreciseEqual(expected, got)
 
     @skip_on_cudasim("No typing context in CUDA simulator")
     def test_int_enum_no_conversion(self):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
index 2ed459bb2..38c25921c 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
@@ -12,7 +12,7 @@
 from numba.cuda.testing import skip_on_standalone_numba_cuda
 from numba.cuda import types
 from numba.cuda import config
-from numba.cuda.tests.support import run_test_in_subprocess
+from numba.cuda.tests.support import assertPreciseEqual, run_test_in_subprocess
 
 if config.ENABLE_CUDASIM:
     raise unittest.SkipTest("Simulator does not support extending types")
@@ -400,14 +400,14 @@ def test_func1(self):
         res = np.zeros(1)
         with override_config("DISABLE_PERFORMANCE_WARNINGS", 1):
             cfunc[1, 1](res)
-        self.assertPreciseEqual(res[0], 42.0)
+        assertPreciseEqual(res[0], 42.0)
         pyfunc = call_func1_unary
         with override_config("DISABLE_PERFORMANCE_WARNINGS", 1):
             cfunc = jit(pyfunc)
-        self.assertPreciseEqual(res[0], 42.0)
+        assertPreciseEqual(res[0], 42.0)
         with override_config("DISABLE_PERFORMANCE_WARNINGS", 1):
             cfunc[1, 1](18.0, res)
-        self.assertPreciseEqual(res[0], 6.0)
+        assertPreciseEqual(res[0], 6.0)
 
     @run_test_in_subprocess
     def test_func1_isolated(self):
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
index 64769f585..4dad3823f 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_gufunc_scalar.py
@@ -10,6 +10,7 @@
 import numpy as np
 from numba import cuda, guvectorize
 from numba.cuda.testing import skip_on_cudasim, CUDATestCase
+from numba.cuda.tests.support import assertPreciseEqual
 import unittest
 
 
@@ -64,7 +65,7 @@ def twice(inp, out):
 
         self.assertEqual(twice(10), 20)
         arg = np.arange(10).astype(np.int32)
-        self.assertPreciseEqual(twice(arg), arg * 2)
+        assertPreciseEqual(twice(arg), arg * 2)
 
     def test_gufunc_scalar_input_saxpy(self):
         @guvectorize(
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py b/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
index 93fbec026..c135e7d0e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_intrinsics.py
@@ -21,6 +21,7 @@
     skip_unless_cc_53,
     skip_if_nvjitlink_missing,
 )
+from numba.cuda.tests.support import assertPreciseEqual
 
 
 def simple_threadidx(ary):
@@ -1134,9 +1135,7 @@ def test_round_to_f4(self):
         for val, ndigits in itertools.product(vals, digits):
             with self.subTest(val=val, ndigits=ndigits):
                 compiled[1, 1](ary, val, ndigits)
-                self.assertPreciseEqual(
-                    ary[0], round(val, ndigits), prec="single"
-                )
+                assertPreciseEqual(ary[0], round(val, ndigits), prec="single")
 
     # CPython on most platforms uses rounding based on dtoa.c, whereas the CUDA
     # round-to implementation uses CPython's fallback implementation, which has
@@ -1165,7 +1164,7 @@ def test_round_to_f4_halfway(self):
         val = 0.3425
         ndigits = 3
         compiled[1, 1](ary, val, ndigits)
-        self.assertPreciseEqual(ary[0], round(val, ndigits), prec="single")
+        assertPreciseEqual(ary[0], round(val, ndigits), prec="single")
 
     def test_round_to_f8(self):
         compiled = cuda.jit("void(float64[:], float64, int32)")(simple_round_to)
@@ -1178,16 +1177,14 @@ def test_round_to_f8(self):
         for val, ndigits in itertools.product(vals, digits):
             with self.subTest(val=val, ndigits=ndigits):
                 compiled[1, 1](ary, val, ndigits)
-                self.assertPreciseEqual(
-                    ary[0], round(val, ndigits), prec="exact"
-                )
+                assertPreciseEqual(ary[0], round(val, ndigits), prec="exact")
 
         # Trigger the "overflow safe" branch of the implementation
         val = 0.12345678987654321 * 10e-15
         ndigits = 23
         with self.subTest(val=val, ndigits=ndigits):
             compiled[1, 1](ary, val, ndigits)
-            self.assertPreciseEqual(ary[0], round(val, ndigits), prec="double")
+            assertPreciseEqual(ary[0], round(val, ndigits), prec="double")
 
     # Skipped on cudasim for the same reasons as test_round_to_f4 above.
     @skip_on_cudasim("Overflow behavior differs on CPython")
@@ -1212,7 +1209,7 @@ def test_round_to_f8_halfway(self):
         val = 0.5425
         ndigits = 3
         compiled[1, 1](ary, val, ndigits)
-        self.assertPreciseEqual(ary[0], round(val, ndigits), prec="double")
+        assertPreciseEqual(ary[0], round(val, ndigits), prec="double")
 
 
 if __name__ == "__main__":
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py b/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py
index 846007ef7..59c4a1809 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_recursion.py
@@ -4,6 +4,7 @@
 from numba import cuda
 from numba.cuda.core.errors import TypingError
 from numba.cuda.testing import CUDATestCase, skip_on_cudasim
+from numba.cuda.tests.support import assertPreciseEqual
 import numpy as np
 import unittest
 
@@ -28,7 +29,7 @@ def kernel(r, x):
 
         actual = r[0]
         expected = 55
-        self.assertPreciseEqual(actual, expected)
+        assertPreciseEqual(actual, expected)
 
     def test_global_explicit_sig(self):
         self.check_fib(self.mod.fib1)
@@ -71,7 +72,7 @@ def kernel(r, x, y):
         expected = pfunc(*args)
         actual = r[0]
 
-        self.assertPreciseEqual(actual, expected)
+        assertPreciseEqual(actual, expected)
 
     @unittest.expectedFailure
     def test_raise(self):
diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 93377f998..cda7a53e8 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -22,6 +22,7 @@
 import traceback
 
 import numpy as np
+import pytest
 
 from numba.cuda import types
 from numba.cuda.core import errors
@@ -47,6 +48,20 @@
     from numba.core import types as upstream_types
 
 
+_bool_types = (bool, np.bool_)
+_exact_typesets = [
+    _bool_types,
+    (int,),
+    (str,),
+    (np.integer,),
+    (bytes, np.bytes_),
+]
+_approx_typesets = [(float,), (complex,), (np.inexact,)]
+_sequence_typesets = [(tuple, list)]
+_float_types = (float, np.floating)
+_complex_types = (complex, np.complexfloating)
+
+
 class EnableNRTStatsMixin:
     """Mixin to enable the NRT statistics counters."""
 
@@ -352,6 +367,293 @@ def captured_stderr():
     return contextlib.redirect_stderr(io.StringIO())
 
 
+def assertStridesEqual(first, second):
+    """
+    Assert that two arrays have the same shape, itemsize and strides.
+    """
+    assert first.shape == second.shape, "shapes differ"
+    assert first.itemsize == second.itemsize, "itemsizes differ"
+    assert _fix_strides(first) == _fix_strides(second), "strides differ"
+
+
+def assertPreciseEqual(
+    first,
+    second,
+    prec="exact",
+    ulps=1,
+    msg=None,
+    ignore_sign_on_zero=False,
+    abs_tol=None,
+):
+    """
+    Versatile equality testing function with more built-in checks than
+    standard assertEqual().
+
+    For arrays, test that layout, dtype, shape are identical, and
+    recursively call assertPreciseEqual() on the contents.
+
+    For other sequences, recursively call assertPreciseEqual() on
+    the contents.
+
+    For scalars, test that the two scalars have similar types and are
+    equal up to a computed precision.
+    If the scalars are instances of exact types or if *prec* is
+    'exact', they are compared exactly.
+    If the scalars are instances of inexact types (float, complex)
+    and *prec* is not 'exact', then the number of significant bits
+    is computed according to the value of *prec*: 53 bits if *prec*
+    is 'double', 24 bits if *prec* is 'single'.  This number of bits
+    can be lowered by raising the *ulps* value.
+    *ignore_sign_on_zero* can be set to True if zeros are to be
+    considered equal regardless of their sign bit.
+    *abs_tol*, if given, must be a float or the string "eps" (meaning
+    the machine epsilon of type(first)).  Two scalars whose absolute
+    difference is below that value are considered equal; this handles
+    small numbers, typically of magnitude less than machine precision.
+    *msg* is an optional message added to failure reports.
+    An AssertionError raised while comparing is re-reported through
+    pytest.fail(), together with *first* and *second*.
+
+    For inexact scalars, a *prec* other than 'exact', 'single' or
+    'double' raises a ValueError when the precision is computed.
+    """
+    try:
+        _assertPreciseEqual(
+            first, second, prec, ulps, msg, ignore_sign_on_zero, abs_tol
+        )
+    except AssertionError as exc:
+        failure_msg = str(exc)
+        # Fall off of the 'except' scope to avoid Python 3 exception
+        # chaining.
+    else:
+        return
+    # Decorate the failure message with more information
+    pytest.fail(reason=f"when comparing {first} and {second}: {failure_msg}")
+
+
+def _assertPreciseEqual(
+    first,
+    second,
+    prec="exact",
+    ulps=1,
+    msg=None,
+    ignore_sign_on_zero=False,
+    abs_tol=None,
+):
+    """
+    Recursive workhorse for assertPreciseEqual().
+
+    Takes the same arguments as assertPreciseEqual() and raises a plain
+    AssertionError on mismatch so that the wrapper can decorate it.
+    """
+    options = (prec, ulps, msg, ignore_sign_on_zero, abs_tol)
+
+    def _with_msg(standard):
+        # Same layout as unittest's longMessage: "<standard> : <msg>".
+        return standard if msg is None else f"{standard} : {msg}"
+
+    def _assertNumberEqual(first, second, delta=None):
+        if (
+            delta is None
+            or first == second == 0.0
+            or math.isinf(first)
+            or math.isinf(second)
+        ):
+            assert first == second, _with_msg(f"{first} != {second}")
+            # For signed zeros
+            if not ignore_sign_on_zero:
+                try:
+                    if math.copysign(1, first) != math.copysign(1, second):
+                        raise AssertionError(_with_msg(f"{first} != {second}"))
+                except TypeError:
+                    pass
+        else:
+            # NaN operands/tolerances compare False and fail the assertion.
+            assert first == second or abs(first - second) <= delta, _with_msg(
+                f"{first} != {second} within {delta} delta"
+            )
+
+    first_family = _detect_family(first)
+    second_family = _detect_family(second)
+
+    assertion_message = (
+        f"Type Family mismatch. ({first_family} != {second_family})"
+    )
+    if msg:
+        assertion_message += f": {msg}"
+    assert first_family == second_family, assertion_message
+
+    # We now know they are in the same comparison family
+    compare_family = first_family
+
+    # For recognized sequences, recurse
+    if compare_family == "ndarray":
+        dtype = _fix_dtype(first.dtype)
+        assert dtype == _fix_dtype(second.dtype), "different dtypes"
+        assert first.ndim == second.ndim, "different number of dimensions"
+        assert first.shape == second.shape, "different shapes"
+        assert first.flags.writeable == second.flags.writeable, (
+            "different mutability"
+        )
+        # itemsize is already checked by the dtype test above
+        assert _fix_strides(first) == _fix_strides(second), "different strides"
+        if first.dtype != dtype:
+            first = first.astype(dtype)
+        if second.dtype != dtype:
+            second = second.astype(dtype)
+        for a, b in zip(first.flat, second.flat):
+            _assertPreciseEqual(a, b, *options)
+        return
+
+    elif compare_family == "sequence":
+        assert len(first) == len(second), _with_msg("different lengths")
+        for a, b in zip(first, second):
+            _assertPreciseEqual(a, b, *options)
+        return
+
+    elif compare_family == "exact":
+        exact_comparison = True
+
+    elif compare_family in ["complex", "approximate"]:
+        exact_comparison = False
+
+    elif compare_family == "enum":
+        assert first.__class__ is second.__class__
+        _assertPreciseEqual(first.value, second.value, *options)
+        return
+
+    elif compare_family == "unknown":
+        # Assume these are non-numeric types: we will fall back
+        # on a plain equality comparison.
+        assert first.__class__ is second.__class__
+        exact_comparison = True
+
+    else:
+        raise AssertionError("unexpected family")
+
+    # If a Numpy scalar, check the dtype is exactly the same too
+    # (required for datetime64 and timedelta64).
+    if hasattr(first, "dtype") and hasattr(second, "dtype"):
+        assert first.dtype == second.dtype
+
+    # Mixing bools and non-bools should always fail
+    if isinstance(first, _bool_types) != isinstance(second, _bool_types):
+        assertion_message = (
+            "Mismatching return types "
+            f"({first.__class__} vs. {second.__class__})"
+        )
+        if msg:
+            assertion_message += f": {msg}"
+        raise AssertionError(assertion_message)
+
+    try:
+        if cmath.isnan(first) and cmath.isnan(second):
+            # The NaNs will compare unequal, skip regular comparison
+            return
+    except TypeError:
+        # Not floats.
+        pass
+
+    # if absolute comparison is set, use it
+    if abs_tol is not None:
+        if abs_tol == "eps":
+            tolerance = np.finfo(type(first)).eps
+        elif isinstance(abs_tol, float):
+            tolerance = abs_tol
+        else:
+            raise ValueError(
+                f'abs_tol is not "eps" or a float, found {abs_tol}'
+            )
+        if abs(first - second) < tolerance:
+            return
+
+    if exact_comparison or prec == "exact":
+        delta = None
+    else:
+        if prec == "single":
+            bits = 24
+        elif prec == "double":
+            bits = 53
+        else:
+            raise ValueError(f"unsupported precision {prec!r}")
+        # Each extra ulp doubles the tolerance (one significant bit less).
+        k = 2 ** (ulps - bits - 1)
+        delta = k * (abs(first) + abs(second))
+    if isinstance(first, _complex_types):
+        _assertNumberEqual(first.real, second.real, delta)
+        _assertNumberEqual(first.imag, second.imag, delta)
+    elif isinstance(first, (np.timedelta64, np.datetime64)):
+        # Since Np 1.16 NaT == NaT is False, so special comparison needed
+        if np.isnat(first):
+            assert np.isnat(first) == np.isnat(second)
+        else:
+            _assertNumberEqual(first, second, delta)
+    else:
+        _assertNumberEqual(first, second, delta)
+
+
+def _detect_family(numeric_object):
+    """
+    Return a string naming the comparison family of *numeric_object*:
+    "ndarray", "enum", "sequence", "exact", "complex", "approximate"
+    or "unknown".  Families are tested in that order; first match wins.
+    """
+    if isinstance(numeric_object, np.ndarray):
+        return "ndarray"
+
+    if isinstance(numeric_object, enum.Enum):
+        return "enum"
+
+    for tp in _sequence_typesets:
+        if isinstance(numeric_object, tp):
+            return "sequence"
+
+    for tp in _exact_typesets:
+        if isinstance(numeric_object, tp):
+            return "exact"
+
+    for tp in _complex_types:
+        if isinstance(numeric_object, tp):
+            return "complex"
+
+    for tp in _approx_typesets:
+        if isinstance(numeric_object, tp):
+            return "approximate"
+
+    return "unknown"
+
+
+def _fix_dtype(dtype):
+    """
+    Fix *dtype* for comparison: int32 becomes int64 on 64-bit Windows.
+    """
+    # Under 64-bit Windows, Numpy may return either int32 or int64
+    # arrays depending on the function.
+    if (
+        sys.platform == "win32"
+        and sys.maxsize > 2**32
+        and dtype == np.dtype("int32")
+    ):
+        return np.dtype("int64")
+    else:
+        return dtype
+
+
+def _fix_strides(arr):
+    """
+    Return the strides of *arr* divided by its itemsize, for comparison.
+    Strides for 0- or 1-sized dimensions are ignored.
+    """
+    if arr.size == 0:
+        return [0] * arr.ndim
+    else:
+        return [
+            stride / arr.itemsize
+            for (stride, shape) in zip(arr.strides, arr.shape)
+            if shape > 1
+        ]
+
+
 class TestCase(unittest.TestCase):
     longMessage = True
 
@@ -407,17 +709,18 @@ def assertRefCount(self, *objects):
                     % (old, new, obj)
                 )
 
-    def assertRefCountEqual(self, *objects):
-        gc.collect()
-        rc = [sys.getrefcount(x) for x in objects]
-        rc_0 = rc[0]
-        for i in range(len(objects))[1:]:
-            rc_i = rc[i]
-            if rc_0 != rc_i:
-                self.fail(
-                    f"Refcount for objects does not match. "
-                    f"#0({rc_0}) != #{i}({rc_i}) does not match."
-                )
+    # TODO: safe to delete since it's not used anywhere in numba_cuda?
+    # def assertRefCountEqual(self, *objects):
+    #     gc.collect()
+    #     rc = [sys.getrefcount(x) for x in objects]
+    #     rc_0 = rc[0]
+    #     for i in range(len(objects))[1:]:
+    #         rc_i = rc[i]
+    #         if rc_0 != rc_i:
+    #             self.fail(
+    #                 f"Refcount for objects does not match. "
+    #                 f"#0({rc_0}) != #{i}({rc_i}) does not match."
+    #             )
 
     @contextlib.contextmanager
     def assertNoNRTLeak(self):
@@ -443,321 +746,6 @@ def assertNoNRTLeak(self):
             "number of meminfo allocs != number of meminfo frees",
         )
 
-    _bool_types = (bool, np.bool_)
-    _exact_typesets = [
-        _bool_types,
-        (int,),
-        (str,),
-        (np.integer,),
-        (bytes, np.bytes_),
-    ]
-    _approx_typesets = [(float,), (complex,), (np.inexact)]
-    _sequence_typesets = [(tuple, list)]
-    _float_types = (float, np.floating)
-    _complex_types = (complex, np.complexfloating)
-
-    def _detect_family(self, numeric_object):
-        """
-        This function returns a string description of the type family
-        that the object in question belongs to.  Possible return values
-        are: "exact", "complex", "approximate", "sequence", and "unknown"
-        """
-        if isinstance(numeric_object, np.ndarray):
-            return "ndarray"
-
-        if isinstance(numeric_object, enum.Enum):
-            return "enum"
-
-        for tp in self._sequence_typesets:
-            if isinstance(numeric_object, tp):
-                return "sequence"
-
-        for tp in self._exact_typesets:
-            if isinstance(numeric_object, tp):
-                return "exact"
-
-        for tp in self._complex_types:
-            if isinstance(numeric_object, tp):
-                return "complex"
-
-        for tp in self._approx_typesets:
-            if isinstance(numeric_object, tp):
-                return "approximate"
-
-        return "unknown"
-
-    def _fix_dtype(self, dtype):
-        """
-        Fix the given *dtype* for comparison.
-        """
-        # Under 64-bit Windows, Numpy may return either int32 or int64
-        # arrays depending on the function.
-        if (
-            sys.platform == "win32"
-            and sys.maxsize > 2**32
-            and dtype == np.dtype("int32")
-        ):
-            return np.dtype("int64")
-        else:
-            return dtype
-
-    def _fix_strides(self, arr):
-        """
-        Return the strides of the given array, fixed for comparison.
-        Strides for 0- or 1-sized dimensions are ignored.
-        """
-        if arr.size == 0:
-            return [0] * arr.ndim
-        else:
-            return [
-                stride / arr.itemsize
-                for (stride, shape) in zip(arr.strides, arr.shape)
-                if shape > 1
-            ]
-
-    def assertStridesEqual(self, first, second):
-        """
-        Test that two arrays have the same shape and strides.
-        """
-        self.assertEqual(first.shape, second.shape, "shapes differ")
-        self.assertEqual(first.itemsize, second.itemsize, "itemsizes differ")
-        self.assertEqual(
-            self._fix_strides(first),
-            self._fix_strides(second),
-            "strides differ",
-        )
-
-    def assertPreciseEqual(
-        self,
-        first,
-        second,
-        prec="exact",
-        ulps=1,
-        msg=None,
-        ignore_sign_on_zero=False,
-        abs_tol=None,
-    ):
-        """
-        Versatile equality testing function with more built-in checks than
-        standard assertEqual().
-
-        For arrays, test that layout, dtype, shape are identical, and
-        recursively call assertPreciseEqual() on the contents.
-
-        For other sequences, recursively call assertPreciseEqual() on
-        the contents.
-
-        For scalars, test that two scalars or have similar types and are
-        equal up to a computed precision.
-        If the scalars are instances of exact types or if *prec* is
-        'exact', they are compared exactly.
-        If the scalars are instances of inexact types (float, complex)
-        and *prec* is not 'exact', then the number of significant bits
-        is computed according to the value of *prec*: 53 bits if *prec*
-        is 'double', 24 bits if *prec* is single.  This number of bits
-        can be lowered by raising the *ulps* value.
-        ignore_sign_on_zero can be set to True if zeros are to be considered
-        equal regardless of their sign bit.
-        abs_tol if this is set to a float value its value is used in the
-        following. If, however, this is set to the string "eps" then machine
-        precision of the type(first) is used in the following instead. This
-        kwarg is used to check if the absolute difference in value between first
-        and second is less than the value set, if so the numbers being compared
-        are considered equal. (This is to handle small numbers typically of
-        magnitude less than machine precision).
-
-        Any value of *prec* other than 'exact', 'single' or 'double'
-        will raise an error.
-        """
-        try:
-            self._assertPreciseEqual(
-                first, second, prec, ulps, msg, ignore_sign_on_zero, abs_tol
-            )
-        except AssertionError as exc:
-            failure_msg = str(exc)
-            # Fall off of the 'except' scope to avoid Python 3 exception
-            # chaining.
-        else:
-            return
-        # Decorate the failure message with more information
-        self.fail("when comparing %s and %s: %s" % (first, second, failure_msg))
-
-    def _assertPreciseEqual(
-        self,
-        first,
-        second,
-        prec="exact",
-        ulps=1,
-        msg=None,
-        ignore_sign_on_zero=False,
-        abs_tol=None,
-    ):
-        """Recursive workhorse for assertPreciseEqual()."""
-
-        def _assertNumberEqual(first, second, delta=None):
-            if (
-                delta is None
-                or first == second == 0.0
-                or math.isinf(first)
-                or math.isinf(second)
-            ):
-                self.assertEqual(first, second, msg=msg)
-                # For signed zeros
-                if not ignore_sign_on_zero:
-                    try:
-                        if math.copysign(1, first) != math.copysign(1, second):
-                            self.fail(
-                                self._formatMessage(
-                                    msg, "%s != %s" % (first, second)
-                                )
-                            )
-                    except TypeError:
-                        pass
-            else:
-                self.assertAlmostEqual(first, second, delta=delta, msg=msg)
-
-        first_family = self._detect_family(first)
-        second_family = self._detect_family(second)
-
-        assertion_message = "Type Family mismatch. (%s != %s)" % (
-            first_family,
-            second_family,
-        )
-        if msg:
-            assertion_message += ": %s" % (msg,)
-        self.assertEqual(first_family, second_family, msg=assertion_message)
-
-        # We now know they are in the same comparison family
-        compare_family = first_family
-
-        # For recognized sequences, recurse
-        if compare_family == "ndarray":
-            dtype = self._fix_dtype(first.dtype)
-            self.assertEqual(dtype, self._fix_dtype(second.dtype))
-            self.assertEqual(
-                first.ndim, second.ndim, "different number of dimensions"
-            )
-            self.assertEqual(first.shape, second.shape, "different shapes")
-            self.assertEqual(
-                first.flags.writeable,
-                second.flags.writeable,
-                "different mutability",
-            )
-            # itemsize is already checked by the dtype test above
-            self.assertEqual(
-                self._fix_strides(first),
-                self._fix_strides(second),
-                "different strides",
-            )
-            if first.dtype != dtype:
-                first = first.astype(dtype)
-            if second.dtype != dtype:
-                second = second.astype(dtype)
-            for a, b in zip(first.flat, second.flat):
-                self._assertPreciseEqual(
-                    a, b, prec, ulps, msg, ignore_sign_on_zero, abs_tol
-                )
-            return
-
-        elif compare_family == "sequence":
-            self.assertEqual(len(first), len(second), msg=msg)
-            for a, b in zip(first, second):
-                self._assertPreciseEqual(
-                    a, b, prec, ulps, msg, ignore_sign_on_zero, abs_tol
-                )
-            return
-
-        elif compare_family == "exact":
-            exact_comparison = True
-
-        elif compare_family in ["complex", "approximate"]:
-            exact_comparison = False
-
-        elif compare_family == "enum":
-            self.assertIs(first.__class__, second.__class__)
-            self._assertPreciseEqual(
-                first.value,
-                second.value,
-                prec,
-                ulps,
-                msg,
-                ignore_sign_on_zero,
-                abs_tol,
-            )
-            return
-
-        elif compare_family == "unknown":
-            # Assume these are non-numeric types: we will fall back
-            # on regular unittest comparison.
-            self.assertIs(first.__class__, second.__class__)
-            exact_comparison = True
-
-        else:
-            assert 0, "unexpected family"
-
-        # If a Numpy scalar, check the dtype is exactly the same too
-        # (required for datetime64 and timedelta64).
-        if hasattr(first, "dtype") and hasattr(second, "dtype"):
-            self.assertEqual(first.dtype, second.dtype)
-
-        # Mixing bools and non-bools should always fail
-        if isinstance(first, self._bool_types) != isinstance(
-            second, self._bool_types
-        ):
-            assertion_message = "Mismatching return types (%s vs. %s)" % (
-                first.__class__,
-                second.__class__,
-            )
-            if msg:
-                assertion_message += ": %s" % (msg,)
-            self.fail(assertion_message)
-
-        try:
-            if cmath.isnan(first) and cmath.isnan(second):
-                # The NaNs will compare unequal, skip regular comparison
-                return
-        except TypeError:
-            # Not floats.
-            pass
-
-        # if absolute comparison is set, use it
-        if abs_tol is not None:
-            if abs_tol == "eps":
-                rtol = np.finfo(type(first)).eps
-            elif isinstance(abs_tol, float):
-                rtol = abs_tol
-            else:
-                raise ValueError(
-                    'abs_tol is not "eps" or a float, found %s' % abs_tol
-                )
-            if abs(first - second) < rtol:
-                return
-
-        exact_comparison = exact_comparison or prec == "exact"
-
-        if not exact_comparison and prec != "exact":
-            if prec == "single":
-                bits = 24
-            elif prec == "double":
-                bits = 53
-            else:
-                raise ValueError("unsupported precision %r" % (prec,))
-            k = 2 ** (ulps - bits - 1)
-            delta = k * (abs(first) + abs(second))
-        else:
-            delta = None
-        if isinstance(first, self._complex_types):
-            _assertNumberEqual(first.real, second.real, delta)
-            _assertNumberEqual(first.imag, second.imag, delta)
-        elif isinstance(first, (np.timedelta64, np.datetime64)):
-            # Since Np 1.16 NaT == NaT is False, so special comparison needed
-            if np.isnat(first):
-                self.assertEqual(np.isnat(first), np.isnat(second))
-            else:
-                _assertNumberEqual(first, second, delta)
-        else:
-            _assertNumberEqual(first, second, delta)
-
     def make_dummy_type(self):
         """Use to generate a dummy type unique to this test. Returns a python
         Dummy class and a corresponding Numba type DummyType."""

From ca451f19b4412ef1a7d82cb2bffd48f631499b10 Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Sun, 10 May 2026 12:41:05 -0500
Subject: [PATCH 04/10] Move reset_module_warnings outside TestCase class

The make_dummy_type method was moved as well, but it still takes the
test instance as an argument; it would be great to rewrite it so that
a deterministic ID can be generated without access to an instance.
---
 .../numba/cuda/tests/cudapy/test_overload.py  |   7 +-
 .../numba/cuda/tests/cudapy/test_ufuncs.py    |   4 +-
 numba_cuda/numba/cuda/tests/support.py        | 101 +++++++++---------
 3 files changed, 59 insertions(+), 53 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_overload.py b/numba_cuda/numba/cuda/tests/cudapy/test_overload.py
index 2bba8f4e7..a6a14d407 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_overload.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_overload.py
@@ -1,6 +1,8 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 
+import numpy as np
+
 from numba import cuda
 from numba.cuda import types
 from numba.cuda import HAS_NUMBA
@@ -20,8 +22,7 @@
     unittest,
     skip_on_standalone_numba_cuda,
 )
-import numpy as np
-
+from numba.cuda.tests.support import make_dummy_type
 
 # Dummy function definitions to overload
 
@@ -349,7 +350,7 @@ def kernel(x):
 
     @skip_on_standalone_numba_cuda
     def test_overload_attribute_target(self):
-        MyDummy, MyDummyType = self.make_dummy_type()
+        MyDummy, MyDummyType = make_dummy_type(self)
         mydummy_type_cpu = cpu_typeof(MyDummy())  # For @njit (cpu)
         mydummy_type = typeof(MyDummy())  # For @cuda.jit (CUDA)
 
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
index c3d85e8ac..8222f9866 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
@@ -15,7 +15,7 @@
 from numba.cuda.testing import skip_on_standalone_numba_cuda
 from numba.cuda.typing.typeof import typeof
 from numba.cuda.np import numpy_support
-from numba.cuda.tests.support import TestCase
+from numba.cuda.tests.support import TestCase, reset_module_warnings
 
 
 class BaseUFuncTest:
@@ -121,7 +121,7 @@ def basic_ufunc_test(
     ):
         # Necessary to avoid some Numpy warnings being silenced, despite
         # the simplefilter() call below.
-        self.reset_module_warnings(__name__)
+        reset_module_warnings(__name__)
 
         pyfunc = self._make_ufunc_usecase(ufunc)
 
diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index cda7a53e8..1187fc942 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -193,6 +193,20 @@ def ignore_internal_warnings():
     )
 
 
+def reset_module_warnings(module):
+    """
+    Reset the warnings registry of *module* (a module object, or the
+    name of a module present in sys.modules).  This can be necessary as
+    the warnings module is buggy.  See http://bugs.python.org/issue4180
+    """
+    if isinstance(module, str):
+        module = sys.modules[module]
+    try:
+        del module.__warningregistry__
+    except AttributeError:
+        pass
+
+
 @contextlib.contextmanager
 def override_config(name, value):
     """
@@ -208,6 +222,45 @@ def override_config(name, value):
         setattr(config, name, old_value)
 
 
+# TODO: how to generate self.id() without access to instance?
+def make_dummy_type(test_obj: unittest.TestCase):
+    """
+    Generate a dummy type named after ``test_obj.id()`` so it is unique
+    to that test.  Returns a Python Dummy class and the Numba DummyType.
+    """
+
+    # Use test_id to make sure no collision is possible.
+    test_id = test_obj.id()
+    DummyType = type("DummyTypeFor{}".format(test_id), (types.Opaque,), {})
+
+    dummy_type = DummyType("my_dummy")
+    register_model(DummyType)(OpaqueModel)
+
+    class Dummy:
+        pass
+
+    @typeof_impl.register(Dummy)
+    def typeof_dummy(val, c):
+        return dummy_type
+
+    # Dual registration for cross-target tests
+    if HAS_NUMBA:
+        UpstreamDummyType = type(
+            "DummyTypeFor{}".format(test_id), (upstream_types.Opaque,), {}
+        )
+        upstream_dummy_type = UpstreamDummyType("my_dummy")
+
+        @upstream_typeof_impl.register(Dummy)
+        def typeof_dummy_core(val, c):
+            return upstream_dummy_type
+
+    @unbox(DummyType)
+    def unbox_dummy(typ, obj, c):
+        return NativeValue(c.context.get_dummy_value())
+
+    return Dummy, DummyType
+
+
 def run_in_subprocess(code, flags=(), env=None, timeout=30):
     """Run a snippet of Python code in a subprocess with flags, if any are
     given. 'env' is passed to subprocess.Popen(). 'timeout' is passed to
@@ -663,19 +716,6 @@ class TestCase(unittest.TestCase):
     def random(self):
         return np.random.RandomState(42)
 
-    def reset_module_warnings(self, module):
-        """
-        Reset the warnings registry of a module.  This can be necessary
-        as the warnings module is buggy in that regard.
-        See http://bugs.python.org/issue4180
-        """
-        if isinstance(module, str):
-            module = sys.modules[module]
-        try:
-            del module.__warningregistry__
-        except AttributeError:
-            pass
-
     @contextlib.contextmanager
     def assertTypingError(self):
         """
@@ -746,41 +786,6 @@ def assertNoNRTLeak(self):
             "number of meminfo allocs != number of meminfo frees",
         )
 
-    def make_dummy_type(self):
-        """Use to generate a dummy type unique to this test. Returns a python
-        Dummy class and a corresponding Numba type DummyType."""
-
-        # Use test_id to make sure no collision is possible.
-        test_id = self.id()
-        DummyType = type("DummyTypeFor{}".format(test_id), (types.Opaque,), {})
-
-        dummy_type = DummyType("my_dummy")
-        register_model(DummyType)(OpaqueModel)
-
-        class Dummy:
-            pass
-
-        @typeof_impl.register(Dummy)
-        def typeof_dummy(val, c):
-            return dummy_type
-
-        # Dual registration for cross-target tests
-        if HAS_NUMBA:
-            UpstreamDummyType = type(
-                "DummyTypeFor{}".format(test_id), (upstream_types.Opaque,), {}
-            )
-            upstream_dummy_type = UpstreamDummyType("my_dummy")
-
-            @upstream_typeof_impl.register(Dummy)
-            def typeof_dummy_core(val, c):
-                return upstream_dummy_type
-
-        @unbox(DummyType)
-        def unbox_dummy(typ, obj, c):
-            return NativeValue(c.context.get_dummy_value())
-
-        return Dummy, DummyType
-
 
 class MemoryLeak:
     __enable_leak_check = True

From 104dd3f9a1377f568068e08c94eb509243509e7d Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Sun, 10 May 2026 13:03:22 -0500
Subject: [PATCH 05/10] Comment out some unused assertion functions from
 TestCase class

---
 numba_cuda/numba/cuda/tests/support.py | 110 +++++++++++++------------
 1 file changed, 56 insertions(+), 54 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 1187fc942..29805a333 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -716,38 +716,40 @@ class TestCase(unittest.TestCase):
     def random(self):
         return np.random.RandomState(42)
 
-    @contextlib.contextmanager
-    def assertTypingError(self):
-        """
-        A context manager that asserts the enclosed code block fails
-        compiling in nopython mode.
-        """
-        _accepted_errors = (
-            errors.LoweringError,
-            errors.TypingError,
-            TypeError,
-            NotImplementedError,
-        )
-        with self.assertRaises(_accepted_errors) as cm:
-            yield cm
+    # TODO: safe to delete since it's not used anywhere in numba_cuda?
+    # @contextlib.contextmanager
+    # def assertTypingError(self):
+    #     """
+    #     A context manager that asserts the enclosed code block fails
+    #     compiling in nopython mode.
+    #     """
+    #     _accepted_errors = (
+    #         errors.LoweringError,
+    #         errors.TypingError,
+    #         TypeError,
+    #         NotImplementedError,
+    #     )
+    #     with self.assertRaises(_accepted_errors) as cm:
+    #         yield cm
 
-    @contextlib.contextmanager
-    def assertRefCount(self, *objects):
-        """
-        A context manager that asserts the given objects have the
-        same reference counts before and after executing the
-        enclosed block.
-        """
-        old_refcounts = [sys.getrefcount(x) for x in objects]
-        yield
-        gc.collect()
-        new_refcounts = [sys.getrefcount(x) for x in objects]
-        for old, new, obj in zip(old_refcounts, new_refcounts, objects):
-            if old != new:
-                self.fail(
-                    "Refcount changed from %d to %d for object: %r"
-                    % (old, new, obj)
-                )
+    # TODO: safe to delete since it's not used anywhere in numba_cuda?
+    # @contextlib.contextmanager
+    # def assertRefCount(self, *objects):
+    #     """
+    #     A context manager that asserts the given objects have the
+    #     same reference counts before and after executing the
+    #     enclosed block.
+    #     """
+    #     old_refcounts = [sys.getrefcount(x) for x in objects]
+    #     yield
+    #     gc.collect()
+    #     new_refcounts = [sys.getrefcount(x) for x in objects]
+    #     for old, new, obj in zip(old_refcounts, new_refcounts, objects):
+    #         if old != new:
+    #             self.fail(
+    #                 "Refcount changed from %d to %d for object: %r"
+    #                 % (old, new, obj)
+    #             )
 
     # TODO: safe to delete since it's not used anywhere in numba_cuda?
     # def assertRefCountEqual(self, *objects):
@@ -762,29 +764,29 @@ def assertRefCount(self, *objects):
     #                 f"#0({rc_0}) != #{i}({rc_i}) does not match."
     #             )
 
-    @contextlib.contextmanager
-    def assertNoNRTLeak(self):
-        """
-        A context manager that asserts no NRT leak was created during
-        the execution of the enclosed block.
-        """
-        old = rtsys.get_allocation_stats()
-        yield
-        new = rtsys.get_allocation_stats()
-        total_alloc = new.alloc - old.alloc
-        total_free = new.free - old.free
-        total_mi_alloc = new.mi_alloc - old.mi_alloc
-        total_mi_free = new.mi_free - old.mi_free
-        self.assertEqual(
-            total_alloc,
-            total_free,
-            "number of data allocs != number of data frees",
-        )
-        self.assertEqual(
-            total_mi_alloc,
-            total_mi_free,
-            "number of meminfo allocs != number of meminfo frees",
-        )
+    # @contextlib.contextmanager
+    # def assertNoNRTLeak(self):
+    #     """
+    #     A context manager that asserts no NRT leak was created during
+    #     the execution of the enclosed block.
+    #     """
+    #     old = rtsys.get_allocation_stats()
+    #     yield
+    #     new = rtsys.get_allocation_stats()
+    #     total_alloc = new.alloc - old.alloc
+    #     total_free = new.free - old.free
+    #     total_mi_alloc = new.mi_alloc - old.mi_alloc
+    #     total_mi_free = new.mi_free - old.mi_free
+    #     self.assertEqual(
+    #         total_alloc,
+    #         total_free,
+    #         "number of data allocs != number of data frees",
+    #     )
+    #     self.assertEqual(
+    #         total_mi_alloc,
+    #         total_mi_free,
+    #         "number of meminfo allocs != number of meminfo frees",
+    #     )
 
 
 class MemoryLeak:

From 0aa1a40b4e823b354c1e8ea4b0b2477ec50a99eb Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Sun, 10 May 2026 21:19:56 -0500
Subject: [PATCH 06/10] Migrate test_flow_control from unittest to pytest

Moving the remaining function 'random' outside of TestCase class
while maintaining its caching behavior meant making it a pytest
fixture. However, this will only work if TestFlowControl class is
no longer a unittest.TestCase subclass. This commit does so, and
replaces unittest API usage with assert statements.
---
 .../cuda/tests/cudapy/test_flow_control.py    | 338 +++++++++---------
 numba_cuda/numba/cuda/tests/support.py        |  14 +-
 2 files changed, 168 insertions(+), 184 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py b/numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py
index 3fa45a09e..b04065c43 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_flow_control.py
@@ -2,8 +2,8 @@
 # SPDX-License-Identifier: BSD-2-Clause
 
 import itertools
+import pytest
 
-import unittest
 from numba.cuda import jit
 from numba.cuda.core.controlflow import CFGraph, ControlFlowAnalysis
 from numba.cuda import types
@@ -12,12 +12,12 @@
     ByteCode,
     _fix_LOAD_GLOBAL_arg,
 )
-from numba.cuda.tests.support import TestCase
 from numba.cuda import utils
 from numba.cuda.core import config
+from numba.cuda.tests.support import random
 
 if config.ENABLE_CUDASIM:
-    raise unittest.SkipTest("Analysis passes not done in simulator")
+    pytest.skip("Analysis passes not done in simulator", allow_module_level=True)
 
 
 def for_loop_usecase1(x, y, res1, res2):
@@ -214,7 +214,7 @@ def try_except_usecase():
         pass
 
 
-class TestFlowControl(TestCase):
+class TestFlowControl:
     def run_test(
         self,
         pyfunc,
@@ -242,15 +242,15 @@ def run_test(
                 if pyerr is None:
                     raise
                 cerr = e
-                self.assertEqual(type(pyerr), type(cerr))
+                assert type(pyerr) is type(cerr)
             else:
                 if pyerr is not None:
                     self.fail(
                         "Invalid for pure-python but numba-cuda works\n"
                         + str(pyerr)
                     )
-                self.assertEqual(res1, cres1)
-                self.assertEqual(res2, cres2)
+                assert res1 == cres1
+                assert res2 == cres2
 
     def test_for_loop1(self):
         self.run_test(for_loop_usecase1, [-10, 0, 10], [0], [0], [0])
@@ -288,11 +288,11 @@ def test_for_loop7(self):
     def test_for_loop7_npm(self):
         self.test_for_loop7()
 
-    @unittest.expectedFailure
+    @pytest.mark.xfail
     def test_for_loop8(self):
         self.run_test(for_loop_usecase8, [0, 1], [0, 2, 10], [0], [0])
 
-    @unittest.expectedFailure
+    @pytest.mark.xfail
     def test_for_loop8_npm(self):
         self.test_for_loop8()
 
@@ -381,7 +381,7 @@ def test_double_infinite_loop_npm(self):
         self.test_double_infinite_loop()
 
 
-class TestCFGraph(TestCase):
+class TestCFGraph:
     """
     Test the numba.controlflow.CFGraph class.
     """
@@ -556,104 +556,98 @@ def infinite_loop2(self):
 
     def test_simple_properties(self):
         g = self.loopless1()
-        self.assertEqual(sorted(g.successors(0)), [(12, None), (18, None)])
-        self.assertEqual(sorted(g.successors(21)), [])
-        self.assertEqual(sorted(g.predecessors(0)), [])
-        self.assertEqual(sorted(g.predecessors(21)), [(12, None), (18, None)])
+        assert sorted(g.successors(0)) == [(12, None), (18, None)]
+        assert sorted(g.successors(21)) == []
+        assert sorted(g.predecessors(0)) == []
+        assert sorted(g.predecessors(21)) == [(12, None), (18, None)]
 
     def test_exit_points(self):
         g = self.loopless1()
-        self.assertEqual(sorted(g.exit_points()), [21])
+        assert sorted(g.exit_points()) == [21]
         g = self.loopless1_dead_nodes()
-        self.assertEqual(sorted(g.exit_points()), [21])
+        assert sorted(g.exit_points()) == [21]
         g = self.loopless2()
-        self.assertEqual(sorted(g.exit_points()), [34, 42])
+        assert sorted(g.exit_points()) == [34, 42]
         g = self.multiple_loops()
-        self.assertEqual(sorted(g.exit_points()), [80, 88])
+        assert sorted(g.exit_points()) == [80, 88]
         g = self.infinite_loop1()
-        self.assertEqual(sorted(g.exit_points()), [6])
+        assert sorted(g.exit_points()) == [6]
         g = self.infinite_loop2()
-        self.assertEqual(sorted(g.exit_points()), [])
+        assert sorted(g.exit_points()) == []
         g = self.multiple_exits()
-        self.assertEqual(sorted(g.exit_points()), [19, 37])
+        assert sorted(g.exit_points()) == [19, 37]
 
     def test_dead_nodes(self):
         g = self.loopless1()
-        self.assertEqual(len(g.dead_nodes()), 0)
-        self.assertEqual(sorted(g.nodes()), [0, 12, 18, 21])
+        assert len(g.dead_nodes()) == 0
+        assert sorted(g.nodes()) == [0, 12, 18, 21]
         g = self.loopless2()
-        self.assertEqual(len(g.dead_nodes()), 0)
-        self.assertEqual(sorted(g.nodes()), [12, 18, 21, 34, 42, 99])
+        assert len(g.dead_nodes()) == 0
+        assert sorted(g.nodes()) == [12, 18, 21, 34, 42, 99]
         g = self.multiple_loops()
-        self.assertEqual(len(g.dead_nodes()), 0)
+        assert len(g.dead_nodes()) == 0
         g = self.infinite_loop1()
-        self.assertEqual(len(g.dead_nodes()), 0)
+        assert len(g.dead_nodes()) == 0
         g = self.multiple_exits()
-        self.assertEqual(len(g.dead_nodes()), 0)
+        assert len(g.dead_nodes()) == 0
         # Only this example has dead nodes
         g = self.loopless1_dead_nodes()
-        self.assertEqual(sorted(g.dead_nodes()), [91, 92, 93, 94])
-        self.assertEqual(sorted(g.nodes()), [0, 12, 18, 21])
+        assert sorted(g.dead_nodes()) == [91, 92, 93, 94]
+        assert sorted(g.nodes()) == [0, 12, 18, 21]
 
     def test_descendents(self):
         g = self.loopless2()
         d = g.descendents(34)
-        self.assertEqual(sorted(d), [])
+        assert sorted(d) == []
         d = g.descendents(42)
-        self.assertEqual(sorted(d), [])
+        assert sorted(d) == []
         d = g.descendents(21)
-        self.assertEqual(sorted(d), [34, 42])
+        assert sorted(d) == [34, 42]
         d = g.descendents(99)
-        self.assertEqual(sorted(d), [12, 18, 21, 34, 42])
+        assert sorted(d) == [12, 18, 21, 34, 42]
         g = self.infinite_loop1()
         d = g.descendents(26)
-        self.assertEqual(sorted(d), [])
+        assert sorted(d) == []
         d = g.descendents(19)
-        self.assertEqual(sorted(d), [])
+        assert sorted(d) == []
         d = g.descendents(13)
-        self.assertEqual(sorted(d), [19, 26])
+        assert sorted(d) == [19, 26]
         d = g.descendents(10)
-        self.assertEqual(sorted(d), [13, 19, 26])
+        assert sorted(d) == [13, 19, 26]
         d = g.descendents(6)
-        self.assertEqual(sorted(d), [])
+        assert sorted(d) == []
         d = g.descendents(0)
-        self.assertEqual(sorted(d), [6, 10, 13, 19, 26])
+        assert sorted(d) == [6, 10, 13, 19, 26]
 
     def test_topo_order(self):
         g = self.loopless1()
-        self.assertIn(g.topo_order(), ([0, 12, 18, 21], [0, 18, 12, 21]))
+        assert g.topo_order() in ([0, 12, 18, 21], [0, 18, 12, 21])
         g = self.loopless2()
-        self.assertIn(
-            g.topo_order(), ([99, 18, 12, 21, 34, 42], [99, 12, 18, 21, 34, 42])
+        assert g.topo_order() in (
+            [99, 18, 12, 21, 34, 42],
+            [99, 12, 18, 21, 34, 42],
         )
         g = self.infinite_loop2()
-        self.assertIn(g.topo_order(), ([0, 3, 9, 16], [0, 3, 16, 9]))
+        assert g.topo_order() in ([0, 3, 9, 16], [0, 3, 16, 9])
         g = self.infinite_loop1()
-        self.assertIn(
-            g.topo_order(),
-            (
-                [0, 6, 10, 13, 19, 26],
-                [0, 6, 10, 13, 26, 19],
-                [0, 10, 13, 19, 26, 6],
-                [0, 10, 13, 26, 19, 6],
-            ),
+        assert g.topo_order() in (
+            [0, 6, 10, 13, 19, 26],
+            [0, 6, 10, 13, 26, 19],
+            [0, 10, 13, 19, 26, 6],
+            [0, 10, 13, 26, 19, 6],
         )
 
-    def test_topo_sort(self):
+    def test_topo_sort(self, random):
         def check_topo_sort(nodes, expected):
-            self.assertIn(list(g.topo_sort(nodes)), expected)
-            self.assertIn(list(g.topo_sort(nodes[::-1])), expected)
-            self.assertIn(
-                list(g.topo_sort(nodes, reverse=True))[::-1], expected
-            )
-            self.assertIn(
-                list(g.topo_sort(nodes[::-1], reverse=True))[::-1], expected
-            )
-            self.random.shuffle(nodes)
-            self.assertIn(list(g.topo_sort(nodes)), expected)
-            self.assertIn(
-                list(g.topo_sort(nodes, reverse=True))[::-1], expected
+            assert list(g.topo_sort(nodes)) in expected
+            assert list(g.topo_sort(nodes[::-1])) in expected
+            assert list(g.topo_sort(nodes, reverse=True))[::-1] in expected
+            assert (
+                list(g.topo_sort(nodes[::-1], reverse=True))[::-1] in expected
             )
+            random.shuffle(nodes)
+            assert list(g.topo_sort(nodes)) in expected
+            assert list(g.topo_sort(nodes, reverse=True))[::-1] in expected
 
         g = self.loopless2()
         check_topo_sort([21, 99, 12, 34], ([99, 12, 21, 34],))
@@ -665,17 +659,15 @@ def check_topo_sort(nodes, expected):
         )
 
     def check_dominators(self, got, expected):
-        self.assertEqual(sorted(got), sorted(expected))
+        assert sorted(got) == sorted(expected)
         for node in sorted(got):
-            self.assertEqual(
-                sorted(got[node]),
-                sorted(expected[node]),
-                "mismatch for %r" % (node,),
+            assert sorted(got[node]) == sorted(expected[node]), (
+                f"mismatch for {node!r}"
             )
 
     def test_dominators_loopless(self):
         def eq_(d, l):
-            self.assertEqual(sorted(doms[d]), l)
+            assert sorted(doms[d]) == l
 
         for g in [self.loopless1(), self.loopless1_dead_nodes()]:
             doms = g.dominators()
@@ -748,7 +740,7 @@ def test_dominators_loops(self):
 
     def test_post_dominators_loopless(self):
         def eq_(d, l):
-            self.assertEqual(sorted(doms[d]), l)
+            assert sorted(doms[d]) == l
 
         for g in [self.loopless1(), self.loopless1_dead_nodes()]:
             doms = g.post_dominators()
@@ -837,7 +829,7 @@ def test_post_dominators_infinite_loops(self):
     def test_dominator_tree(self):
         def check(graph, expected):
             domtree = graph.dominator_tree()
-            self.assertEqual(domtree, expected)
+            assert domtree == expected
 
         check(
             self.loopless1(), {0: {12, 18, 21}, 12: set(), 18: set(), 21: set()}
@@ -908,7 +900,7 @@ def check(graph, expected):
     def test_immediate_dominators(self):
         def check(graph, expected):
             idoms = graph.immediate_dominators()
-            self.assertEqual(idoms, expected)
+            assert idoms == expected
 
         check(self.loopless1(), {0: 0, 12: 0, 18: 0, 21: 0})
         check(
@@ -949,7 +941,7 @@ def check(graph, expected):
     def test_dominance_frontier(self):
         def check(graph, expected):
             df = graph.dominance_frontier()
-            self.assertEqual(df, expected)
+            assert df == expected
 
         check(self.loopless1(), {0: set(), 12: {21}, 18: {21}, 21: set()})
         check(
@@ -1003,17 +995,17 @@ def check(graph, expected):
 
     def test_backbone_loopless(self):
         for g in [self.loopless1(), self.loopless1_dead_nodes()]:
-            self.assertEqual(sorted(g.backbone()), [0, 21])
+            assert sorted(g.backbone()) == [0, 21]
         g = self.loopless2()
-        self.assertEqual(sorted(g.backbone()), [21, 99])
+        assert sorted(g.backbone()) == [21, 99]
 
     def test_backbone_loops(self):
         g = self.multiple_loops()
-        self.assertEqual(sorted(g.backbone()), [0, 7, 60, 61, 68])
+        assert sorted(g.backbone()) == [0, 7, 60, 61, 68]
         g = self.infinite_loop1()
-        self.assertEqual(sorted(g.backbone()), [0])
+        assert sorted(g.backbone()) == [0]
         g = self.infinite_loop2()
-        self.assertEqual(sorted(g.backbone()), [0, 3])
+        assert sorted(g.backbone()) == [0, 3]
 
     def test_loops(self):
         for g in [
@@ -1021,75 +1013,73 @@ def test_loops(self):
             self.loopless1_dead_nodes(),
             self.loopless2(),
         ]:
-            self.assertEqual(len(g.loops()), 0)
+            assert len(g.loops()) == 0
 
         g = self.multiple_loops()
         # Loop headers
-        self.assertEqual(sorted(g.loops()), [7, 20, 68])
+        assert sorted(g.loops()) == [7, 20, 68]
         outer1 = g.loops()[7]
         inner1 = g.loops()[20]
         outer2 = g.loops()[68]
-        self.assertEqual(outer1.header, 7)
-        self.assertEqual(sorted(outer1.entries), [0])
-        self.assertEqual(sorted(outer1.exits), [60])
-        self.assertEqual(
-            sorted(outer1.body), [7, 10, 13, 20, 23, 32, 44, 56, 57]
-        )
-        self.assertEqual(inner1.header, 20)
-        self.assertEqual(sorted(inner1.entries), [13])
-        self.assertEqual(sorted(inner1.exits), [56])
-        self.assertEqual(sorted(inner1.body), [20, 23, 32, 44])
-        self.assertEqual(outer2.header, 68)
-        self.assertEqual(sorted(outer2.entries), [61])
-        self.assertEqual(sorted(outer2.exits), [80, 87])
-        self.assertEqual(sorted(outer2.body), [68, 71])
+        assert outer1.header == 7
+        assert sorted(outer1.entries) == [0]
+        assert sorted(outer1.exits) == [60]
+        assert sorted(outer1.body) == [7, 10, 13, 20, 23, 32, 44, 56, 57]
+        assert inner1.header == 20
+        assert sorted(inner1.entries) == [13]
+        assert sorted(inner1.exits) == [56]
+        assert sorted(inner1.body) == [20, 23, 32, 44]
+        assert outer2.header == 68
+        assert sorted(outer2.entries) == [61]
+        assert sorted(outer2.exits) == [80, 87]
+        assert sorted(outer2.body) == [68, 71]
         for node in [0, 60, 61, 80, 87, 88]:
-            self.assertEqual(g.in_loops(node), [])
+            assert g.in_loops(node) == []
         for node in [7, 10, 13, 56, 57]:
-            self.assertEqual(g.in_loops(node), [outer1])
+            assert g.in_loops(node) == [outer1]
         for node in [20, 23, 32, 44]:
-            self.assertEqual(g.in_loops(node), [inner1, outer1])
+            assert g.in_loops(node) == [inner1, outer1]
         for node in [68, 71]:
-            self.assertEqual(g.in_loops(node), [outer2])
+            assert g.in_loops(node) == [outer2]
 
         g = self.infinite_loop1()
         # Loop headers
-        self.assertEqual(sorted(g.loops()), [13])
+        assert sorted(g.loops()) == [13]
         loop = g.loops()[13]
-        self.assertEqual(loop.header, 13)
-        self.assertEqual(sorted(loop.entries), [10])
-        self.assertEqual(sorted(loop.exits), [])
-        self.assertEqual(sorted(loop.body), [13, 19, 26])
+        assert loop.header == 13
+        assert sorted(loop.entries) == [10]
+        assert sorted(loop.exits) == []
+        assert sorted(loop.body) == [13, 19, 26]
         for node in [0, 6, 10]:
-            self.assertEqual(g.in_loops(node), [])
+            assert g.in_loops(node) == []
         for node in [13, 19, 26]:
-            self.assertEqual(g.in_loops(node), [loop])
+            assert g.in_loops(node) == [loop]
 
         g = self.infinite_loop2()
         # Loop headers
-        self.assertEqual(sorted(g.loops()), [3])
+        assert sorted(g.loops()) == [3]
         loop = g.loops()[3]
-        self.assertEqual(loop.header, 3)
-        self.assertEqual(sorted(loop.entries), [0])
-        self.assertEqual(sorted(loop.exits), [])
-        self.assertEqual(sorted(loop.body), [3, 9, 16])
+        assert loop.header == 3
+        assert sorted(loop.entries) == [0]
+        assert sorted(loop.exits) == []
+        assert sorted(loop.body) == [3, 9, 16]
         for node in [0]:
-            self.assertEqual(g.in_loops(node), [])
+            assert g.in_loops(node) == []
         for node in [3, 9, 16]:
-            self.assertEqual(g.in_loops(node), [loop])
+            assert g.in_loops(node) == [loop]
 
         g = self.multiple_exits()
         # Loop headers
-        self.assertEqual(sorted(g.loops()), [7])
+        assert sorted(g.loops()) == [7]
         loop = g.loops()[7]
-        self.assertEqual(loop.header, 7)
-        self.assertEqual(sorted(loop.entries), [0])
-        self.assertEqual(sorted(loop.exits), [19, 29, 36])
-        self.assertEqual(sorted(loop.body), [7, 10, 23])
+        assert loop.header == 7
+        assert sorted(loop.entries) == [0]
+        assert sorted(loop.exits) == [19, 29, 36]
+        assert sorted(loop.body) == [7, 10, 23]
         for node in [0, 19, 29, 36]:
-            self.assertEqual(g.in_loops(node), [])
+            assert g.in_loops(node) == []
         for node in [7, 10, 23]:
-            self.assertEqual(g.in_loops(node), [loop])
+            assert g.in_loops(node) == [loop]
 
     def test_loop_dfs_pathological(self):
         # The follow adjlist is an export from the reproducer in #6186
@@ -1164,8 +1154,8 @@ def test_loop_dfs_pathological(self):
         stats = {}
         # Compute backedges and store the iteration count for testing
         back_edges = g._find_back_edges(stats=stats)
-        self.assertEqual(back_edges, {(666, 610), (778, 722)})
-        self.assertEqual(stats["iteration_count"], 155)
+        assert back_edges == {(666, 610), (778, 722)}
+        assert stats["iteration_count"] == 155
 
     def test_equals(self):
         def get_new():
@@ -1178,19 +1168,19 @@ def get_new():
         y = get_new()
 
         # identical
-        self.assertEqual(x, y)
+        assert x == y
 
         # identical but defined in a different order
         g = self.from_adj_list({0: [12, 18], 18: [21], 21: [], 12: [21]})
         g.set_entry_point(0)
         g.process()
-        self.assertEqual(x, g)
+        assert x == g
 
         # different entry point
         z = get_new()
         z.set_entry_point(18)
         z.process()
-        self.assertNotEqual(x, z)
+        assert x != z
 
         # extra node/edge, same entry point
         z = self.from_adj_list(
@@ -1198,7 +1188,7 @@ def get_new():
         )
         z.set_entry_point(0)
         z.process()
-        self.assertNotEqual(x, z)
+        assert x != z
 
         # same nodes, different edges
         a = self.from_adj_list({0: [18, 12], 12: [0], 18: []})
@@ -1207,10 +1197,10 @@ def get_new():
         z = self.from_adj_list({0: [18, 12], 12: [18], 18: []})
         z.set_entry_point(0)
         z.process()
-        self.assertNotEqual(a, z)
+        assert a != z
 
 
-class TestRealCodeDomFront(TestCase):
+class TestRealCodeDomFront:
     """Test IDOM and DOMFRONT computation on real python bytecode.
     Note: there will be less testing on IDOM (esp in loop) because of
     the extra blocks inserted by the interpreter.  But, testing on DOMFRONT
@@ -1290,8 +1280,8 @@ def foo(n):
         # Also, `SET_BLOCK_B0` is duplicated. As a result, the second B0
         # is picked up by `blkpts`.
         domfront = cfa.graph.dominance_frontier()
-        self.assertFalse(domfront[blkpts["A"]])
-        self.assertFalse(domfront[blkpts["C"]])
+        assert not domfront[blkpts["A"]]
+        assert not domfront[blkpts["C"]]
 
     def test_loop_nested_and_break(self):
         def foo(n):
@@ -1310,16 +1300,16 @@ def foo(n):
             SET_BLOCK_G  # noqa: F821
 
         cfa, blkpts = self.get_cfa_and_namedblocks(foo)
-        self.assertEqual(blkpts["D0"], blkpts["C1"])
+        assert blkpts["D0"] == blkpts["C1"]
 
         # Py3.10 changes while loop into if-do-while
         domfront = cfa.graph.dominance_frontier()
-        self.assertFalse(domfront[blkpts["A"]])
-        self.assertFalse(domfront[blkpts["G"]])
+        assert not domfront[blkpts["A"]]
+        assert not domfront[blkpts["G"]]
         # 2 domfront members for C1
         # C0 because of the loop; F because of the break.
-        self.assertEqual({blkpts["F"]}, domfront[blkpts["D1"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["D2"]])
+        assert {blkpts["F"]} == domfront[blkpts["D1"]]
+        assert {blkpts["E"]} == domfront[blkpts["D2"]]
 
     def test_if_else(self):
         def foo(a, b):
@@ -1345,23 +1335,23 @@ def foo(a, b):
         cfa, blkpts = self.get_cfa_and_namedblocks(foo)
 
         idoms = cfa.graph.immediate_dominators()
-        self.assertEqual(blkpts["A"], idoms[blkpts["B"]])
-        self.assertEqual(blkpts["A"], idoms[blkpts["C0"]])
-        self.assertEqual(blkpts["C0"], idoms[blkpts["C1"]])
-        self.assertEqual(blkpts["C0"], idoms[blkpts["D"]])
-        self.assertEqual(blkpts["A"], idoms[blkpts["E"]])
-        self.assertEqual(blkpts["E"], idoms[blkpts["F"]])
-        self.assertEqual(blkpts["E"], idoms[blkpts["G"]])
+        assert blkpts["A"] == idoms[blkpts["B"]]
+        assert blkpts["A"] == idoms[blkpts["C0"]]
+        assert blkpts["C0"] == idoms[blkpts["C1"]]
+        assert blkpts["C0"] == idoms[blkpts["D"]]
+        assert blkpts["A"] == idoms[blkpts["E"]]
+        assert blkpts["E"] == idoms[blkpts["F"]]
+        assert blkpts["E"] == idoms[blkpts["G"]]
 
         domfront = cfa.graph.dominance_frontier()
-        self.assertFalse(domfront[blkpts["A"]])
-        self.assertFalse(domfront[blkpts["E"]])
-        self.assertFalse(domfront[blkpts["G"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["B"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["C0"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["C1"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["D"]])
-        self.assertEqual({blkpts["G"]}, domfront[blkpts["F"]])
+        assert not domfront[blkpts["E"]]
+        assert not domfront[blkpts["A"]]
+        assert not domfront[blkpts["G"]]
+        assert {blkpts["E"]} == domfront[blkpts["B"]]
+        assert {blkpts["E"]} == domfront[blkpts["C0"]]
+        assert {blkpts["E"]} == domfront[blkpts["C1"]]
+        assert {blkpts["E"]} == domfront[blkpts["D"]]
+        assert {blkpts["G"]} == domfront[blkpts["F"]]
 
     def test_if_else_nested(self):
         def foo():
@@ -1385,20 +1375,20 @@ def foo():
         cfa, blkpts = self.get_cfa_and_namedblocks(foo)
 
         idoms = cfa.graph.immediate_dominators()
-        self.assertEqual(blkpts["A0"], idoms[blkpts["A1"]])
-        self.assertEqual(blkpts["A1"], idoms[blkpts["B1"]])
-        self.assertEqual(blkpts["A1"], idoms[blkpts["C0"]])
-        self.assertEqual(blkpts["C0"], idoms[blkpts["D"]])
-        self.assertEqual(blkpts["A1"], idoms[blkpts["E"]])
-        self.assertEqual(blkpts["A0"], idoms[blkpts["F"]])
+        assert blkpts["A0"] == idoms[blkpts["A1"]]
+        assert blkpts["A1"] == idoms[blkpts["B1"]]
+        assert blkpts["A1"] == idoms[blkpts["C0"]]
+        assert blkpts["C0"] == idoms[blkpts["D"]]
+        assert blkpts["A1"] == idoms[blkpts["E"]]
+        assert blkpts["A0"] == idoms[blkpts["F"]]
 
         domfront = cfa.graph.dominance_frontier()
-        self.assertFalse(domfront[blkpts["A0"]])
-        self.assertFalse(domfront[blkpts["F"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["B1"]])
-        self.assertEqual({blkpts["D"]}, domfront[blkpts["C1"]])
-        self.assertEqual({blkpts["E"]}, domfront[blkpts["D"]])
-        self.assertEqual({blkpts["F"]}, domfront[blkpts["E"]])
+        assert not domfront[blkpts["A0"]]
+        assert not domfront[blkpts["F"]]
+        assert {blkpts["E"]} == domfront[blkpts["B1"]]
+        assert {blkpts["D"]} == domfront[blkpts["C1"]]
+        assert {blkpts["E"]} == domfront[blkpts["D"]]
+        assert {blkpts["F"]} == domfront[blkpts["E"]]
 
     def test_infinite_loop(self):
         def foo():
@@ -1414,20 +1404,16 @@ def foo():
 
         idoms = cfa.graph.immediate_dominators()
         if utils.PYVERSION >= (3, 10):
-            self.assertNotIn("E", blkpts)
+            assert "E" not in blkpts
         else:
-            self.assertNotIn(blkpts["E"], idoms)
-        self.assertEqual(blkpts["B"], idoms[blkpts["C"]])
-        self.assertEqual(blkpts["B"], idoms[blkpts["D"]])
+            assert blkpts["E"] not in idoms
+        assert blkpts["B"] == idoms[blkpts["C"]]
+        assert blkpts["B"] == idoms[blkpts["D"]]
 
         domfront = cfa.graph.dominance_frontier()
         if utils.PYVERSION < (3, 10):
-            self.assertNotIn(blkpts["E"], domfront)
-        self.assertFalse(domfront[blkpts["A"]])
-        self.assertFalse(domfront[blkpts["C"]])
-        self.assertEqual({blkpts["B"]}, domfront[blkpts["B"]])
-        self.assertEqual({blkpts["B"]}, domfront[blkpts["D"]])
-
-
-if __name__ == "__main__":
-    unittest.main()
+            assert blkpts["E"] not in domfront
+        assert not domfront[blkpts["A"]]
+        assert not domfront[blkpts["C"]]
+        assert {blkpts["B"]} == domfront[blkpts["B"]]
+        assert {blkpts["B"]} == domfront[blkpts["D"]]
diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 29805a333..1ca80c790 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -261,6 +261,11 @@ def unbox_dummy(typ, obj, c):
     return Dummy, DummyType
 
 
+@pytest.fixture
+def random():
+    return np.random.RandomState(42)
+
+
 def run_in_subprocess(code, flags=(), env=None, timeout=30):
     """Run a snippet of Python code in a subprocess with flags, if any are
     given. 'env' is passed to subprocess.Popen(). 'timeout' is passed to
@@ -708,14 +713,7 @@ def _fix_strides(arr):
 
 
 class TestCase(unittest.TestCase):
-    longMessage = True
-
-    # A random state yielding the same random numbers for any test case.
-    # Use as `self.random.<method name>`
-    @cached_property
-    def random(self):
-        return np.random.RandomState(42)
-
+    pass
     # TODO: safe to delete since it's not used anywhere in numba_cuda?
     # @contextlib.contextmanager
     # def assertTypingError(self):

From 5481099b2f3f59a7984002e8e17c4795f46ac453 Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Mon, 11 May 2026 11:47:06 -0500
Subject: [PATCH 07/10] Refactor test_id generating behavior for pytest usage

Incorporating Graham Markall's suggestions; archaic functions have
been removed, and unique test ids are now generated using pytest
fixtures. Test IDs have a class scope and monotonically increase
whenever pytest calls test functions.

Create numba.cuda.tests.test_support module to house tests for
numba.cuda.tests.support (used to test test_id generation
behavior)

Migrate test_overload module to use pytest exclusively
---
 .../numba/cuda/tests/cudapy/test_overload.py  | 25 +++--
 numba_cuda/numba/cuda/tests/support.py        | 92 ++++---------------
 numba_cuda/numba/cuda/tests/test_support.py   | 22 +++++
 3 files changed, 49 insertions(+), 90 deletions(-)
 create mode 100644 numba_cuda/numba/cuda/tests/test_support.py

diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_overload.py b/numba_cuda/numba/cuda/tests/cudapy/test_overload.py
index a6a14d407..7c85aa6e3 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_overload.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_overload.py
@@ -2,6 +2,7 @@
 # SPDX-License-Identifier: BSD-2-Clause
 
 import numpy as np
+import pytest
 
 from numba import cuda
 from numba.cuda import types
@@ -17,12 +18,10 @@
 from numba.cuda.typing.typeof import typeof
 from numba.core.typing.typeof import typeof as cpu_typeof
 from numba.cuda.testing import (
-    CUDATestCase,
     skip_on_cudasim,
-    unittest,
     skip_on_standalone_numba_cuda,
 )
-from numba.cuda.tests.support import make_dummy_type
+from numba.cuda.tests.support import test_id_generator, make_dummy_type
 
 # Dummy function definitions to overload
 
@@ -232,17 +231,17 @@ def impl(out, x, y=5, z=6):
 
 
 @skip_on_cudasim("Overloading not supported in cudasim")
-class TestOverload(CUDATestCase):
+class TestOverload:
     def check_overload(self, kernel, expected):
         x = np.ones(1, dtype=np.int32)
         cuda.jit(kernel)[1, 1](x)
-        self.assertEqual(x[0], expected)
+        assert x[0] == expected
 
     @skip_on_standalone_numba_cuda
     def check_overload_cpu(self, kernel, expected):
         x = np.ones(1, dtype=np.int32)
         njit(kernel)(x)
-        self.assertEqual(x[0], expected)
+        assert x[0] == expected
 
     def test_generic(self):
         def kernel(x):
@@ -349,8 +348,8 @@ def kernel(x):
         self.check_overload_cpu(kernel, expected)
 
     @skip_on_standalone_numba_cuda
-    def test_overload_attribute_target(self):
-        MyDummy, MyDummyType = make_dummy_type(self)
+    def test_overload_attribute_target(self, test_id):
+        MyDummy, MyDummyType = make_dummy_type(test_id)
         mydummy_type_cpu = cpu_typeof(MyDummy())  # For @njit (cpu)
         mydummy_type = typeof(MyDummy())  # For @cuda.jit (CUDA)
 
@@ -372,12 +371,14 @@ def imp(obj):
         else:
             msg = "Unknown attribute 'cuda_only'"
 
-        with self.assertRaisesRegex(TypingError, msg):
+        with pytest.raises(TypingError, match=msg) as exc_info:
 
             @njit(types.int64(mydummy_type_cpu))
             def illegal_target_attr_use(x):
                 return x.cuda_only
 
+        assert exc_info.type is TypingError
+
         # Ensure that the CUDA target-specific attribute is usable and works
         # correctly when the target is CUDA - note eager compilation via
         # signature
@@ -396,8 +397,4 @@ def kernel(a, b, out):
 
         out = np.empty(2, dtype=np.int64)
         kernel[1, 1](1, 2, out)
-        self.assertEqual(tuple(out), (6, 2))
-
-
-if __name__ == "__main__":
-    unittest.main()
+        assert tuple(out) == (6, 2)
diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 1ca80c790..ee929198f 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -10,6 +10,7 @@
 import unittest
 import os
 import io
+import itertools
 import subprocess
 import sys
 import shutil
@@ -222,15 +223,25 @@ def override_config(name, value):
         setattr(config, name, old_value)
 
 
-# TODO: how to generate self.id() without access to instance?
-def make_dummy_type(test_obj: unittest.TestCase):
+# This can certainly be divided into two pytest fixtures. However, doing so
+# means developers will have to import both 'test_id_generator' and the fixture
+# returning the itertools.count() generator. Importing could be avoided
+# altogether by adding the fixtures in conftest.py, but that potentially
+# pollutes the namespace
+@pytest.fixture(name="test_id")
+def test_id_generator(request):
+    cls = request.cls
+    if not hasattr(cls, "_test_id_counter"):
+        cls._test_id_counter = itertools.count()
+    yield next(cls._test_id_counter)
+
+
+def make_dummy_type(test_id):
     """
     Use to generate a dummy type unique to this test. Returns a python
     Dummy class and a corresponding Numba type DummyType.
     """
 
-    # Use test_id to make sure no collision is possible.
-    test_id = test_obj.id()
     DummyType = type("DummyTypeFor{}".format(test_id), (types.Opaque,), {})
 
     dummy_type = DummyType("my_dummy")
@@ -713,78 +724,7 @@ def _fix_strides(arr):
 
 
 class TestCase(unittest.TestCase):
-    pass
-    # TODO: safe to delete since it's not used anywhere in numba_cuda?
-    # @contextlib.contextmanager
-    # def assertTypingError(self):
-    #     """
-    #     A context manager that asserts the enclosed code block fails
-    #     compiling in nopython mode.
-    #     """
-    #     _accepted_errors = (
-    #         errors.LoweringError,
-    #         errors.TypingError,
-    #         TypeError,
-    #         NotImplementedError,
-    #     )
-    #     with self.assertRaises(_accepted_errors) as cm:
-    #         yield cm
-
-    # TODO: safe to delete since it's not used anywhere in numba_cuda?
-    # @contextlib.contextmanager
-    # def assertRefCount(self, *objects):
-    #     """
-    #     A context manager that asserts the given objects have the
-    #     same reference counts before and after executing the
-    #     enclosed block.
-    #     """
-    #     old_refcounts = [sys.getrefcount(x) for x in objects]
-    #     yield
-    #     gc.collect()
-    #     new_refcounts = [sys.getrefcount(x) for x in objects]
-    #     for old, new, obj in zip(old_refcounts, new_refcounts, objects):
-    #         if old != new:
-    #             self.fail(
-    #                 "Refcount changed from %d to %d for object: %r"
-    #                 % (old, new, obj)
-    #             )
-
-    # TODO: safe to delete since it's not used anywhere in numba_cuda?
-    # def assertRefCountEqual(self, *objects):
-    #     gc.collect()
-    #     rc = [sys.getrefcount(x) for x in objects]
-    #     rc_0 = rc[0]
-    #     for i in range(len(objects))[1:]:
-    #         rc_i = rc[i]
-    #         if rc_0 != rc_i:
-    #             self.fail(
-    #                 f"Refcount for objects does not match. "
-    #                 f"#0({rc_0}) != #{i}({rc_i}) does not match."
-    #             )
-
-    # @contextlib.contextmanager
-    # def assertNoNRTLeak(self):
-    #     """
-    #     A context manager that asserts no NRT leak was created during
-    #     the execution of the enclosed block.
-    #     """
-    #     old = rtsys.get_allocation_stats()
-    #     yield
-    #     new = rtsys.get_allocation_stats()
-    #     total_alloc = new.alloc - old.alloc
-    #     total_free = new.free - old.free
-    #     total_mi_alloc = new.mi_alloc - old.mi_alloc
-    #     total_mi_free = new.mi_free - old.mi_free
-    #     self.assertEqual(
-    #         total_alloc,
-    #         total_free,
-    #         "number of data allocs != number of data frees",
-    #     )
-    #     self.assertEqual(
-    #         total_mi_alloc,
-    #         total_mi_free,
-    #         "number of meminfo allocs != number of meminfo frees",
-    #     )
+    longMessage = True
 
 
 class MemoryLeak:
diff --git a/numba_cuda/numba/cuda/tests/test_support.py b/numba_cuda/numba/cuda/tests/test_support.py
new file mode 100644
index 000000000..2cc7f3028
--- /dev/null
+++ b/numba_cuda/numba/cuda/tests/test_support.py
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: BSD-2-Clause
+
+import pytest
+
+from .support import test_id_generator
+
+
+class TestMonotonicallyIncreasingID:
+    def test_id_starts_zero(self, test_id):
+        assert test_id == 0
+
+    def test_id_increments(self, test_id):
+        assert test_id == 1
+
+
+class TestIDResetsInSeparateClass:
+    def test_id_starts_zero_again(self, test_id):
+        assert test_id == 0
+
+    def test_id_increments_again(self, test_id):
+        assert test_id == 1

From a577458b4c3c9030aa0292ac85c78eb835d7eb42 Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Mon, 11 May 2026 15:56:16 -0500
Subject: [PATCH 08/10] Remove TestCase class from support.py

All test classes that previously inherited from the TestCase class defined
in support.py now inherit directly from unittest.TestCase instead.
---
 numba_cuda/numba/cuda/testing.py                     |  3 +--
 numba_cuda/numba/cuda/tests/core/test_serialize.py   |  7 +++----
 numba_cuda/numba/cuda/tests/cudapy/test_analysis.py  |  3 +--
 .../numba/cuda/tests/cudapy/test_array_reductions.py |  5 +++--
 numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py  |  3 +--
 numba_cuda/numba/cuda/tests/cudapy/test_caching.py   |  3 +--
 numba_cuda/numba/cuda/tests/cudapy/test_extending.py | 12 ++++++------
 numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py    |  4 ++--
 numba_cuda/numba/cuda/tests/support.py               |  4 ----
 9 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/numba_cuda/numba/cuda/testing.py b/numba_cuda/numba/cuda/testing.py
index 7ec509ad0..33f705366 100644
--- a/numba_cuda/numba/cuda/testing.py
+++ b/numba_cuda/numba/cuda/testing.py
@@ -12,7 +12,6 @@
 from numba.cuda.cudadrv import driver, devices, libs
 from numba.cuda.dispatcher import CUDADispatcher
 from numba.cuda import config
-from numba.cuda.tests.support import TestCase
 from pathlib import Path
 
 from typing import Iterable, Union
@@ -32,7 +31,7 @@
 
 
 @pytest.mark.usefixtures("initialize_from_pytest_config")
-class CUDATestCase(TestCase):
+class CUDATestCase(unittest.TestCase):
     """
     For tests that use a CUDA device.
 
diff --git a/numba_cuda/numba/cuda/tests/core/test_serialize.py b/numba_cuda/numba/cuda/tests/core/test_serialize.py
index aee32ae27..3dc56ac1a 100644
--- a/numba_cuda/numba/cuda/tests/core/test_serialize.py
+++ b/numba_cuda/numba/cuda/tests/core/test_serialize.py
@@ -18,14 +18,13 @@
     from numba.core.target_extension import resolve_dispatcher_from_str
 else:
     from numba.cuda.core.errors import TypingError
-from numba.cuda.tests.support import TestCase
 from numba.cuda.cloudpickle import dumps, loads
 from numba.cuda.testing import skip_on_standalone_numba_cuda
 from numba.cuda.tests.support import assertPreciseEqual
 
 
 @skip_on_standalone_numba_cuda
-class TestDispatcherPickling(TestCase):
+class TestDispatcherPickling(unittest.TestCase):
     def run_with_protocols(self, meth, *args, **kwargs):
         for proto in range(pickle.HIGHEST_PROTOCOL + 1):
             meth(proto, *args, **kwargs)
@@ -235,7 +234,7 @@ def foo(x):
         subprocess.check_call([sys.executable, "-c", code])
 
 
-class TestSerializationMisc(TestCase):
+class TestSerializationMisc(unittest.TestCase):
     def test_numba_unpickle(self):
         # Test that _numba_unpickle is memorizing its output
         from numba.cuda.serialize import _numba_unpickle
@@ -252,7 +251,7 @@ def test_numba_unpickle(self):
         self.assertIs(got1, got2)
 
 
-class TestCloudPickleIssues(TestCase):
+class TestCloudPickleIssues(unittest.TestCase):
     """This test case includes issues specific to the cloudpickle implementation."""
 
     _numba_parallel_test_ = False
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py b/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py
index 909358ac0..00cc1ff46 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_analysis.py
@@ -19,7 +19,6 @@
 from numba.cuda.core.inline_closurecall import InlineClosureCallPass
 from numba.cuda.tests.support import (
     assertPreciseEqual,
-    TestCase,
     override_config,
 )
 from numba.cuda.core.analysis import (
@@ -54,7 +53,7 @@ def compile_to_ir(func):
     return func_ir
 
 
-class TestBranchPruneBase(TestCase):
+class TestBranchPruneBase(unittest.TestCase):
     """
     Tests branch pruning
     """
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
index aaa53d17c..54705cd5e 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_array_reductions.py
@@ -1,8 +1,9 @@
 # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: BSD-2-Clause
 import numpy as np
+import unittest
 
-from numba.cuda.tests.support import TestCase, MemoryLeakMixin
+from numba.cuda.tests.support import MemoryLeakMixin
 from numba import cuda
 from numba.cuda.testing import skip_on_cudasim, skip_on_nvjitlink_13_1_sm_120
 from numba.cuda.misc.special import literal_unroll
@@ -11,7 +12,7 @@
 
 
 @skip_on_cudasim("doesn't work in the simulator")
-class TestArrayReductions(MemoryLeakMixin, TestCase):
+class TestArrayReductions(MemoryLeakMixin, unittest.TestCase):
     """
     Test array reduction methods and functions such as .sum(), .max(), etc.
     """
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py b/numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py
index 20574c0a7..ef84e7b00 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_byteflow.py
@@ -7,11 +7,10 @@
 
 import unittest
 
-from numba.cuda.tests.support import TestCase
 from numba.cuda.compiler import run_frontend
 
 
-class TestByteFlowIssues(TestCase):
+class TestByteFlowIssues(unittest.TestCase):
     def test_issue_5087(self):
         # This is an odd issue. The exact number of print below is
         # necessary to trigger it. Too many or too few will alter the behavior.
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
index 2da869a4d..bce960db5 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_caching.py
@@ -23,7 +23,6 @@
 )
 from numba.cuda.tests.support import (
     assertPreciseEqual,
-    TestCase,
     temp_directory,
     import_dynamic,
 )
@@ -34,7 +33,7 @@
 GLOBAL_DEVICE_ARRAY = None
 
 
-class BaseCacheTest(TestCase):
+class BaseCacheTest(unittest.TestCase):
     # The source file that will be copied
     usecases_file = None
     # Make sure this doesn't conflict with another module
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
index 38c25921c..5935b5a4b 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_extending.py
@@ -26,7 +26,7 @@
 import numba
 from numba import njit
 from numba.cuda import cgutils, jit
-from numba.cuda.tests.support import TestCase, override_config
+from numba.cuda.tests.support import override_config
 from numba.cuda.typing.templates import AttributeTemplate
 from numba.cuda.cudadecl import registry as cuda_registry
 from numba.cuda.cudaimpl import lower_attr as cuda_lower_attr
@@ -386,7 +386,7 @@ def foo(r, x):
         np.testing.assert_equal(r, x * 2)
 
 
-class TestLowLevelExtending(TestCase):
+class TestLowLevelExtending(unittest.TestCase):
     """
     Test the low-level two-tier extension API.
     """
@@ -418,7 +418,7 @@ def test_type_callable_keeps_function(self):
         self.assertIsNotNone(type_func1)
 
 
-class TestHighLevelExtending(TestCase):
+class TestHighLevelExtending(unittest.TestCase):
     """
     Test the high-level combined API.
     """
@@ -708,7 +708,7 @@ def _assert_cache_stats(cfunc, expect_hit, expect_misses):
         raise AssertionError("cache not used")
 
 
-class TestIntrinsic(TestCase):
+class TestIntrinsic(unittest.TestCase):
     def test_void_return(self):
         """
         Verify that returning a None from codegen function is handled
@@ -885,7 +885,7 @@ def bar(x, y):
         self.assertEqual(x[0], 16)
 
 
-class TestOverloadPreferLiteral(TestCase):
+class TestOverloadPreferLiteral(unittest.TestCase):
     def test_overload(self):
         def prefer_lit(x):
             pass
@@ -942,7 +942,7 @@ def check_non_lit(x, res):
         self.assertEqual(c, 300)
 
 
-class TestNumbaInternalOverloads(TestCase):
+class TestNumbaInternalOverloads(unittest.TestCase):
     def test_signatures_match_overloaded_api(self):
         # This is a "best-effort" test to try and ensure that Numba's internal
         # overload declarations have signatures with argument names that match
diff --git a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
index 8222f9866..4c0b3ac17 100644
--- a/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
+++ b/numba_cuda/numba/cuda/tests/cudapy/test_ufuncs.py
@@ -15,7 +15,7 @@
 from numba.cuda.testing import skip_on_standalone_numba_cuda
 from numba.cuda.typing.typeof import typeof
 from numba.cuda.np import numpy_support
-from numba.cuda.tests.support import TestCase, reset_module_warnings
+from numba.cuda.tests.support import reset_module_warnings
 
 
 class BaseUFuncTest:
@@ -292,7 +292,7 @@ def _make_ufunc_usecase(ufunc):
 # global state, we implement the necessary part of CUDATestCase within this
 # class instead. This disables CUDA performance warnings for the duration of
 # tests.
-class CUDAUFuncTestBase(BasicUFuncTest, TestCase):
+class CUDAUFuncTestBase(BasicUFuncTest, unittest.TestCase):
     def setUp(self):
         BasicUFuncTest.setUp(self)
 
diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index ee929198f..3d3a5d649 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -723,10 +723,6 @@ def _fix_strides(arr):
         ]
 
 
-class TestCase(unittest.TestCase):
-    longMessage = True
-
-
 class MemoryLeak:
     __enable_leak_check = True
 

From 7e91f0950f0d35f09a87c1a6f42da172a72e3ce5 Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Mon, 11 May 2026 22:29:28 -0500
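Subject: [PATCH 09/10] Address conda-specific CI failure

Run the subprocess pytest invocation with --import-mode=importlib and,
instead of requiring stderr to be empty, fail only when stderr contains
lines other than the known pytest-benchmark warnings.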
---
 numba_cuda/numba/cuda/tests/support.py | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 3d3a5d649..90401599f 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -381,6 +381,7 @@ def subprocess_test_runner(
         *flags_args,
         "-m",
         "pytest",
+        "--import-mode=importlib",
         "--pyargs",
         fully_qualified_test,
     ]
@@ -404,15 +405,24 @@ def subprocess_test_runner(
         f"\ncaptured stdout: {status.stdout}\ncaptured stderr: {status.stderr}"
     )
     assert status.returncode == 0, streams
-    # Python 3.12.1 report
+
+    _IGNORED_STDERR_PATTERNS = [
+        "PytestBenchmarkWarning",
+        "Benchmarks are automatically disabled",
+    ]
+
+    def _unexpected_stderr_found(stderr):
+        lines = stderr.splitlines()
+        return any(
+            line and not any(pat in line for pat in _IGNORED_STDERR_PATTERNS)
+            for line in lines
+        )
+
     no_tests_ran = "NO TESTS RAN"
     if no_tests_ran in status.stderr:
         pytest.skip(no_tests_ran)
-    else:
-        # status.stderr for successful runs comprise a string like
-        # "...Ran 1 test in 0.565s\n\nOK\n". Migrating to pytest means the
-        # error stream in successful runs are empty
-        assert status.stderr == ""
+    elif _unexpected_stderr_found(status.stderr):
+        pytest.fail(f"Unexpected stderr output:\n{status.stderr}")
     return status
 
 

From 33ad2570a1fddc78f68e8180d897bd28fd27d09c Mon Sep 17 00:00:00 2001
From: Andres Guzman-Ballen <acosmicflamingo@gmail.com>
Date: Tue, 12 May 2026 11:46:59 -0500
Subject: [PATCH 10/10] Simplify test_id generation to support tests running in
 parallel

Generating a monotonically increasing number for test_id only works
when tests are run sequentially. To support running tests in parallel,
use pytest's request.node.nodeid attribute instead. It provides a unique
per-test identifier, as unittest.TestCase.id() did previously (e.g.
cudapy/test_foo.py::TestFoo::test_foo1)
---
 numba_cuda/numba/cuda/tests/support.py      | 12 +++--------
 numba_cuda/numba/cuda/tests/test_support.py | 22 ---------------------
 2 files changed, 3 insertions(+), 31 deletions(-)
 delete mode 100644 numba_cuda/numba/cuda/tests/test_support.py

diff --git a/numba_cuda/numba/cuda/tests/support.py b/numba_cuda/numba/cuda/tests/support.py
index 90401599f..3cc97d281 100644
--- a/numba_cuda/numba/cuda/tests/support.py
+++ b/numba_cuda/numba/cuda/tests/support.py
@@ -223,17 +223,11 @@ def override_config(name, value):
         setattr(config, name, old_value)
 
 
-# This can certainly be divided into two pytest fixtures. However, doing so
-# means developers will have to import both 'test_id_generator' and the fixture
-# returning the itertools.count() generator. Importing could be avoided
-# altogether by adding the fixtures in conftest.py, but that potentially
-# pollutes the namespace
+# Returns the requesting test's pytest node ID as a string, e.g.
+# 'cudapy/test_foo.py::TestFoo::test_foo1'
 @pytest.fixture(name="test_id")
 def test_id_generator(request):
-    cls = request.cls
-    if not hasattr(cls, "_test_id_counter"):
-        cls._test_id_counter = itertools.count()
-    yield next(cls._test_id_counter)
+    return request.node.nodeid
 
 
 def make_dummy_type(test_id):
diff --git a/numba_cuda/numba/cuda/tests/test_support.py b/numba_cuda/numba/cuda/tests/test_support.py
deleted file mode 100644
index 2cc7f3028..000000000
--- a/numba_cuda/numba/cuda/tests/test_support.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: BSD-2-Clause
-
-import pytest
-
-from .support import test_id_generator
-
-
-class TestMonotonicallyIncreasingID:
-    def test_id_starts_zero(self, test_id):
-        assert test_id == 0
-
-    def test_id_increments(self, test_id):
-        assert test_id == 1
-
-
-class TestIDResetsInSeparateClass:
-    def test_id_starts_zero_again(self, test_id):
-        assert test_id == 0
-
-    def test_id_increments_again(self, test_id):
-        assert test_id == 1