Update tests for random.choice

antonwolfy · antonwolfy · commit 8f8a1f9ef2c7 · 2026-01-19T07:45:16.000-08:00
diff --git a/dpnp/tests/third_party/cupy/random_tests/test_generator.py b/dpnp/tests/third_party/cupy/random_tests/test_generator.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import functools
 import os
 import threading
@@ -850,6 +852,7 @@ def test_goodness_of_fit(self):
         assert _hypothesis.chi_square_test(counts, expected)
 
     @_condition.repeat(3, 10)
+    # @pytest.mark.xfail(runtime.is_hip, reason="ROCm/HIP may have a bug")
     def test_goodness_of_fit_2(self):
         vals = self.generate(3, (5, 20), True, [0.3, 0.3, 0.4]).get()
         counts = numpy.histogram(vals, bins=numpy.arange(4))[0]
@@ -929,6 +932,163 @@ def test_bound(self):
         assert numpy.unique(val).size == val.size
 
 
+@testing.parameterize(
+    # Edge cases with small domain sizes
+    {"a": 0, "size": 0},
+    {"a": 1, "size": 1},
+    {"a": 2, "size": 1},
+    {"a": 256, "size": 100},  # Minimum cipher bits threshold
+    {"a": 257, "size": 100},
+    # large scalare uniqueness
+    {"a": 100, "size": 50},
+    {"a": 1000, "size": 500},
+    {"a": 10000, "size": 5000},
+    {"a": 100000, "size": 50000},
+    # full inpupt permutation
+    {"a": 10, "size": 10},
+    {"a": 100, "size": 100},
+    {"a": 1000, "size": 1000},
+    # Power of 2
+    {"a": 2**8, "size": 100},
+    {"a": 2**10, "size": 500},
+    {"a": 2**16, "size": 1000},
+    {"a": 2**20, "size": 5000},
+    {"a": 2**24, "size": 10000},
+    # Just below power of 2
+    {"a": 2**8 - 1, "size": 100},
+    {"a": 2**16 - 1, "size": 1000},
+    {"a": 2**20 - 1, "size": 5000},
+    # Just above power of 2
+    {"a": 2**8 + 1, "size": 100},
+    {"a": 2**16 + 1, "size": 1000},
+    {"a": 2**20 + 1, "size": 5000},
+    # Test multi-dimensional shapes.
+    {"a": 6, "size": (2, 3)},
+    {"a": 32, "size": (4, 5)},
+    {"a": 120, "size": (5, 4, 5)},
+)
+@testing.fix_random()
+class TestChoiceReplaceFalseLargeScale(RandomGeneratorTestCase):
+    """Test large-scale uniqueness for Feistel bijection implementation."""
+
+    target_method = "choice"
+
+    def test_uniqueness_and_bounds(self):
+        """Test that samples have no duplicates and correct bounds."""
+        val = self.generate(a=self.a, size=self.size, replace=False).get()
+        size = self.size if isinstance(self.size, tuple) else (self.size,)
+
+        # Check shape
+        assert val.shape == size
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < self.a).all()
+
+        # Check uniqueness
+        val_flat = numpy.asarray(val).flatten()
+        assert (
+            numpy.unique(val_flat).size == val_flat.size
+        ), "Found duplicate values in replace=False sample"
+
+
+@testing.fix_random()
+class TestChoiceReplaceFalseStatistical(RandomGeneratorTestCase):
+    """Statistical tests for uniformity of Feistel bijection."""
+
+    target_method = "choice"
+
+    @_condition.repeat(3)
+    def test_small_domain_uniformity(self):
+        """Chi-square test for uniform sampling in small domain."""
+        # Sample from domain of size 10, taking 5 elements
+        # Repeat many times and check each index appears uniformly
+        n = 10
+        sample_size = 5
+        n_trials = 1000
+
+        counts = cupy.zeros(n, dtype=int)
+        vals = self.generate_many(
+            n, size=sample_size, replace=False, _count=n_trials
+        )
+        for val in vals:
+            counts[val] += 1
+        counts = counts.get()
+
+        # Each index should appear ~500 times (5/10 * 1000)
+        expected = numpy.ones(n, dtype=int) * (sample_size * n_trials // n)
+        assert _hypothesis.chi_square_test(counts, expected)
+
+    @_condition.repeat(3, 10)
+    def test_permutation_variability(self):
+        """Test that repeated full permutations are different."""
+        n = 20
+        n_trials = 10
+
+        vals = self.generate_many(n, size=n, replace=False, _count=n_trials)
+        perms = cupy.vstack(vals)
+
+        # Should have multiple unique permutations
+        unique_perms = cupy.unique(perms, axis=0)
+        assert (
+            len(unique_perms) == n_trials
+        ), "Permutations should vary across multiple calls"
+
+
+@testing.slow
+@testing.fix_random()
+class TestChoiceReplaceFalseVeryLargeDomain(unittest.TestCase):
+    """Test memory efficiency with very large domains."""
+
+    def setUp(self):
+        self.rs = _generator.RandomState(seed=testing.generate_seed())
+
+    def test_large_domain_memory_efficiency(self):
+        """Test that very large domains don't allocate full arrays."""
+        # This should NOT allocate a 2^30 element array
+        # If it did, it would require ~8GB of memory
+        a = 2**30
+        size = 1000
+
+        val = self.rs.choice(a=a, size=size, replace=False).get()
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < a).all()
+
+        # Check uniqueness
+        assert numpy.unique(val).size == size
+
+    def test_near_32bit_limit(self):
+        """Test at the 32-bit boundary."""
+        # Current implementation supports up to 2^32
+        a = 2**31
+        size = 500
+
+        val = self.rs.choice(a=a, size=size, replace=False).get()
+
+        # Check bounds
+        assert (0 <= val).all()
+        assert (val < a).all()
+
+        # Check uniqueness
+        assert numpy.unique(val).size == size
+
+
+@testing.fix_random()
+class TestChoiceReplaceFalseDtypeConsistency(RandomGeneratorTestCase):
+    """Test output dtype consistency."""
+
+    target_method = "choice"
+
+    def test_integer_input_dtype(self):
+        """Integer input should produce int64/long dtype."""
+        val = self.generate(a=100, size=50, replace=False)
+
+        # Should be 'l' (long) dtype, which is int64 on most platforms
+        assert val.dtype == numpy.dtype("l") or val.dtype == numpy.int64
+
+
 @testing.fix_random()
 class TestGumbel(RandomGeneratorTestCase):