Add documentation and script for verifying Hypothesis example counts

Copilot · ev-br · Copilot · commit b5e8e05df14f · 2026-02-07T15:00:02.000Z
Co-authored-by: ev-br <2133832+ev-br@users.noreply.github.com>
diff --git a/README.md b/README.md
@@ -291,6 +291,63 @@ values should result in more rigorous runs. For example, `--max-examples
 10_000` may find bugs where default runs don't but will take much longer to
 run.
 
+##### Checking the actual number of examples
+
+To verify the actual number of examples Hypothesis ran for each test, use the
+`--hypothesis-show-statistics` flag:
+
+```bash
+$ pytest array_api_tests/test_manipulation_functions.py::test_squeeze --max-examples=100 --hypothesis-show-statistics
+```
+
+This will display detailed statistics for each test at the end of the output.
+The key line to look for is `Stopped because settings.max_examples=N`, where `N`
+is the effective limit on valid examples the test ran with. For example:
+
+```
+================================================ Hypothesis Statistics =================================================
+array_api_tests/test_manipulation_functions.py::test_squeeze:
+
+  - during generate phase (0.06 seconds):
+    - Typical runtimes: ~ 1-2 ms, of which ~ 0-2 ms in data generation
+    - 10 passing examples, 0 failing examples, 19 invalid examples
+
+  - Stopped because settings.max_examples=10
+```
+
+This confirms that even though `--max-examples=100` was specified, the test
+only ran 10 examples. This is because `test_squeeze` is marked with
+`@pytest.mark.unvectorized`, which automatically reduces the number of examples
+to 1/10th of the requested value to improve performance.
+
+To compare, a test without the `unvectorized` marker will show the full count:
+
+```bash
+$ pytest array_api_tests/test_manipulation_functions.py::TestExpandDims::test_expand_dims_tuples --max-examples=100 --hypothesis-show-statistics
+```
+
+Output:
+```
+  - Stopped because settings.max_examples=100
+```
+
+This verification method is useful for:
+- Confirming that the `unvectorized` marker is working correctly
+- Debugging test performance issues
+- Understanding how many examples Hypothesis actually generated vs skipped
+
+###### Automated verification
+
+A verification script is provided to automatically check that the `unvectorized` 
+marker is working correctly:
+
+```bash
+$ python3 verify_unvectorized_marker.py
+```
+
+This script runs one test with the marker and one without (using NumPy as the
+array library) and confirms that the number of examples is correctly reduced.
+
 #### Skipping Dtypes
 
 The test suite will automatically skip testing of inessential dtypes if they
diff --git a/verify_unvectorized_marker.py b/verify_unvectorized_marker.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+"""
+Verification script to check that the @pytest.mark.unvectorized marker
+correctly reduces the number of Hypothesis examples.
+
+This script runs tests with --hypothesis-show-statistics and parses the output
+to verify the actual number of examples that were run.
+
+Usage:
+    python3 verify_unvectorized_marker.py
+"""
+
+import subprocess
+import re
+import sys
+
+
+def run_test_and_get_examples(test_path, max_examples=100):
+    """
+    Run a test with hypothesis statistics and extract the number of examples run.
+    
+    Args:
+        test_path: Full path to the test (e.g., "array_api_tests/test_manipulation_functions.py::test_squeeze")
+        max_examples: The --max-examples value to pass to pytest
+        
+    Returns:
+        int: Number of examples that were actually run, or None if parsing failed
+    """
+    cmd = [
+        "python3", "-m", "pytest",
+        test_path,
+        f"--max-examples={max_examples}",
+        "--hypothesis-show-statistics",
+        "-v"
+    ]
+    
+    result = subprocess.run(
+        cmd,
+        capture_output=True,
+        text=True,
+        env={"ARRAY_API_TESTS_MODULE": "numpy"}
+    )
+    
+    # Look for "Stopped because settings.max_examples=N"
+    match = re.search(r"Stopped because settings\.max_examples=(\d+)", result.stdout)
+    if match:
+        return int(match.group(1))
+    
+    return None
+
+
+def main():
+    print("Verifying @pytest.mark.unvectorized marker behavior...")
+    print("=" * 70)
+    
+    max_examples = 100
+    
+    # Test 1: A test WITH the unvectorized marker
+    print(f"\n1. Testing with @pytest.mark.unvectorized marker:")
+    print(f"   Test: test_squeeze")
+    print(f"   Expected: {max_examples // 10} examples (1/10th of {max_examples})")
+    
+    examples_vectorized = run_test_and_get_examples(
+        "array_api_tests/test_manipulation_functions.py::test_squeeze",
+        max_examples
+    )
+    
+    if examples_vectorized is None:
+        print("   ERROR: Could not parse number of examples from output")
+        return 1
+    
+    print(f"   Actual:   {examples_vectorized} examples")
+    
+    expected = max_examples // 10
+    if examples_vectorized == expected:
+        print("   ✓ PASS: Marker correctly reduced examples")
+    else:
+        print(f"   ✗ FAIL: Expected {expected} examples but got {examples_vectorized}")
+        return 1
+    
+    # Test 2: A test WITHOUT the unvectorized marker
+    print(f"\n2. Testing WITHOUT @pytest.mark.unvectorized marker:")
+    print(f"   Test: TestExpandDims::test_expand_dims_tuples")
+    print(f"   Expected: {max_examples} examples (full amount)")
+    
+    examples_non_vectorized = run_test_and_get_examples(
+        "array_api_tests/test_manipulation_functions.py::TestExpandDims::test_expand_dims_tuples",
+        max_examples
+    )
+    
+    if examples_non_vectorized is None:
+        print("   ERROR: Could not parse number of examples from output")
+        return 1
+    
+    print(f"   Actual:   {examples_non_vectorized} examples")
+    
+    if examples_non_vectorized == max_examples:
+        print("   ✓ PASS: Test ran with full number of examples")
+    else:
+        print(f"   ✗ FAIL: Expected {max_examples} examples but got {examples_non_vectorized}")
+        return 1
+    
+    print("\n" + "=" * 70)
+    print("✓ All verification checks passed!")
+    print(f"\nSummary:")
+    print(f"  - Unvectorized test:     {examples_vectorized} examples (10% of requested)")
+    print(f"  - Non-unvectorized test: {examples_non_vectorized} examples (100% of requested)")
+    
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())