fix tuning test (#3118)

yzhou103 · gyohuangxin · web-flow · commit b140e55ce1e3 · 2026-05-12T14:12:54.000+08:00
* fix tuning test

* update

---------

Co-authored-by: Xin Huang <Xin.Huang@amd.com>
diff --git a/op_tests/tuning_tests/README.md b/op_tests/tuning_tests/README.md
@@ -11,7 +11,7 @@ Minimal test suite for validating the aiter tuning infrastructure.
 | `test_compare_logic.py` | 1 | No | Compare/update_improved: `_build_compare_update_plan`, `_merge_compare_filtered_results` |
 | `test_mp_tuner_logic.py` | 1 | No | `mp_tuner` polling: timeout, AcceleratorError, KeyError, pool restart |
 | `test_online_tune.py` | 1 | No | `AITER_ONLINE_TUNE` decision logic, `mp_lock` synchronization, MainFunc CSV write, cfg_2stages reload |
-| `test_tune_pipeline.py` | 2 | Yes | End-to-end: run each tuner on small shapes, verify output CSV; `--compare --update_improved`; `AITER_ONLINE_TUNE` e2e |
+| `test_tune_pipeline.py` | 2 | Yes | End-to-end: run each tuner on small shapes (mp=1 + mp=default), verify output CSV; `--compare --update_improved`; `AITER_ONLINE_TUNE` e2e |
 | `test_asm_splitk_guard.py` | 1 | No | `GemmTuner.asm_gemm_all_solutions` SplitK semaphore grid guard |
 | `test_run_config.py` | 2 | Yes | Run --run_config on ALL existing tuned CSVs (configs + model_configs) |
 
@@ -58,6 +58,24 @@ python3 -m unittest op_tests.tuning_tests.test_run_config -v
 python3 -m unittest discover -s op_tests/tuning_tests -v
 ```
 
+### Running individual tuner tests
+
+Each tuner test in `test_tune_pipeline.py` comes in two variants, distinguished by a suffix on the test name: `_mp1` (single GPU) and `_mp_default` (all GPUs).
+
+```bash
+# Run a specific tuner (both mp1 and mp_default)
+python3 -m pytest op_tests/tuning_tests/test_tune_pipeline.py -k "gradlib_bf16" -v
+
+# Run only the single-GPU variant
+python3 -m pytest op_tests/tuning_tests/test_tune_pipeline.py -k "gradlib_bf16_mp1" -v
+
+# Run only the multi-GPU variant
+python3 -m pytest op_tests/tuning_tests/test_tune_pipeline.py -k "gradlib_bf16_mp_default" -v
+
+# Run a single tuner variant (one test method) with unittest
+python3 -m unittest op_tests.tuning_tests.test_tune_pipeline.TestTunePipeline.test_a8w8_blockscale_mp1 -v
+```
+
 ## Reproducing with custom config
 
 Use `TUNE_TEST_FAMILY` to run `--run_config` for a specific family. Config is resolved via `AITER_CONFIGS` automatically:
diff --git a/op_tests/tuning_tests/test_tune_pipeline.py b/op_tests/tuning_tests/test_tune_pipeline.py
@@ -7,6 +7,7 @@
 --shape_grouped with profile row count comparison.
 """
 
+import glob
 import os
 import sys
 import csv
@@ -52,7 +53,27 @@ def _write_csv(path, header, rows):
             writer.writerow(row)
 
 
+def _cleanup_stale_lock_files():
+    """Remove stale FileBaton lock files left by killed subprocesses."""
+    build_dir = os.path.join(AITER_ROOT, "aiter", "jit", "build")
+    if not os.path.isdir(build_dir):
+        return
+    lock_patterns = [
+        os.path.join(build_dir, "lock_*"),
+        os.path.join(build_dir, "*", "build", "lock"),
+        os.path.join(build_dir, "lock_3rdparty_*"),
+    ]
+    for pattern in lock_patterns:
+        for lock_file in glob.glob(pattern):
+            try:
+                os.remove(lock_file)
+                print(f"Cleaned up stale lock file: {lock_file}", flush=True)
+            except OSError:
+                pass
+
+
 def _run_tuner(script, untuned, tuned, extra_args=None, timeout=300, mp=1):
+    _cleanup_stale_lock_files()
     cmd = [
         sys.executable,
         os.path.join(AITER_ROOT, script),
@@ -82,6 +103,7 @@ def _run_tuner(script, untuned, tuned, extra_args=None, timeout=300, mp=1):
             env=env,
         )
     except subprocess.TimeoutExpired as e:
+        _cleanup_stale_lock_files()
         raise AssertionError(
             f"Tuner timed out after {timeout}s (likely GPU hang or infinite loop)\n"
             f"  cmd: {' '.join(cmd)}\n"
@@ -512,65 +534,51 @@ def test_batched_bf16(self):
 
 @unittest.skipUnless(_gpu_available(), "No GPU available")
 class TestComparePipeline(unittest.TestCase):
-    """Test --compare and --compare --update_improved end-to-end."""
+    """Test --compare --update_improved end-to-end."""
 
     CONFIGS = {
         "a8w8_blockscale": {
             "script": "csrc/ck_gemm_a8w8_blockscale/gemm_a8w8_blockscale_tune.py",
             "header": ["M", "N", "K"],
-            "shapes": [(1, 1024, 512), (16, 1536, 7168)],
+            "shapes": [(1, 1024, 512)],
             "keys": ["cu_num", "M", "N", "K"],
+            "timeout": 3600,
         },
     }
 
-    def _run_compare(self, name, update_improved=False):
-        cfg = self.CONFIGS[name]
+    def test_compare_and_update(self):
+        """--compare --update_improved: tune, compare, update tuned CSV."""
+        cfg = self.CONFIGS["a8w8_blockscale"]
+        timeout = cfg.get("timeout", 900)
         tmp = tempfile.mkdtemp()
-        untuned = os.path.join(tmp, "untuned.csv")
-        tuned = os.path.join(tmp, "tuned.csv")
-        _write_csv(untuned, cfg["header"], cfg["shapes"])
-
-        extra = ["--compare"]
-        if update_improved:
-            extra.append("--update_improved")
-        result = _run_tuner(
-            cfg["script"], untuned, tuned, extra_args=extra, timeout=900
-        )
-        return result, tuned, tmp
-
-    def test_compare_only(self):
-        """--compare runs pre/post benchmark and prints comparison."""
-        result, tuned, tmp = self._run_compare("a8w8_blockscale", update_improved=False)
         try:
+            untuned = os.path.join(tmp, "untuned.csv")
+            tuned = os.path.join(tmp, "tuned.csv")
+            _write_csv(untuned, cfg["header"], cfg["shapes"])
+
+            result = _run_tuner(
+                cfg["script"],
+                untuned,
+                tuned,
+                extra_args=[
+                    "--compare",
+                    "--update_improved",
+                    "--libtype",
+                    "ck",
+                    "--batch",
+                    "1",
+                ],
+                timeout=timeout,
+                mp=1,
+            )
             if result.returncode != 0:
                 print(f"\n=== compare STDOUT ===\n{result.stdout[-2000:]}")
                 print(f"\n=== compare STDERR ===\n{result.stderr[-2000:]}")
-            self.assertEqual(result.returncode, 0, "compare tuner failed")
-            output = result.stdout + result.stderr
-            self.assertIn(
-                "Compare Report", output, "Expected 'Compare Report' in output"
-            )
-        finally:
-            import shutil
-
-            shutil.rmtree(tmp, ignore_errors=True)
-
-    def test_compare_update_improved(self):
-        """--compare --update_improved writes tuned CSV and prints comparison."""
-        result, tuned, tmp = self._run_compare("a8w8_blockscale", update_improved=True)
-        try:
-            if result.returncode != 0:
-                print(f"\n=== compare+update STDOUT ===\n{result.stdout[-2000:]}")
-                print(f"\n=== compare+update STDERR ===\n{result.stderr[-2000:]}")
             self.assertEqual(result.returncode, 0, "compare+update tuner failed")
-            self.assertTrue(os.path.exists(tuned), "tuned CSV not created")
             output = result.stdout + result.stderr
             self.assertIn(
                 "Compare Report", output, "Expected 'Compare Report' in output"
             )
-            df = pd.read_csv(tuned)
-            df.columns = df.columns.str.strip()
-            self.assertGreaterEqual(len(df), 1, "tuned CSV should have at least 1 row")
         finally:
             import shutil
 
diff --git a/op_tests/tuning_tests/test_tuner_infra.py b/op_tests/tuning_tests/test_tuner_infra.py
@@ -398,8 +398,8 @@ def test_two_files_merge_dedup(self):
             if os.path.exists(merged_path):
                 os.unlink(merged_path)
 
-    def test_column_mismatch_raises(self):
-        """Two CSVs with different columns -> AssertionError."""
+    def test_column_mismatch_merges(self):
+        """Two CSVs with different columns -> merged with missing cols filled."""
         tuner = _StubTuner.get()
         h1 = [
             "gfx",
@@ -438,8 +438,17 @@ def test_column_mismatch_raises(self):
             h2,
             [[TEST_GFX, 304, 1, 1024, 512, "x", 100.0, "k0", 1.0, 1.0, 0.01]],
         )
-        with self.assertRaises(AssertionError):
-            tuner.update_config_files(f"{f1}{os.pathsep}{f2}", "test_mismatch")
+        merged_path = tuner.update_config_files(
+            f"{f1}{os.pathsep}{f2}", "test_mismatch"
+        )
+        try:
+            df = pd.read_csv(merged_path)
+            self.assertIn("extra_col", df.columns)
+            self.assertIn("kernelId", df.columns)
+            self.assertIn("splitK", df.columns)
+        finally:
+            if os.path.exists(merged_path):
+                os.unlink(merged_path)
 
     def test_missing_second_file(self):
         """Second path doesn't exist -> only first file data."""