From 902e56b03ca22b0aaff05a05fbcb408bd36add65 Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 10:06:07 +0000 Subject: [PATCH 01/13] Add GEMM+GEMM and CONV+GEMM support to quickTuningGen.py --- .../performance/analysis/quickTuningGen.py | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/mlir/utils/performance/analysis/quickTuningGen.py b/mlir/utils/performance/analysis/quickTuningGen.py index ed0b745363a5..4292529a15d2 100644 --- a/mlir/utils/performance/analysis/quickTuningGen.py +++ b/mlir/utils/performance/analysis/quickTuningGen.py @@ -24,6 +24,11 @@ 'TransQ', 'TransK', 'TransV', 'TransO', 'Causal', 'ReturnLSE', 'SplitKV', 'WithAttnScale', 'WithAttnBias', 'G', 'SeqLenQ', 'SeqLenK', 'NumHeadsQ', 'NumHeadsKV', 'HeadDimQK', 'HeadDimV' ] +GEMM_GEMM_COLUMNS = ['TransA', 'TransB', 'TransC', 'TransO', 'G', 'M', 'K', 'N', 'O'] +CONV_GEMM_COLUMNS = [ + 'FilterLayout', 'InputLayout', 'TransC', 'TransO', 'N', 'C', 'H', 'W', 'K', 'Y', 'X', + 'DilationH', 'DilationW', 'StrideH', 'StrideW', 'PaddingH', 'PaddingW', 'O' +] # Regex pattern for lookup table entries: {"arch_op_dtype", {Class::params, Class::count}}, // optional comment LOOKUP_ENTRY_PATTERN = re.compile(r'\{("(gfx\w+)_(\w+)_(\w+)"),\s*(\{[^}]+\})\},(\s*//[^\n]*)?') @@ -35,7 +40,9 @@ def get_instruction_type(arch, dtype, op): """Determine instruction type based on architecture, data type, and operation.""" - if op == "attention": + if op in ("attention", "gemm_gemm", "conv_gemm"): + if op == "gemm_gemm" and arch.startswith("gfx1") and dtype == "f32": + return "NonAccel" return "GemmGemm" if arch.startswith("gfx9"): return "XDL" @@ -55,9 +62,15 @@ def get_class_name(arch, dtype, op): return f"PopulateParams{instr}" if instr != "NonAccel" else "PopulateParams" +def _op_cap_for_param_name(op): + """Format op for C++ param name: gemm_gemm -> GemmGemm, attention -> Attention.""" + return "".join(part.capitalize() for part in op.split("_")) + + def get_param_names(arch, dtype, op): """Generate array and count variable names.""" - base = f"initParameters{dtype.capitalize()}{op.capitalize()}{arch.capitalize()}" + op_cap = _op_cap_for_param_name(op) + base = f"initParameters{dtype.capitalize()}{op_cap}{arch.capitalize()}" return base, f"n{base[0].upper()}{base[1:]}" @@ -69,6 +82,10 @@ def get_target_columns(op): return CONV_COLUMNS elif op == "attention": return ATTENTION_COLUMNS + elif op == "gemm_gemm": + return GEMM_GEMM_COLUMNS + elif op == "conv_gemm": + return CONV_GEMM_COLUMNS else: raise ValueError(f"Unknown operation: {op}") @@ -302,9 +319,19 @@ def add_lookup_entry(content, insert_marker, entry): return content[:insert_pos] + f'{entry}\n\n' + content[insert_pos:] +def get_lookup_key_op(op): + """Return the operation key used in the C++ lookup table (matches stringifyEnum(KernelType).lower()).""" + # C++ KernelType enum: Attention, GemmElementwiseGemm, ConvElementwiseGemm -> lower() + key_map = {"attention": "attention", "gemm_gemm": "gemmelementwisegemm", "conv_gemm": "convelementwisegemm"} + return key_map.get(op, op) + + def get_lookup_endif(arch, op, dtype): """Get the appropriate lookup table #endif marker.""" - if op == "attention": + # op may be script name (gemm_gemm) or C++ key form (gemmelementwisegemm) from .inc + gemm_gemm_ops = ("attention", "gemm_gemm", "conv_gemm", + "gemmelementwisegemm", "convelementwisegemm") + if op in gemm_gemm_ops: return "#endif // GemmGemm_LOOKUP_TABLE_GEN" elif is_accel(arch, dtype, op): return "#endif // Accel_LOOKUP_TABLE_GEN" @@ -348,9 +375,9 @@ def update_inc_file(results, arch, op): f"// END_{op.upper()}_{instr}_{dtype}_{arch}_DECS", "\n".join(dec_lines)) - # Add lookup entry + # Add lookup entry (key must match C++ ParamLookupTable makeKey: arch_op_dtype) endif_marker = get_lookup_endif(arch, op, dtype) - key = f"{arch}_{op}_{dtype}" + key = f"{arch}_{get_lookup_key_op(op)}_{dtype}" value = f"{{{class_name}::{param_name}, {class_name}::{count_name}}}" entry = f'{{"{key}", {value}}},' content = add_lookup_entry(content, endif_marker, entry) @@ -438,6 +465,8 @@ def main(args=None): %(prog)s tuningData/*.debug --op conv --update %(prog)s gfx90a/*.debug gfx942/*.debug --op gemm --update cat data.debug | %(prog)s --op attention --update + %(prog)s gemmgemm/*.debug --op gemm_gemm --update + %(prog)s convgemm/*.debug --op conv_gemm --update find . -name "*.debug" | xargs %(prog)s --op gemm --update # Add fallback type aliases (use f16 configs when there's no bf16 data) @@ -449,7 +478,9 @@ def main(args=None): nargs='*', metavar='FILE', help='.debug files produced by tuningRunner.py (reads TSV from stdin if none provided)') - parser.add_argument('--op', choices=['gemm', 'conv', 'attention'], help='Operation') + parser.add_argument('--op', + choices=['gemm', 'conv', 'attention', 'gemm_gemm', 'conv_gemm'], + help='Operation') parser.add_argument('--th', type=float, default=0.93, From 25e21c1d1049e3f256734fd4485cda744da2e553 Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 10:10:14 +0000 Subject: [PATCH 02/13] Add GEMM+GEMM quick tuning configs for gfx908 --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 1bb32e598494..221754a1482b 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -2860,6 +2860,24 @@ const StringRef PopulateParamsGemmGemm::initParametersI8AttentionGfx1152[] = { }; // END_ATTENTION_GemmGemm_i8_gfx1152_DEFS +// BEGIN_GEMM_GEMM_GemmGemm_f16_gfx908_DEFS +const StringRef PopulateParamsGemmGemm::initParametersF16GemmGemmGfx908[] = { + "attn:v3:128,128,16,16,16,16,16,8,4,1,2,0,1", + "attn:v3:128,128,32,32,32,16,16,4,4,1,2,0,1", + "attn:v3:128,128,128,32,32,32,16,4,4,1,2,0,1", + "attn:v3:128,128,32,8,16,32,16,16,4,1,2,0,1", + "attn:v3:32,256,16,8,16,16,16,8,1,1,2,0,1" +}; +// END_GEMM_GEMM_GemmGemm_f16_gfx908_DEFS + +// BEGIN_GEMM_GEMM_GemmGemm_f32_gfx908_DEFS +const StringRef PopulateParamsGemmGemm::initParametersF32GemmGemmGfx908[] = { + "attn:v3:64,128,32,16,16,16,16,4,4,1,2,0,1", + "attn:v3:64,128,32,32,16,16,16,4,4,1,2,0,1", + "attn:v3:64,64,128,8,64,16,16,8,4,1,2,0,1" +}; +// END_GEMM_GEMM_GemmGemm_f32_gfx908_DEFS + #endif // GemmGemm_DEFINITIONS_GEN #ifdef GemmGemm_DECLARATIONS_GEN @@ -3019,6 +3037,16 @@ static constexpr size_t nInitParametersI8AttentionGfx1152 = 10; static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8AttentionGfx1152]; // END_ATTENTION_GemmGemm_i8_gfx1152_DECS +// BEGIN_GEMM_GEMM_GemmGemm_f16_gfx908_DECS +static constexpr size_t nInitParametersF16GemmGemmGfx908 = 5; +static const StringRef initParametersF16GemmGemmGfx908[nInitParametersF16GemmGemmGfx908]; +// END_GEMM_GEMM_GemmGemm_f16_gfx908_DECS + +// BEGIN_GEMM_GEMM_GemmGemm_f32_gfx908_DECS +static constexpr size_t nInitParametersF32GemmGemmGfx908 = 3; +static const StringRef initParametersF32GemmGemmGfx908[nInitParametersF32GemmGemmGfx908]; +// END_GEMM_GEMM_GemmGemm_f32_gfx908_DECS + #endif // GemmGemm_DECLARATIONS_GEN #ifdef NonAccel_LOOKUP_TABLE_GEN @@ -3283,4 +3311,8 @@ static const StringRef initParametersI8AttentionGfx1152[nInitParametersI8Attenti {"gfx1152_attention_i8", {PopulateParamsGemmGemm::initParametersI8AttentionGfx1152, PopulateParamsGemmGemm::nInitParametersI8AttentionGfx1152}}, +{"gfx908_gemmelementwisegemm_f16", {PopulateParamsGemmGemm::initParametersF16GemmGemmGfx908, PopulateParamsGemmGemm::nInitParametersF16GemmGemmGfx908}}, + +{"gfx908_gemmelementwisegemm_f32", {PopulateParamsGemmGemm::initParametersF32GemmGemmGfx908, PopulateParamsGemmGemm::nInitParametersF32GemmGemmGfx908}}, + #endif // GemmGemm_LOOKUP_TABLE_GEN From b65f49382fdb48eafda0b9c2e279c873538fa963 Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 10:17:03 +0000 Subject: [PATCH 03/13] Add GEMM+GEMM quick tuning configs for gfx1200 --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index 221754a1482b..ca34df165073 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -2878,6 +2878,19 @@ const StringRef PopulateParamsGemmGemm::initParametersF32GemmGemmGfx908[] = { }; // END_GEMM_GEMM_GemmGemm_f32_gfx908_DEFS +// BEGIN_GEMM_GEMM_GemmGemm_f16_gfx1200_DEFS +const StringRef PopulateParamsGemmGemm::initParametersF16GemmGemmGfx1200[] = { + "attn:v3:128,128,64,32,16,16,16,4,4,1,2,0,1", + "attn:v3:64,64,32,16,16,16,16,8,4,2,2,0,1", + "attn:v3:128,128,16,4,16,16,16,8,4,1,2,0,1", + "attn:v3:128,128,32,4,16,16,16,8,1,1,2,0,1", + "attn:v3:128,128,32,8,16,16,16,8,1,1,2,0,1", + "attn:v3:128,128,64,16,32,32,16,8,1,1,2,0,1", + "attn:v3:32,32,32,8,16,16,16,8,1,2,2,0,1", + "attn:v3:64,256,32,16,16,16,16,8,1,1,2,0,1" +}; +// END_GEMM_GEMM_GemmGemm_f16_gfx1200_DEFS + #endif // GemmGemm_DEFINITIONS_GEN #ifdef GemmGemm_DECLARATIONS_GEN @@ -3047,6 +3060,11 @@ static constexpr size_t nInitParametersF32GemmGemmGfx908 = 3; static const StringRef initParametersF32GemmGemmGfx908[nInitParametersF32GemmGemmGfx908]; // END_GEMM_GEMM_GemmGemm_f32_gfx908_DECS +// BEGIN_GEMM_GEMM_GemmGemm_f16_gfx1200_DECS +static constexpr size_t nInitParametersF16GemmGemmGfx1200 = 8; +static const StringRef initParametersF16GemmGemmGfx1200[nInitParametersF16GemmGemmGfx1200]; +// END_GEMM_GEMM_GemmGemm_f16_gfx1200_DECS + #endif // GemmGemm_DECLARATIONS_GEN #ifdef NonAccel_LOOKUP_TABLE_GEN @@ -3315,4 +3333,6 @@ static const StringRef initParametersF32GemmGemmGfx908[nInitParametersF32GemmGem {"gfx908_gemmelementwisegemm_f32", {PopulateParamsGemmGemm::initParametersF32GemmGemmGfx908, PopulateParamsGemmGemm::nInitParametersF32GemmGemmGfx908}}, +{"gfx1200_gemmelementwisegemm_f16", {PopulateParamsGemmGemm::initParametersF16GemmGemmGfx1200, PopulateParamsGemmGemm::nInitParametersF16GemmGemmGfx1200}}, + #endif // GemmGemm_LOOKUP_TABLE_GEN From 64cab37c5fbf70550cacd532c08209c6734bb2e2 Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 10:30:02 -0500 Subject: [PATCH 04/13] Fix rocprof verification: use --output-format csv instead of -f csv --- mlir/utils/performance/perfRunner.py | 4 ++-- mlir/utils/performance/tuningRunner.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/utils/performance/perfRunner.py b/mlir/utils/performance/perfRunner.py index 90b9010d27a5..90a76ed7b816 100644 --- a/mlir/utils/performance/perfRunner.py +++ b/mlir/utils/performance/perfRunner.py @@ -1784,7 +1784,7 @@ def run_config_with_mlir(config: PerfConfiguration, '--entry-point-result=void' ] profiler_cmd = [ROCPROF] + get_metric_args_for_rocprof(arch) + [ - '--kernel-trace', '--stats', '-f', 'csv', '-o', BENCHMARKING_RESULT_FILE_NAME, '--', + '--kernel-trace', '--stats', '--output-format', 'csv', '-o', BENCHMARKING_RESULT_FILE_NAME, '--', paths.mlir_paths.cpu_runner_path ] + mlir_cpu_runner_args @@ -2068,7 +2068,7 @@ def run_fusion_kernel(filename, rocmlir_gen_args, paths: Paths): '--entry-point-result=void' ] profiler_cmd = [ROCPROF] + get_metric_args_for_rocprof(chip) + [ - '--kernel-trace', '--stats', '-f', 'csv', '-o', BENCHMARKING_RESULT_FILE_NAME + '--kernel-trace', '--stats', '--output-format', 'csv', '-o', BENCHMARKING_RESULT_FILE_NAME ] + ['--', paths.mlir_paths.cpu_runner_path] + mlir_cpu_runner_args commands.append(profiler_cmd) outs, noerr = run_pipeline(commands) diff --git a/mlir/utils/performance/tuningRunner.py b/mlir/utils/performance/tuningRunner.py index 7aa804c3a434..8adf28ad6e1e 100755 --- a/mlir/utils/performance/tuningRunner.py +++ b/mlir/utils/performance/tuningRunner.py @@ -1153,7 +1153,7 @@ def verify_perfconfig(perfconfig: str, config: PerfConfiguration, paths: Paths, ] rocprof_command = [perfRunner.ROCPROF] + perfRunner.get_metric_args_for_rocprof( options.arch) + [ - '--kernel-trace', '--stats', '-f', 'csv', '-o', + '--kernel-trace', '--stats', '--output-format', 'csv', '-o', perfRunner.BENCHMARKING_RESULT_FILE_NAME, '--', paths.mlir_paths.cpu_runner_path ] + mlir_cpu_runner_args From 892f6d914f099700a013e72cba6c013f7a00ad3b Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 15:49:46 +0000 Subject: [PATCH 05/13] Add GEMM+GEMM quick tuning configs for gfx1100 --- .../Rock/Tuning/QuickTuningPerfconfigs.inc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc index ca34df165073..f2c1ceca5233 100644 --- a/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc +++ b/mlir/include/mlir/Dialect/Rock/Tuning/QuickTuningPerfconfigs.inc @@ -2891,6 +2891,16 @@ const StringRef PopulateParamsGemmGemm::initParametersF16GemmGemmGfx1200[] = { }; // END_GEMM_GEMM_GemmGemm_f16_gfx1200_DEFS +// BEGIN_GEMM_GEMM_GemmGemm_f16_gfx1100_DEFS +const StringRef PopulateParamsGemmGemm::initParametersF16GemmGemmGfx1100[] = { + "attn:v3:128,128,16,16,16,16,16,8,1,1,2,0,1", + "attn:v3:128,128,16,4,16,16,16,8,1,1,2,0,1", + "attn:v3:64,256,32,16,16,16,16,8,1,1,2,0,1", + "attn:v3:128,128,16,8,32,16,16,8,1,1,2,0,1", + "attn:v3:32,64,256,16,32,64,16,16,1,1,2,0,1" +}; +// END_GEMM_GEMM_GemmGemm_f16_gfx1100_DEFS + #endif // GemmGemm_DEFINITIONS_GEN #ifdef GemmGemm_DECLARATIONS_GEN @@ -3065,6 +3075,11 @@ static constexpr size_t nInitParametersF16GemmGemmGfx1200 = 8; static const StringRef initParametersF16GemmGemmGfx1200[nInitParametersF16GemmGemmGfx1200]; // END_GEMM_GEMM_GemmGemm_f16_gfx1200_DECS +// BEGIN_GEMM_GEMM_GemmGemm_f16_gfx1100_DECS +static constexpr size_t nInitParametersF16GemmGemmGfx1100 = 5; +static const StringRef initParametersF16GemmGemmGfx1100[nInitParametersF16GemmGemmGfx1100]; +// END_GEMM_GEMM_GemmGemm_f16_gfx1100_DECS + #endif // GemmGemm_DECLARATIONS_GEN #ifdef NonAccel_LOOKUP_TABLE_GEN @@ -3335,4 +3350,6 @@ static const StringRef initParametersF16GemmGemmGfx1200[nInitParametersF16GemmGe {"gfx1200_gemmelementwisegemm_f16", {PopulateParamsGemmGemm::initParametersF16GemmGemmGfx1200, PopulateParamsGemmGemm::nInitParametersF16GemmGemmGfx1200}}, +{"gfx1100_gemmelementwisegemm_f16", {PopulateParamsGemmGemm::initParametersF16GemmGemmGfx1100, PopulateParamsGemmGemm::nInitParametersF16GemmGemmGfx1100}}, + #endif // GemmGemm_LOOKUP_TABLE_GEN From 10ba5eff37126269e7e090514d4c58cf551dd90a Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 16:05:03 +0000 Subject: [PATCH 06/13] Treat gemm_gemm, conv_gemm, and attention with -t f32 on gfx11xx/gfx12xx as skipped instead of failed --- mlir/utils/performance/tuningRunner.py | 49 ++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/mlir/utils/performance/tuningRunner.py b/mlir/utils/performance/tuningRunner.py index 8adf28ad6e1e..338c9861b72f 100755 --- a/mlir/utils/performance/tuningRunner.py +++ b/mlir/utils/performance/tuningRunner.py @@ -183,12 +183,34 @@ class Options: timeout: Optional[int] +def _is_navi_arch(arch: str) -> bool: + """Return True if arch is Navi (gfx11xx or gfx12xx).""" + return arch.startswith("gfx11") or arch.startswith("gfx12") + + +# Operations that have no f32 tuning support on Navi (gfx11xx/gfx12xx) - empty tuning range +_F32_NAVI_UNSUPPORTED_OPS = frozenset({ + 'GemmGemmConfiguration', 'ConvGemmConfiguration', 'AttentionConfiguration' +}) + + +def _should_skip_f32_on_navi(arch: str, test_vector: str, conf_class: type) -> bool: + """Return True if this op is f32 on Navi and has no tuning support (empty range).""" + if conf_class.__name__ not in _F32_NAVI_UNSUPPORTED_OPS: + return False + if not _is_navi_arch(arch): + return False + # Match -t f32 in the test vector (e.g. "-t f32 -transA" or " -t f32 ") + return '-t f32' in test_vector + + @dataclass class TuningResult: """Result of tuning a single configuration.""" test_vector: str success: bool timed_out: bool = False + skipped: bool = False gpu_id: int = -1 duration_seconds: float = 0.0 timestamp: Optional[str] = None @@ -500,6 +522,12 @@ def set_succeeded(self, test_vector: str) -> None: self._state.remove(test_vector) self._save_locked() + def remove(self, test_vector: str) -> None: + """Remove test_vector from state (e.g. when skipping without marking failed).""" + with self._lock: + self._state.remove(test_vector) + self._save_locked() + def finalize_interrupted(self) -> None: """Mark RUNNING configs as INTERRUPTED on clean shutdown.""" with self._lock: @@ -715,11 +743,14 @@ class ETATracker: success_times: List[float] = field(default_factory=list) ok_count: int = 0 fail_count: int = 0 + skip_count: int = 0 _processed: int = field(default=0, init=False) def record(self, result: TuningResult) -> None: self._processed += 1 - if result.success: + if result.skipped: + self.skip_count += 1 + elif result.success: self.ok_count += 1 self.success_times.append(result.duration_seconds) else: @@ -760,7 +791,10 @@ def get_postfix_str(self) -> str: rate = self._format_rate(median) eta = self._format_eta(eta_seconds) - return f"ok={self.ok_count}, fail={self.fail_count}, rate={rate}, eta={eta}" + postfix = f"ok={self.ok_count}, fail={self.fail_count}" + if self.skip_count > 0: + postfix += f", skip={self.skip_count}" + return f"{postfix}, rate={rate}, eta={eta}" @dataclass @@ -1520,6 +1554,13 @@ def execute_tuning_task(test_vector: str) -> TuningResult: state_file.set_running(test_vector) + if _should_skip_f32_on_navi(ctx.options.chip, test_vector, ctx.conf_class): + state_file.remove(test_vector) + return TuningResult(test_vector=test_vector, + success=False, + skipped=True, + gpu_id=gpu_id) + timestamp = datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ') start_time = time.time() compile_threads = ctx.get_compile_threads(gpu_id) @@ -1550,6 +1591,10 @@ def execute_tuning_task(test_vector: str) -> TuningResult: results_writer.write_result(result) if debug_writer: debug_writer.write_result(result) + elif result.skipped: + logger.warning( + f"SKIPPED: '{result.test_vector}' on GPU {result.gpu_id} " + "(f32 on Navi has no tuning support for this op)") else: has_errors = True logger.error( From e00972982772a936c75ae432f556b37f04053fff Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 16:07:31 +0000 Subject: [PATCH 07/13] Edit log message --- mlir/utils/performance/tuningRunner.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mlir/utils/performance/tuningRunner.py b/mlir/utils/performance/tuningRunner.py index 338c9861b72f..da9e73fd1b38 100755 --- a/mlir/utils/performance/tuningRunner.py +++ b/mlir/utils/performance/tuningRunner.py @@ -1592,9 +1592,17 @@ def execute_tuning_task(test_vector: str) -> TuningResult: if debug_writer: debug_writer.write_result(result) elif result.skipped: - logger.warning( - f"SKIPPED: '{result.test_vector}' on GPU {result.gpu_id} " - "(f32 on Navi has no tuning support for this op)") + skip_msg = ( + f"'{result.test_vector}' on GPU {result.gpu_id} " + "(f32 on Navi has no tuning support for this op)" + ) + if sys.stderr.isatty(): + tqdm.write( + f"{_LOG_COLORS[logging.WARNING]}SKIPPED{_COLOR_RESET}: {skip_msg}", + file=sys.stderr, + ) + else: + tqdm.write(f"SKIPPED: {skip_msg}", file=sys.stderr) else: has_errors = True logger.error( From 6efb66de8e752cd054f07fa23b21610a97bc0ee7 Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 16:10:41 +0000 Subject: [PATCH 08/13] Apply yapf --- mlir/utils/performance/tuningRunner.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/mlir/utils/performance/tuningRunner.py b/mlir/utils/performance/tuningRunner.py index da9e73fd1b38..e0cd970a046f 100755 --- a/mlir/utils/performance/tuningRunner.py +++ b/mlir/utils/performance/tuningRunner.py @@ -189,9 +189,8 @@ def _is_navi_arch(arch: str) -> bool: # Operations that have no f32 tuning support on Navi (gfx11xx/gfx12xx) - empty tuning range -_F32_NAVI_UNSUPPORTED_OPS = frozenset({ - 'GemmGemmConfiguration', 'ConvGemmConfiguration', 'AttentionConfiguration' -}) +_F32_NAVI_UNSUPPORTED_OPS = frozenset( + {'GemmGemmConfiguration', 'ConvGemmConfiguration', 'AttentionConfiguration'}) def _should_skip_f32_on_navi(arch: str, test_vector: str, conf_class: type) -> bool: @@ -1592,10 +1591,8 @@ def execute_tuning_task(test_vector: str) -> TuningResult: if debug_writer: debug_writer.write_result(result) elif result.skipped: - skip_msg = ( - f"'{result.test_vector}' on GPU {result.gpu_id} " - "(f32 on Navi has no tuning support for this op)" - ) + skip_msg = (f"'{result.test_vector}' on GPU {result.gpu_id} " + "(f32 on Navi has no tuning support for this op)") if sys.stderr.isatty(): tqdm.write( f"{_LOG_COLORS[logging.WARNING]}SKIPPED{_COLOR_RESET}: {skip_msg}", From 5f3595565ec41406f48b3ddd09a416e43596163a Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 16:19:53 +0000 Subject: [PATCH 09/13] Apply yapf pt 2 --- mlir/utils/performance/analysis/quickTuningGen.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mlir/utils/performance/analysis/quickTuningGen.py b/mlir/utils/performance/analysis/quickTuningGen.py index 4292529a15d2..5f5d6f6a62b9 100644 --- a/mlir/utils/performance/analysis/quickTuningGen.py +++ b/mlir/utils/performance/analysis/quickTuningGen.py @@ -322,15 +322,19 @@ def add_lookup_entry(content, insert_marker, entry): def get_lookup_key_op(op): """Return the operation key used in the C++ lookup table (matches stringifyEnum(KernelType).lower()).""" # C++ KernelType enum: Attention, GemmElementwiseGemm, ConvElementwiseGemm -> lower() - key_map = {"attention": "attention", "gemm_gemm": "gemmelementwisegemm", "conv_gemm": "convelementwisegemm"} + key_map = { + "attention": "attention", + "gemm_gemm": "gemmelementwisegemm", + "conv_gemm": "convelementwisegemm" + } return key_map.get(op, op) def get_lookup_endif(arch, op, dtype): """Get the appropriate lookup table #endif marker.""" # op may be script name (gemm_gemm) or C++ key form (gemmelementwisegemm) from .inc - gemm_gemm_ops = ("attention", "gemm_gemm", "conv_gemm", - "gemmelementwisegemm", "convelementwisegemm") + gemm_gemm_ops = ("attention", "gemm_gemm", "conv_gemm", "gemmelementwisegemm", + "convelementwisegemm") if op in gemm_gemm_ops: return "#endif // GemmGemm_LOOKUP_TABLE_GEN" elif is_accel(arch, dtype, op): From 68a466e4039155a3027ad9a6d8ad33393b4ad7c0 Mon Sep 17 00:00:00 2001 From: Djordje Antic Date: Sun, 1 Mar 2026 19:46:36 +0000 Subject: [PATCH 10/13] Apply yapf on perfRunner as well --- mlir/utils/performance/perfRunner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mlir/utils/performance/perfRunner.py b/mlir/utils/performance/perfRunner.py index 90a76ed7b816..32a03f6663c5 100644 --- a/mlir/utils/performance/perfRunner.py +++ b/mlir/utils/performance/perfRunner.py @@ -1784,8 +1784,8 @@ def run_config_with_mlir(config: PerfConfiguration, '--entry-point-result=void' ] profiler_cmd = [ROCPROF] + get_metric_args_for_rocprof(arch) + [ - '--kernel-trace', '--stats', '--output-format', 'csv', '-o', BENCHMARKING_RESULT_FILE_NAME, '--', - paths.mlir_paths.cpu_runner_path + '--kernel-trace', '--stats', '--output-format', 'csv', '-o', + BENCHMARKING_RESULT_FILE_NAME, '--', paths.mlir_paths.cpu_runner_path ] + mlir_cpu_runner_args outs, noerr = run_pipeline([rocmlir_gen_cmd.split(), rocmlir_driver_cmd, profiler_cmd]) From ee4ee6139e26b45cbccf05de2418eb2e48691aed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90or=C4=91e=20Anti=C4=87?= Date: Fri, 24 Apr 2026 16:31:44 +0200 Subject: [PATCH 11/13] Update quickTuningGen.py --- mlir/utils/performance/analysis/quickTuningGen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/utils/performance/analysis/quickTuningGen.py b/mlir/utils/performance/analysis/quickTuningGen.py index 5f5d6f6a62b9..ae06f82e4e8a 100644 --- a/mlir/utils/performance/analysis/quickTuningGen.py +++ b/mlir/utils/performance/analysis/quickTuningGen.py @@ -379,7 +379,7 @@ def update_inc_file(results, arch, op): f"// END_{op.upper()}_{instr}_{dtype}_{arch}_DECS", "\n".join(dec_lines)) - # Add lookup entry (key must match C++ ParamLookupTable makeKey: arch_op_dtype) + # Add lookup entry endif_marker = get_lookup_endif(arch, op, dtype) key = f"{arch}_{get_lookup_key_op(op)}_{dtype}" value = f"{{{class_name}::{param_name}, {class_name}::{count_name}}}" From 16a57b92abd8f6e54c1eb33fb43f21b39aa2cc35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90or=C4=91e=20Anti=C4=87?= Date: Fri, 24 Apr 2026 16:32:48 +0200 Subject: [PATCH 12/13] Update quickTuningGen.py --- mlir/utils/performance/analysis/quickTuningGen.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/mlir/utils/performance/analysis/quickTuningGen.py b/mlir/utils/performance/analysis/quickTuningGen.py index ae06f82e4e8a..96f86f1d90fc 100644 --- a/mlir/utils/performance/analysis/quickTuningGen.py +++ b/mlir/utils/performance/analysis/quickTuningGen.py @@ -469,8 +469,6 @@ def main(args=None): %(prog)s tuningData/*.debug --op conv --update %(prog)s gfx90a/*.debug gfx942/*.debug --op gemm --update cat data.debug | %(prog)s --op attention --update - %(prog)s gemmgemm/*.debug --op gemm_gemm --update - %(prog)s convgemm/*.debug --op conv_gemm --update find . -name "*.debug" | xargs %(prog)s --op gemm --update # Add fallback type aliases (use f16 configs when there's no bf16 data) From 65584927593a65e67dcdb83c76880a8d8f7c37e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C4=90or=C4=91e=20Anti=C4=87?= Date: Fri, 24 Apr 2026 16:33:43 +0200 Subject: [PATCH 13/13] Update tuningRunner.py --- mlir/utils/performance/tuningRunner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/utils/performance/tuningRunner.py b/mlir/utils/performance/tuningRunner.py index d0e90e8538e6..69403ac2bc99 100755 --- a/mlir/utils/performance/tuningRunner.py +++ b/mlir/utils/performance/tuningRunner.py @@ -185,7 +185,7 @@ class Options: def _is_navi_arch(arch: str) -> bool: """Return True if arch is Navi (gfx11xx or gfx12xx).""" - return arch.startswith("gfx11") or arch.startswith("gfx12") + return arch.startswith("gfx1") # Operations that have no f32 tuning support on Navi (gfx11xx/gfx12xx) - empty tuning range