From 7afd522fd3ffefb6971da516214508d230f686f9 Mon Sep 17 00:00:00 2001 From: functionstackx <47992694+functionstackx@users.noreply.github.com> Date: Thu, 11 Jun 2026 17:06:43 -0400 Subject: [PATCH] fix(inference): swap known-issue precisions back (GB300 FP8, MI355X FP4) #441 applied the precision swap to the wrong entries. The GB300 Dynamo TRT MTP accuracy issue (NVIDIA/srt-slurm#51) affects FP8, and the MI355X MoRI SGLang MTP issue (sgl-project/sglang#27194) affects FP4. Co-Authored-By: Claude Fable 5 --- packages/app/src/lib/known-issues.test.ts | 22 +++++++++++----------- packages/app/src/lib/known-issues.ts | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/packages/app/src/lib/known-issues.test.ts b/packages/app/src/lib/known-issues.test.ts index 332ab340..9db134e9 100644 --- a/packages/app/src/lib/known-issues.test.ts +++ b/packages/app/src/lib/known-issues.test.ts @@ -5,32 +5,32 @@ import { KNOWN_CONFIG_ISSUES, knownIssueCsvNote, matchKnownConfigIssues } from ' const DSR1 = 'DeepSeek-R1-0528'; describe('matchKnownConfigIssues', () => { - it('matches the GB300 Dynamo TRT MTP entry for DeepSeek R1 FP4', () => { + it('matches the GB300 Dynamo TRT MTP entry for DeepSeek R1 FP8', () => { const issues = matchKnownConfigIssues(DSR1, [ - { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp4' }, + { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' }, ]); expect(issues).toHaveLength(1); expect(issues[0].url).toBe('https://github.com/NVIDIA/srt-slurm/issues/51'); }); - it('does not match GB300 Dynamo TRT MTP for non-FP4 precisions', () => { + it('does not match GB300 Dynamo TRT MTP for non-FP8 precisions', () => { const issues = matchKnownConfigIssues(DSR1, [ - { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' }, + { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp4' }, ]); expect(issues).toHaveLength(0); }); - it('matches the MI355X MoRI SGLang MTP entry for DeepSeek R1 FP8', () => { + it('matches the MI355X MoRI SGLang MTP entry for DeepSeek R1 FP4', () => { const issues = matchKnownConfigIssues(DSR1, [ - { hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp8' }, + { hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4' }, ]); expect(issues).toHaveLength(1); expect(issues[0].url).toBe('https://github.com/sgl-project/sglang/issues/27194'); }); - it('does not match MI355X MoRI SGLang MTP for non-FP8 precisions', () => { + it('does not match MI355X MoRI SGLang MTP for non-FP4 precisions', () => { const issues = matchKnownConfigIssues(DSR1, [ - { hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4' }, + { hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp8' }, ]); expect(issues).toHaveLength(0); }); @@ -54,9 +54,9 @@ describe('matchKnownConfigIssues', () => { it('returns each issue at most once even with many matching points', () => { const issues = matchKnownConfigIssues(DSR1, [ - { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp4' }, - { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp4' }, - { hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp8' }, + { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' }, + { hwKey: 'gb300_dynamo-trt_mtp', precision: 'fp8' }, + { hwKey: 'mi355x_mori-sglang_mtp', precision: 'fp4' }, ]); expect(issues).toHaveLength(2); }); diff --git a/packages/app/src/lib/known-issues.ts b/packages/app/src/lib/known-issues.ts index 36fb6045..0056008e 100644 --- a/packages/app/src/lib/known-issues.ts +++ b/packages/app/src/lib/known-issues.ts @@ -29,7 +29,7 @@ export const KNOWN_CONFIG_ISSUES: KnownConfigIssue[] = [ { hwKey: 'gb300_dynamo-trt_mtp', model: Model.DeepSeek_R1, - precisions: ['fp4'], + precisions: ['fp8'], summary: 'Accuracy issues', filed: 'Apr 21, 2026', url: 'https://github.com/NVIDIA/srt-slurm/issues/51', @@ -38,7 +38,7 @@ export const KNOWN_CONFIG_ISSUES: KnownConfigIssue[] = [ { hwKey: 'mi355x_mori-sglang_mtp', model: Model.DeepSeek_R1, - precisions: ['fp8'], + precisions: ['fp4'], summary: 'Accuracy issues', filed: 'Jun 4, 2026', url: 'https://github.com/sgl-project/sglang/issues/27194',