Update torch dependency to 2.10

haijieg · haijieg · commit 799a8ab95a07 · 2026-01-28T10:58:46.000-08:00
- update docs

Signed-off-by: Jay Gu <jagu@nvidia.com>
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -54,3 +54,4 @@ Proceed to the :ref:`quickstart` page for installation instructions and a comple
    performance
    operations
    debugging
+   known_issues
diff --git a/docs/source/known_issues.rst b/docs/source/known_issues.rst
@@ -0,0 +1,11 @@
+.. SPDX-FileCopyrightText: Copyright (c) <2025> NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+..
+.. SPDX-License-Identifier: Apache-2.0
+
+
+Known Issues
+============
+
+1. FP8 Torch tensors require ``torch>=2.10``. Older versions of PyTorch do not support converting
+   fp8 data types through the ``dlpack`` protocol and will `leak memory <https://github.com/pytorch/pytorch/issues/171820>`__
+   when conversion to a dlpack tensor fails.
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
@@ -116,5 +116,5 @@ Developer Tools
 This profile can then be loaded in a graphical instance of Nsight Compute and the kernel ``vector_add`` selected to see statistics about the kernel.
 
 .. note:: 
-    Capturing detailed statistics for cuTile Python kernels requires running on NVIDIA Driver r590 or later.
+    Capturing detailed statistics for cuTile Python kernels requires NVIDIA Driver r580.126.09 or later (Linux) or r582.16 or later (Windows).
 
diff --git a/test/requirements.txt b/test/requirements.txt
@@ -15,4 +15,4 @@ numba-cuda[cu13]==0.20.0
 cupy-cuda13x==13.6.0
 
 --extra-index-url https://download.pytorch.org/whl/cu130
-torch==2.9.0
+torch==2.10.0
diff --git a/test/test_mma.py b/test/test_mma.py
@@ -121,7 +121,7 @@ def test_mma_fp8(tile_size, case):
     scale = torch.tensor([1.0], dtype=torch.float32, device="cuda")
     try:
         ref = torch._scaled_mm(A, B.T, scale, scale, out_dtype=C.dtype) + C
-    except RuntimeError as e:
+    except (RuntimeError, ValueError) as e:
         assert 'Multiplication of two Float8_e5m2 matrices is not supported' in str(e)
         ref = None
     ct.launch(torch.cuda.current_stream(), (1,), mma_kernel,
@@ -270,7 +270,7 @@ def test_matmul_fp8(tile_size, dtype):
     scale = torch.tensor([1.0], dtype=torch.float32, device="cuda")
     try:
         ref = torch._scaled_mm(A, B.T, scale, scale, out_dtype=torch.float16).to(dtype)
-    except RuntimeError as e:
+    except (RuntimeError, ValueError) as e:
         assert 'Multiplication of two Float8_e5m2 matrices is not supported' in str(e)
         ref = None
     ct.launch(torch.cuda.current_stream(), (1,), matmul_kernel,