pytorch
diff --git a/backends/cuda/CMakeLists.txt b/backends/cuda/CMakeLists.txt
Lines changed: 3 additions & 1 deletion
diff --git a/backends/cuda/runtime/TARGETS b/backends/cuda/runtime/TARGETS
Lines changed: 2 additions & 0 deletions
diff --git a/backends/cuda/runtime/cuda_backend.cpp b/backends/cuda/runtime/cuda_backend.cpp
Lines changed: 11 additions & 0 deletions
@@ -179,7 +179,9 @@ install(
 )
 
 # CUDA backend implementation
-set(_aoti_cuda_backend_sources runtime/cuda_backend.cpp)
+set(_aoti_cuda_backend_sources runtime/cuda_backend.cpp
+                               runtime/cuda_mutable_state.cpp
+)
 if(_cuda_is_msvc_toolchain)
   # MSVC links aoti_cuda_backend into portable_lib without relying on C++
   # symbols exported from aoti_cuda_shims.dll.
 
@@ -105,9 +105,11 @@ runtime.cxx_library(
     name = "cuda_backend",
     srcs = [
         "cuda_backend.cpp",
+        "cuda_mutable_state.cpp",
     ],
     headers = [
         "cuda_delegate_handle.h",
+        "cuda_mutable_state.h",
     ],
     # @lint-ignore BUCKLINT: Avoid `link_whole=True` (https://fburl.com/avoid-link-whole)
     link_whole = True,
 
@@ -42,6 +42,7 @@
 #include <executorch/backends/aoti/utils.h>
 #include <executorch/backends/cuda/runtime/cuda_allocator.h>
 #include <executorch/backends/cuda/runtime/cuda_delegate_handle.h>
+#include <executorch/backends/cuda/runtime/cuda_mutable_state.h>
 #include <executorch/backends/cuda/runtime/platform/platform.h>
 #include <executorch/backends/cuda/runtime/shims/memory.h>
 #include <executorch/backends/cuda/runtime/utils.h>
@@ -466,6 +467,10 @@ class ET_EXPERIMENTAL CudaBackend final
           kCudaGraphWarmupSteps);
     }
 
+    // Record whether this AOTI build exposes the constant-management symbols
+    // needed for per-session mutable-buffer rebinding (CUDA V2 multi-session).
+    mutable_state_note_handle(handle);
+
     return (DelegateHandle*)handle; // Return the handle post-processing
   }
 
@@ -514,6 +519,12 @@ class ET_EXPERIMENTAL CudaBackend final
           static_cast<int>(device_type));
     }
 
+    // CUDA V2 multi-session: if a logical session is active on this thread,
+    // rebind this container's mutable constants (KV/conv/recurrent) to the
+    // session's own GPU buffers before running. This is a no-op for
+    // single-session and legacy execution.
+    ET_CHECK_OK_OR_RETURN_ERROR(mutable_state_rebind_for_execute(handle));
+
     // ---------------------------------------------------------------
     // CUDA graph REPLAY path — skip all tensor setup and just replay
     // ---------------------------------------------------------------