@@ -152,8 +152,7 @@ endif()
152152# retention.
153153if (_cuda_is_msvc_toolchain)
154154 target_link_libraries (
155- aoti_cuda_shims PRIVATE cuda_platform CUDA::cudart
156- ${CMAKE_DL_LIBS}
155+ aoti_cuda_shims PRIVATE cuda_platform CUDA::cudart ${CMAKE_DL_LIBS}
157156 )
158157 # Link object library directly so symbols are pulled exactly once while
159158 # avoiding duplicate static/object inclusion and interface leakage.
@@ -179,10 +178,10 @@ install(
179178
180179# CUDA-specific AOTI sampler shim symbols (rand/randint via curand). Split out
181180# of aoti_cuda_shims so the curand fatbin (~3.5MB precalc tables + Philox
182- # kernels per arch) and the CUDA::curand dependency are only paid by the
183- # small set of consumers that actually use them (e.g. qwen3_5_moe). Other
184- # CUDA examples (voxtral, parakeet, whisper, dinov2, ...) link only
185- # aoti_cuda_shims and stay small.
181+ # kernels per arch) and the CUDA::curand dependency are only paid by the small
182+ # set of consumers that actually use them (e.g. qwen3_5_moe). Other CUDA
183+ # examples (voxtral, parakeet, whisper, dinov2, ...) link only aoti_cuda_shims
184+ # and stay small.
186185if (CMAKE_CUDA_COMPILER)
187186 add_library (aoti_cuda_sampler_shims SHARED runtime/shims/rand.cu )
188187
@@ -217,19 +216,28 @@ if(CMAKE_CUDA_COMPILER)
217216 endif ()
218217
219218 # rand.cu calls into slim helpers (empty_strided, getCurrentCUDAStream,
220- # SlimTensor) which are linked into aoti_cuda_shims. Depend on that target
221- # so we resolve those symbols from the already-loaded shims library
222- # instead of duplicating slim's static archive into both DLLs.
219+ # SlimTensor) which are linked into aoti_cuda_shims. Depend on that target so
220+ # we resolve those symbols from the already-loaded shims library instead of
221+ # duplicating slim's static archive into both DLLs.
222+ #
223+ # Also link `slimtensor` (INTERFACE / header-only) directly so the c10 include
224+ # root (runtime/core/portable_type/c10) is on this target's compile command.
225+ # aoti_cuda_shims links aoti_common_shims_slim PUBLIC on non-MSVC (so includes
226+ # propagate transitively on Linux) but only PRIVATELY via the *_obj OBJECT lib
227+ # on MSVC, which does NOT forward the slimtensor INTERFACE include dirs.
228+ # Linking slimtensor here makes the include path explicit on both toolchains
229+ # and keeps Windows MSVC happy without changing aoti_cuda_shims' propagation
230+ # semantics.
223231 if (_cuda_is_msvc_toolchain)
224232 target_link_libraries (
225233 aoti_cuda_sampler_shims
226234 PRIVATE cuda_platform CUDA::cudart CUDA::curand ${CMAKE_DL_LIBS}
227- aoti_cuda_shims
235+ aoti_cuda_shims slimtensor
228236 )
229237 else ()
230238 target_link_libraries (
231239 aoti_cuda_sampler_shims
232- PRIVATE cuda_platform
240+ PRIVATE cuda_platform slimtensor
233241 PUBLIC CUDA::cudart CUDA::curand ${CMAKE_DL_LIBS} aoti_cuda_shims
234242 )
235243 endif ()
0 commit comments