@@ -913,6 +913,32 @@ StringRef normalizeARMGPUTarget(StringRef target) {
913913// cooperative matrix layouts are opaque. We need to create NVIDIA specific WMMA
914914// intrinsics if we need to have explicit layout analysis and register mapping.
915915
916+ // Reports initial NVIDIA Blackwell 12.1 target capabilities for GPU target
917+ // selection. CUDA execution limits are based on sm_121 GB10 device properties.
918+ const WgpDetails *getBlackwellWgpDetails () {
919+ static const MMAIntrinsic mmaOps[] = {
920+ MMAIntrinsic::NV_MMA_SYNC_F32_16x8x16_F16,
921+ MMAIntrinsic::NV_MMA_SYNC_F16_16x8x16_F16,
922+ MMAIntrinsic::NV_MMA_SYNC_F32_16x8x16_BF16,
923+ MMAIntrinsic::NV_WMMA_F32_16x16x16_F16,
924+ MMAIntrinsic::NV_WMMA_F16_16x16x16_F16,
925+ };
926+ static const WgpDetails blackwellWgp = {allComputeBits,
927+ allStorageBits,
928+ allSubgroupOps,
929+ allDotProductOps,
930+ std::size (mmaOps),
931+ mmaOps,
932+ 0 ,
933+ nullptr ,
934+ {32 , 32 },
935+ {1024 , 1024 , 64 },
936+ 1024 ,
937+ 99 * 1024 ,
938+ {0x7fffffff , 0xffff , 0xffff }};
939+ return &blackwellWgp;
940+ }
941+
916942// Reports Ampere-class NVIDIA tensor core capabilities for GPU target
917943// selection.
918944const WgpDetails *getAmpereWgpDetails () {
@@ -1000,6 +1026,7 @@ const WgpDetails *getPascalWgpDetails() {
10001026
10011027// Maps NVIDIA target aliases to the GPU capability model used by codegen.
10021028std::optional<TargetDetails> getNVIDIAGPUTargetDetails (StringRef target) {
1029+ const WgpDetails *blackwellWgp = getBlackwellWgpDetails ();
10031030 const WgpDetails *ampereWgp = getAmpereWgpDetails ();
10041031 const WgpDetails *turingWgp = getTuringWgpDetails ();
10051032 const WgpDetails *voltaWgp = getVoltaWgpDetails ();
@@ -1038,6 +1065,10 @@ std::optional<TargetDetails> getNVIDIAGPUTargetDetails(StringRef target) {
10381065 .Case (" rtx3070ti" , TargetDetails{ampereWgp, &rtx3070tiChip})
10391066 // https://www.techpowerup.com/gpu-specs/geforce-rtx-3070.c3674
10401067 .Case (" rtx3070" , TargetDetails{ampereWgp, &rtx3070Chip})
1068+ // Initial support for sm_121 / GB10. Other Blackwell compute
1069+ // capabilities, including sm_120, are intentionally left for follow-up
1070+ // validation.
1071+ .Case (" sm_121" , TargetDetails{blackwellWgp, nullptr })
10411072 .Cases ({" ada" , " sm_89" }, TargetDetails{ampereWgp, nullptr })
10421073 .Cases ({" ampere" , " sm_80" , " sm_86" , " sm_87" },
10431074 TargetDetails{ampereWgp, nullptr })
0 commit comments