From 3ca8bf425d78fa4488c66020157b12b9d3562539 Mon Sep 17 00:00:00 2001 From: Teja Alaghari Date: Tue, 15 Apr 2025 13:06:58 +0800 Subject: [PATCH] Added migration logic for RMA, RMA_NBI, Memory Ord, Signal & Collective ops --- clang/lib/DPCT/RulesSHMEM/APINamesNvshmem.inc | 438 ++++- .../DPCT/RulesSHMEM/NVSHMEMAPIMigration.cpp | 122 +- clang/lib/DPCT/SrcAPI/APINames_nvSHMEM.inc | 1474 +++++++-------- clang/test/dpct/nvshmem/coll_ops.cu | 16 + clang/test/dpct/nvshmem/mem_order.cu | 30 + clang/test/dpct/nvshmem/rma.cu | 1653 +++++++++++++++++ clang/test/dpct/nvshmem/rma_nbi.cu | 804 ++++++++ 7 files changed, 3786 insertions(+), 751 deletions(-) create mode 100644 clang/test/dpct/nvshmem/coll_ops.cu create mode 100644 clang/test/dpct/nvshmem/mem_order.cu create mode 100644 clang/test/dpct/nvshmem/rma.cu diff --git a/clang/lib/DPCT/RulesSHMEM/APINamesNvshmem.inc b/clang/lib/DPCT/RulesSHMEM/APINamesNvshmem.inc index bc84423bef3c..b6cac092ad30 100644 --- a/clang/lib/DPCT/RulesSHMEM/APINamesNvshmem.inc +++ b/clang/lib/DPCT/RulesSHMEM/APINamesNvshmem.inc @@ -5,6 +5,199 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +// Helper macros for stringizing +#define STRINGIZE(x) #x +#define EXPAND_AND_STRINGIZE(x) STRINGIZE(x) + +// Helper macro for registering API rules for standard RMA data types +#define FOR_ALL_STANDARD_RMA_TYPES(PREFIX, MIGRATION_RULE, OP, N_POSTFIX, \ + I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, float, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, double, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, char, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, short, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, schar, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, int, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, long, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, longlong, N_POSTFIX, I_POSTFIX) \ + 
MIGRATION_RULE(PREFIX, OP, uchar, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, ushort, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, uint, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, ulong, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, ulonglong, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, int8, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, int16, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, int32, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, int64, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, uint8, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, uint16, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, uint32, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, uint64, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, size, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, ptrdiff, N_POSTFIX, I_POSTFIX) + +// Helper macro for registering API rules for all sized RMA variants (8, 16, 32, 64 and 128) +#define FOR_ALL_SIZES(PREFIX, MIGRATION_RULE, OP, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, 8, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, 16, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, 32, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, 64, N_POSTFIX, I_POSTFIX) \ + MIGRATION_RULE(PREFIX, OP, 128, N_POSTFIX, I_POSTFIX) + +// Helper macro for registering a single API rule +#define REGISTER_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3)))) + +#define REGISTER_STREAM_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + 
CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), DEREF(4)))) + +#define REGISTER_BLOCK_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), GROUP))) + +#define REGISTER_WARP_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), SUBGROUP))) + +#define REGISTER_SIZE_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3)))) + +#define REGISTER_SIZE_STREAM_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), DEREF(4)))) + +#define REGISTER_SIZE_BLOCK_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), GROUP))) + +#define REGISTER_SIZE_WARP_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + 
HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), SUBGROUP))) + +#define REGISTER_STRIDED_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), ARG(4), ARG(5)))) + +#define REGISTER_STRIDED_STREAM_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), DEREF(6)))) + +#define REGISTER_STRIDED_BLOCK_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), GROUP))) + +#define REGISTER_STRIDED_WARP_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), SUBGROUP))) + +#define REGISTER_STRIDED_SIZE_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + 
CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), ARG(5)))) + +#define REGISTER_STRIDED_SIZE_STREAM_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), DEREF(6)))) + +#define REGISTER_STRIDED_SIZE_BLOCK_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), GROUP))) + +#define REGISTER_STRIDED_SIZE_WARP_NVSHMEM_RULE(PREFIX, OP, SIZE, N_POSTFIX, \ + I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##SIZE##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##SIZE##I_POSTFIX), \ + ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), SUBGROUP))) + +#define REGISTER_P_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1), ARG(2)))) + +#define REGISTER_G_NVSHMEM_RULE(PREFIX, OP, TYPE, N_POSTFIX, I_POSTFIX) \ + FEATURE_REQUEST_FACTORY( \ + HelperFeatureEnum::device_ext, \ + CALL_FACTORY_ENTRY( \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##TYPE##_##OP##N_POSTFIX), \ + CALL(EXPAND_AND_STRINGIZE(ishmem##PREFIX##_##OP##I_POSTFIX), ARG(0), \ + ARG(1)))) // Library Setup, Exit & Query 
FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, @@ -96,22 +289,209 @@ FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("nvshmem_team_destroy", CALL("ishmem_team_destroy", ARG(0)))) -// Nonblocking RMA +// RMA +// nvshmem_TYPENAME_put +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_NVSHMEM_RULE, put, , ) +// nvshmemx_TYPENAME_put_on_stream +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STREAM_NVSHMEM_RULE, put, _on_stream, + _on_queue) +// nvshmemx_TYPENAME_put_block +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_BLOCK_NVSHMEM_RULE, put, _block, + _work_group) +// nvshmemx_TYPENAME_put_warp +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_WARP_NVSHMEM_RULE, put, _warp, + _work_group) + +// nvshmem_putSIZE +FOR_ALL_SIZES(, REGISTER_SIZE_NVSHMEM_RULE, put, , ) +// nvshmemx_putSIZE_on_stream +FOR_ALL_SIZES(x, REGISTER_SIZE_STREAM_NVSHMEM_RULE, put, _on_stream, _on_queue) +// nvshmemx_putSIZE_block +FOR_ALL_SIZES(x, REGISTER_SIZE_BLOCK_NVSHMEM_RULE, put, _block, _work_group) +// nvshmemx_putSIZE_warp +FOR_ALL_SIZES(x, REGISTER_SIZE_WARP_NVSHMEM_RULE, put, _warp, _work_group) + +// nvshmem_TYPENAME_iput +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_STRIDED_NVSHMEM_RULE, iput, , ) +// nvshmemx_TYPENAME_iput_on_stream +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STRIDED_STREAM_NVSHMEM_RULE, iput, + _on_stream, _on_queue) +// nvshmemx_TYPENAME_iput_block +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STRIDED_BLOCK_NVSHMEM_RULE, iput, _block, + _work_group) +// nvshmemx_TYPENAME_iput_warp +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STRIDED_WARP_NVSHMEM_RULE, iput, _warp, + _work_group) + +// nvshmem_iputSIZE +FOR_ALL_SIZES(, REGISTER_STRIDED_SIZE_NVSHMEM_RULE, iput, , ) +// nvshmemx_iputSIZE_on_stream +FOR_ALL_SIZES(x, REGISTER_STRIDED_SIZE_STREAM_NVSHMEM_RULE, iput, _on_stream, + _on_queue) +// nvshmemx_iputSIZE_block +FOR_ALL_SIZES(x, REGISTER_STRIDED_SIZE_BLOCK_NVSHMEM_RULE, iput, _block, + _work_group) +// nvshmemx_iputSIZE_warp +FOR_ALL_SIZES(x, REGISTER_STRIDED_SIZE_WARP_NVSHMEM_RULE, iput, 
_warp, + _work_group) + +// nvshmem_putmem +REGISTER_SIZE_NVSHMEM_RULE(, put, mem, , ) +// nvshmemx_putmem_on_stream +REGISTER_SIZE_STREAM_NVSHMEM_RULE(x, put, mem, _on_stream, _on_queue) +// nvshmemx_putmem_block +REGISTER_SIZE_BLOCK_NVSHMEM_RULE(x, put, mem, _block, _work_group) +// nvshmemx_putmem_warp +REGISTER_SIZE_WARP_NVSHMEM_RULE(x, put, mem, _warp, _work_group) + +// nvshmem_TYPENAME_p +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_P_NVSHMEM_RULE, p, , ) + +// nvshmem_TYPENAME_get +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_NVSHMEM_RULE, get, , ) +// nvshmemx_TYPENAME_get_on_stream +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STREAM_NVSHMEM_RULE, get, _on_stream, + _on_queue) +// nvshmemx_TYPENAME_get_block +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_BLOCK_NVSHMEM_RULE, get, _block, + _work_group) +// nvshmemx_TYPENAME_get_warp +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_WARP_NVSHMEM_RULE, get, _warp, + _work_group) + +// nvshmem_getSIZE +FOR_ALL_SIZES(, REGISTER_SIZE_NVSHMEM_RULE, get, , ) +// nvshmemx_getSIZE_on_stream +FOR_ALL_SIZES(x, REGISTER_SIZE_STREAM_NVSHMEM_RULE, get, _on_stream, _on_queue) +// nvshmemx_getSIZE_block +FOR_ALL_SIZES(x, REGISTER_SIZE_BLOCK_NVSHMEM_RULE, get, _block, _work_group) +// nvshmemx_getSIZE_warp +FOR_ALL_SIZES(x, REGISTER_SIZE_WARP_NVSHMEM_RULE, get, _warp, _work_group) + +// nvshmem_TYPENAME_iget +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_STRIDED_NVSHMEM_RULE, iget, , ) +// nvshmemx_TYPENAME_iget_on_stream +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STRIDED_STREAM_NVSHMEM_RULE, iget, + _on_stream, _on_queue) +// nvshmemx_TYPENAME_iget_block +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STRIDED_BLOCK_NVSHMEM_RULE, iget, _block, + _work_group) +// nvshmemx_TYPENAME_iget_warp +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STRIDED_WARP_NVSHMEM_RULE, iget, _warp, + _work_group) + +// nvshmem_igetSIZE +FOR_ALL_SIZES(, REGISTER_STRIDED_SIZE_NVSHMEM_RULE, iget, , ) +// nvshmemx_igetSIZE_on_stream +FOR_ALL_SIZES(x, REGISTER_STRIDED_SIZE_STREAM_NVSHMEM_RULE, iget, _on_stream, + 
_on_queue) +// nvshmemx_igetSIZE_block +FOR_ALL_SIZES(x, REGISTER_STRIDED_SIZE_BLOCK_NVSHMEM_RULE, iget, _block, + _work_group) +// nvshmemx_igetSIZE_warp +FOR_ALL_SIZES(x, REGISTER_STRIDED_SIZE_WARP_NVSHMEM_RULE, iget, _warp, + _work_group) + +// nvshmem_getmem +REGISTER_SIZE_NVSHMEM_RULE(, get, mem, , ) +// nvshmemx_getmem_on_stream +REGISTER_SIZE_STREAM_NVSHMEM_RULE(x, get, mem, _on_stream, _on_queue) +// nvshmemx_getmem_block +REGISTER_SIZE_BLOCK_NVSHMEM_RULE(x, get, mem, _block, _work_group) +// nvshmemx_getmem_warp +REGISTER_SIZE_WARP_NVSHMEM_RULE(x, get, mem, _warp, _work_group) + +// nvshmem_TYPENAME_g +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_G_NVSHMEM_RULE, g, , ) + +// Nonblocking RMA +// nvshmem_TYPENAME_put_nbi +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_NVSHMEM_RULE, put, _nbi, _nbi) +// nvshmemx_TYPENAME_put_nbi_on_stream +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STREAM_NVSHMEM_RULE, put, _nbi_on_stream, + _nbi_on_queue) +// nvshmemx_TYPENAME_put_nbi_block +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_BLOCK_NVSHMEM_RULE, put, _nbi_block, + _nbi_work_group) +// nvshmemx_TYPENAME_put_nbi_warp +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_WARP_NVSHMEM_RULE, put, _nbi_warp, + _nbi_work_group) + +// nvshmem_putSIZE_nbi +FOR_ALL_SIZES(, REGISTER_SIZE_NVSHMEM_RULE, put, _nbi, _nbi) +// nvshmemx_putSIZE_nbi_on_stream +FOR_ALL_SIZES(x, REGISTER_SIZE_STREAM_NVSHMEM_RULE, put, _nbi_on_stream, + _nbi_on_queue) +// nvshmemx_putSIZE_nbi_block +FOR_ALL_SIZES(x, REGISTER_SIZE_BLOCK_NVSHMEM_RULE, put, _nbi_block, + _nbi_work_group) +// nvshmemx_putSIZE_nbi_warp +FOR_ALL_SIZES(x, REGISTER_SIZE_WARP_NVSHMEM_RULE, put, _nbi_warp, + _nbi_work_group) + +// nvshmem_putmem_nbi +REGISTER_SIZE_NVSHMEM_RULE(, put, mem, _nbi, _nbi) +// nvshmemx_putmem_nbi_on_stream +REGISTER_SIZE_STREAM_NVSHMEM_RULE(x, put, mem, _nbi_on_stream, _nbi_on_queue) +// nvshmemx_putmem_nbi_block +REGISTER_SIZE_BLOCK_NVSHMEM_RULE(x, put, mem, _nbi_block, _nbi_work_group) +// nvshmemx_putmem_nbi_warp +REGISTER_SIZE_WARP_NVSHMEM_RULE(x, 
put, mem, _nbi_warp, _nbi_work_group) + +// nvshmem_TYPENAME_get_nbi +FOR_ALL_STANDARD_RMA_TYPES(, REGISTER_NVSHMEM_RULE, get, _nbi, _nbi) +// nvshmemx_TYPENAME_get_nbi_on_stream +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_STREAM_NVSHMEM_RULE, get, _nbi_on_stream, + _nbi_on_queue) +// nvshmemx_TYPENAME_get_nbi_block +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_BLOCK_NVSHMEM_RULE, get, _nbi_block, + _nbi_work_group) +// nvshmemx_TYPENAME_get_nbi_warp +FOR_ALL_STANDARD_RMA_TYPES(x, REGISTER_WARP_NVSHMEM_RULE, get, _nbi_warp, + _nbi_work_group) + +// nvshmem_getSIZE_nbi +FOR_ALL_SIZES(, REGISTER_SIZE_NVSHMEM_RULE, get, _nbi, _nbi) +// nvshmemx_getSIZE_nbi_on_stream +FOR_ALL_SIZES(x, REGISTER_SIZE_STREAM_NVSHMEM_RULE, get, _nbi_on_stream, + _nbi_on_queue) +// nvshmemx_getSIZE_nbi_block +FOR_ALL_SIZES(x, REGISTER_SIZE_BLOCK_NVSHMEM_RULE, get, _nbi_block, + _nbi_work_group) +// nvshmemx_getSIZE_nbi_warp +FOR_ALL_SIZES(x, REGISTER_SIZE_WARP_NVSHMEM_RULE, get, _nbi_warp, + _nbi_work_group) + +// nvshmem_getmem_nbi +REGISTER_SIZE_NVSHMEM_RULE(, get, mem, _nbi, _nbi) +// nvshmemx_getmem_nbi_on_stream +REGISTER_SIZE_STREAM_NVSHMEM_RULE(x, get, mem, _nbi_on_stream, _nbi_on_queue) +// nvshmemx_getmem_nbi_block +REGISTER_SIZE_BLOCK_NVSHMEM_RULE(x, get, mem, _nbi_block, _nbi_work_group) +// nvshmemx_getmem_nbi_warp +REGISTER_SIZE_WARP_NVSHMEM_RULE(x, get, mem, _nbi_warp, _nbi_work_group) + +// Memory Ordering FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY("nvshmem_putmem_nbi", - CALL("ishmem_putmem_nbi", ARG(0), - ARG(1), ARG(2), ARG(3)))) + CALL_FACTORY_ENTRY("nvshmem_fence", + CALL("ishmem_fence"))) -// Signalling Operations -HEADER_INSERT_FACTORY(HeaderType::HT_DPCT_SHMEM_Utils, - FEATURE_REQUEST_FACTORY( - HelperFeatureEnum::device_ext, - CALL_FACTORY_ENTRY("nvshmemx_signal_op", - CALL(MapNames::getDpctNamespace() + - "shmemx::signal_op", - ARG(0), ARG(1), ARG(2), - ARG(3))))) +FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, + 
CALL_FACTORY_ENTRY("nvshmem_quiet", + CALL("ishmem_quiet"))) +FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, + CALL_FACTORY_ENTRY("nvshmemx_quiet_on_stream", + CALL("ishmemx_quiet_on_queue", + DEREF(0)))) + +// Collective Operations +FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, + CALL_FACTORY_ENTRY("nvshmemx_barrier_all_on_stream", + CALL("ishmemx_barrier_all_on_queue", + DEREF(0)))) + +// Signalling Operations FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, CALL_FACTORY_ENTRY("nvshmem_signal_wait_until", CALL("ishmem_signal_wait_until", @@ -122,3 +502,35 @@ FEATURE_REQUEST_FACTORY(HelperFeatureEnum::device_ext, CALL("ishmem_putmem_signal_nbi", ARG(0), ARG(1), ARG(2), ARG(3), ARG(4), ARG(5), ARG(6)))) + +HEADER_INSERT_FACTORY(HeaderType::HT_DPCT_SHMEM_Utils, + FEATURE_REQUEST_FACTORY( + HelperFeatureEnum::device_ext, + CALL_FACTORY_ENTRY("nvshmemx_signal_op", + CALL(MapNames::getDpctNamespace() + + "shmemx::signal_op", + ARG(0), ARG(1), ARG(2), + ARG(3))))) + +#undef STRINGIZE +#undef EXPAND_AND_STRINGIZE +#undef FOR_ALL_STANDARD_RMA_TYPES +#undef FOR_ALL_SIZES +#undef REGISTER_NVSHMEM_RULE +#undef REGISTER_STREAM_NVSHMEM_RULE +#undef REGISTER_BLOCK_NVSHMEM_RULE +#undef REGISTER_WARP_NVSHMEM_RULE +#undef REGISTER_SIZE_NVSHMEM_RULE +#undef REGISTER_SIZE_STREAM_NVSHMEM_RULE +#undef REGISTER_SIZE_BLOCK_NVSHMEM_RULE +#undef REGISTER_SIZE_WARP_NVSHMEM_RULE +#undef REGISTER_STRIDED_NVSHMEM_RULE +#undef REGISTER_STRIDED_STREAM_NVSHMEM_RULE +#undef REGISTER_STRIDED_BLOCK_NVSHMEM_RULE +#undef REGISTER_STRIDED_WARP_NVSHMEM_RULE +#undef REGISTER_STRIDED_SIZE_NVSHMEM_RULE +#undef REGISTER_STRIDED_SIZE_STREAM_NVSHMEM_RULE +#undef REGISTER_STRIDED_SIZE_BLOCK_NVSHMEM_RULE +#undef REGISTER_STRIDED_SIZE_WARP_NVSHMEM_RULE +#undef REGISTER_P_NVSHMEM_RULE +#undef REGISTER_G_NVSHMEM_RULE diff --git a/clang/lib/DPCT/RulesSHMEM/NVSHMEMAPIMigration.cpp b/clang/lib/DPCT/RulesSHMEM/NVSHMEMAPIMigration.cpp index 33126f0f28e3..fb12df2fbb98 100644 --- 
a/clang/lib/DPCT/RulesSHMEM/NVSHMEMAPIMigration.cpp +++ b/clang/lib/DPCT/RulesSHMEM/NVSHMEMAPIMigration.cpp @@ -12,6 +12,41 @@ using namespace clang::dpct; using namespace clang::ast_matchers; +#define STRINGIZE(x) #x +#define EXPAND_AND_STRINGIZE(x) STRINGIZE(x) + +#define FOR_ALL_STANDARD_RMA_TYPES(PREFIX, POSTFIX) \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##float##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##double##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##char##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##schar##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##short##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##int##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##long##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##longlong##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##uchar##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##ushort##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##uint##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##ulong##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##ulonglong##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##int8##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##int16##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##int32##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##int64##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##uint8##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##uint16##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##uint32##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##uint64##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##size##_##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##ptrdiff##_##POSTFIX) + +#define FOR_ALL_SIZES(PREFIX, OP, POSTFIX) \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##8##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##16##POSTFIX), \ + 
EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##32##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##64##POSTFIX), \ + EXPAND_AND_STRINGIZE(nvshmem##PREFIX##_##OP##128##POSTFIX) + void clang::dpct::NVSHMEMRule::registerMatcher(ast_matchers::MatchFinder &MF) { auto NvshmemAPI = [&]() { return hasAnyName( @@ -25,8 +60,88 @@ void clang::dpct::NVSHMEMRule::registerMatcher(ast_matchers::MatchFinder &MF) { "nvshmem_team_my_pe", "nvshmem_team_n_pes", "nvshmem_team_get_config", "nvshmem_team_translate_pe", "nvshmem_team_split_strided", "nvshmem_team_split_2d", "nvshmem_team_destroy", + // RMA + FOR_ALL_STANDARD_RMA_TYPES(, put) /*nvshmem_TYPENAME_put*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, put_on_stream) /*nvshmemx_TYPENAME_put_on_stream*/, + FOR_ALL_STANDARD_RMA_TYPES(x, + put_block) /*nvshmemx_TYPENAME_put_block*/, + FOR_ALL_STANDARD_RMA_TYPES(x, put_warp) /*nvshmemx_TYPENAME_put_warp*/, + FOR_ALL_SIZES(, put, ) /*nvshmem_putSIZE*/, + FOR_ALL_SIZES(x, put, _on_stream) /*nvshmemx_putSIZE_on_stream*/, + FOR_ALL_SIZES(x, put, _block) /*nvshmem_putSIZE_block*/, + FOR_ALL_SIZES(x, put, _warp) /*nvshmem_putSIZE_warp*/, + FOR_ALL_STANDARD_RMA_TYPES(, iput) /*nvshmem_TYPENAME_iput*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, iput_on_stream) /*nvshmemx_TYPENAME_iput_on_stream*/, + FOR_ALL_STANDARD_RMA_TYPES(x, + iput_block) /*nvshmemx_TYPENAME_iput_block*/, + FOR_ALL_STANDARD_RMA_TYPES(x, + iput_warp) /*nvshmemx_TYPENAME_iput_warp*/, + FOR_ALL_SIZES(, iput, ) /*nvshmem_iputSIZE*/, + FOR_ALL_SIZES(x, iput, _on_stream) /*nvshmem_iputSIZE_on_stream*/, + FOR_ALL_SIZES(x, iput, _block) /*nvshmem_iputSIZE_block*/, + FOR_ALL_SIZES(x, iput, _warp) /*nvshmem_iputSIZE_warp*/, + "nvshmem_putmem", "nvshmemx_putmem_on_stream", "nvshmemx_putmem_block", + "nvshmemx_putmem_warp", + FOR_ALL_STANDARD_RMA_TYPES(, p) /*nvshmem_TYPENAME_p*/, + FOR_ALL_STANDARD_RMA_TYPES(, get) /*nvshmem_TYPENAME_get*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, get_on_stream) /*nvshmemx_TYPENAME_get_on_stream*/, + 
FOR_ALL_STANDARD_RMA_TYPES(x, + get_block) /*nvshmemx_TYPENAME_get_block*/, + FOR_ALL_STANDARD_RMA_TYPES(x, get_warp) /*nvshmemx_TYPENAME_get_warp*/, + FOR_ALL_SIZES(, get, ) /*nvshmem_getSIZE*/, + FOR_ALL_SIZES(x, get, _on_stream) /*nvshmem_getSIZE_on_stream*/, + FOR_ALL_SIZES(x, get, _block) /*nvshmem_getSIZE_block*/, + FOR_ALL_SIZES(x, get, _warp) /*nvshmem_getSIZE_warp*/, + FOR_ALL_STANDARD_RMA_TYPES(, iget) /*nvshmem_TYPENAME_iget*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, iget_on_stream) /*nvshmemx_TYPENAME_iget_on_stream*/, + FOR_ALL_STANDARD_RMA_TYPES(x, + iget_block) /*nvshmemx_TYPENAME_iget_block*/, + FOR_ALL_STANDARD_RMA_TYPES(x, + iget_warp) /*nvshmemx_TYPENAME_iget_warp*/, + FOR_ALL_SIZES(, iget, ) /*nvshmem_igetSIZE*/, + FOR_ALL_SIZES(x, iget, _on_stream) /*nvshmem_igetSIZE_on_stream*/, + FOR_ALL_SIZES(x, iget, _block) /*nvshmem_igetSIZE_block*/, + FOR_ALL_SIZES(x, iget, _warp) /*nvshmem_igetSIZE_warp*/, + "nvshmem_getmem", "nvshmemx_getmem_on_stream", "nvshmemx_getmem_block", + "nvshmemx_getmem_warp", + FOR_ALL_STANDARD_RMA_TYPES(, g) /*nvshmem_TYPENAME_g*/, // Nonblocking RMA - "nvshmem_putmem_nbi", + FOR_ALL_STANDARD_RMA_TYPES(, put_nbi) /*nvshmem_TYPENAME_put_nbi*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, put_nbi_on_stream) /*nvshmemx_TYPENAME_put_nbi_on_stream*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, put_nbi_block) /*nvshmemx_TYPENAME_put_nbi_block*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, put_nbi_warp) /*nvshmemx_TYPENAME_put_nbi_warp*/, + FOR_ALL_SIZES(, put, _nbi) /*nvshmem_putSIZE_nbi*/, + FOR_ALL_SIZES(x, put, + _nbi_on_stream) /*nvshmemx_putSIZE_nbi_on_stream*/, + FOR_ALL_SIZES(x, put, _nbi_block) /*nvshmem_putSIZE_nbi_block*/, + FOR_ALL_SIZES(x, put, _nbi_warp) /*nvshmem_putSIZE_nbi_warp*/, + "nvshmem_putmem_nbi", "nvshmemx_putmem_nbi_on_stream", + "nvshmemx_putmem_nbi_block", "nvshmemx_putmem_nbi_warp", + FOR_ALL_STANDARD_RMA_TYPES(, get_nbi) /*nvshmem_TYPENAME_get_nbi*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, get_nbi_on_stream) 
/*nvshmemx_TYPENAME_get_nbi_on_stream*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, get_nbi_block) /*nvshmemx_TYPENAME_get_nbi_block*/, + FOR_ALL_STANDARD_RMA_TYPES( + x, get_nbi_warp) /*nvshmemx_TYPENAME_get_nbi_warp*/, + FOR_ALL_SIZES(, get, _nbi) /*nvshmem_getSIZE_nbi*/, + FOR_ALL_SIZES(x, get, + _nbi_on_stream) /*nvshmemx_getSIZE_nbi_on_stream*/, + FOR_ALL_SIZES(x, get, _nbi_block) /*nvshmem_getSIZE_nbi_block*/, + FOR_ALL_SIZES(x, get, _nbi_warp) /*nvshmem_getSIZE_nbi_warp*/, + "nvshmem_getmem_nbi", "nvshmemx_getmem_nbi_on_stream", + "nvshmemx_getmem_nbi_block", "nvshmemx_getmem_nbi_warp", + // Memory Ordering + "nvshmem_fence", "nvshmem_quiet", "nvshmemx_quiet_on_stream", + // Collective Operations + "nvshmemx_barrier_all_on_stream", // Signalling Operations "nvshmemx_signal_op", "nvshmem_signal_wait_until", "nvshmem_putmem_signal_nbi"); @@ -112,3 +227,8 @@ void clang::dpct::NVSHMEMRule::runRule( emplaceTransformation(EA.getReplacement()); EA.applyAllSubExprRepl(); } + +#undef STRINGIZE +#undef EXPAND_AND_STRINGIZE +#undef FOR_ALL_STANDARD_RMA_TYPES +#undef FOR_ALL_SIZES diff --git a/clang/lib/DPCT/SrcAPI/APINames_nvSHMEM.inc b/clang/lib/DPCT/SrcAPI/APINames_nvSHMEM.inc index fe107913e4b0..9a1134edaedc 100644 --- a/clang/lib/DPCT/SrcAPI/APINames_nvSHMEM.inc +++ b/clang/lib/DPCT/SrcAPI/APINames_nvSHMEM.inc @@ -80,844 +80,844 @@ ENTRY(nvshmem_team_destroy, nvshmem_team_destroy, true, NO_FLAG, P4, "Succeessfu // Remote Memory Access (RMA) // nvshmem_TYPENAME_put -ENTRY(nvshmem_float_put, nvshmem_float_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_put, nvshmem_double_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_put, nvshmem_char_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_put, nvshmem_schar_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_put, nvshmem_short_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_put, nvshmem_int_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_put, nvshmem_long_put, false, NO_FLAG, P4, 
"comment") -ENTRY(nvshmem_longlong_put, nvshmem_longlong_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_put, nvshmem_uchar_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_put, nvshmem_ushort_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_put, nvshmem_uint_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_put, nvshmem_ulong_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_put, nvshmem_ulonglong_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_put, nvshmem_int8_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_put, nvshmem_int16_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_put, nvshmem_int32_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_put, nvshmem_int64_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_put, nvshmem_uint8_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_put, nvshmem_uint16_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_put, nvshmem_uint32_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_put, nvshmem_uint64_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_put, nvshmem_size_put, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_put, nvshmem_ptrdiff_put, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_put, nvshmem_float_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_put, nvshmem_double_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_put, nvshmem_char_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_put, nvshmem_schar_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_put, nvshmem_short_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_put, nvshmem_int_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_put, nvshmem_long_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_put, nvshmem_longlong_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_put, nvshmem_uchar_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_put, 
nvshmem_ushort_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_put, nvshmem_uint_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_put, nvshmem_ulong_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_put, nvshmem_ulonglong_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_put, nvshmem_int8_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_put, nvshmem_int16_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_put, nvshmem_int32_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_put, nvshmem_int64_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_put, nvshmem_uint8_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_put, nvshmem_uint16_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_put, nvshmem_uint32_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_put, nvshmem_uint64_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_put, nvshmem_size_put, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_put, nvshmem_ptrdiff_put, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_put_on_stream -ENTRY(nvshmemx_float_put_on_stream, nvshmemx_float_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_put_on_stream, nvshmemx_double_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_put_on_stream, nvshmemx_char_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_put_on_stream, nvshmemx_schar_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_put_on_stream, nvshmemx_short_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_put_on_stream, nvshmemx_int_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_put_on_stream, nvshmemx_long_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_put_on_stream, nvshmemx_longlong_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_put_on_stream, nvshmemx_uchar_put_on_stream, false, NO_FLAG, P4, 
"comment") -ENTRY(nvshmemx_ushort_put_on_stream, nvshmemx_ushort_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_put_on_stream, nvshmemx_uint_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_put_on_stream, nvshmemx_ulong_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_put_on_stream, nvshmemx_ulonglong_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_put_on_stream, nvshmemx_int8_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_put_on_stream, nvshmemx_int16_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_put_on_stream, nvshmemx_int32_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_put_on_stream, nvshmemx_int64_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_put_on_stream, nvshmemx_uint8_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_put_on_stream, nvshmemx_uint16_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_put_on_stream, nvshmemx_uint32_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_put_on_stream, nvshmemx_uint64_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_put_on_stream, nvshmemx_size_put_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_put_on_stream, nvshmemx_ptrdiff_put_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_put_on_stream, nvshmemx_float_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_put_on_stream, nvshmemx_double_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_put_on_stream, nvshmemx_char_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_put_on_stream, nvshmemx_schar_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_put_on_stream, nvshmemx_short_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_put_on_stream, nvshmemx_int_put_on_stream, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_long_put_on_stream, nvshmemx_long_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_put_on_stream, nvshmemx_longlong_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_put_on_stream, nvshmemx_uchar_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_put_on_stream, nvshmemx_ushort_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_put_on_stream, nvshmemx_uint_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_put_on_stream, nvshmemx_ulong_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_put_on_stream, nvshmemx_ulonglong_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_put_on_stream, nvshmemx_int8_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_put_on_stream, nvshmemx_int16_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_put_on_stream, nvshmemx_int32_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_put_on_stream, nvshmemx_int64_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_put_on_stream, nvshmemx_uint8_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_put_on_stream, nvshmemx_uint16_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_put_on_stream, nvshmemx_uint32_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_put_on_stream, nvshmemx_uint64_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_put_on_stream, nvshmemx_size_put_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_put_on_stream, nvshmemx_ptrdiff_put_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_put_block -ENTRY(nvshmemx_float_put_block, nvshmemx_float_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_put_block, nvshmemx_double_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_put_block, 
nvshmemx_char_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_put_block, nvshmemx_schar_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_put_block, nvshmemx_short_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_put_block, nvshmemx_int_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_put_block, nvshmemx_long_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_put_block, nvshmemx_longlong_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_put_block, nvshmemx_uchar_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_put_block, nvshmemx_ushort_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_put_block, nvshmemx_uint_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_put_block, nvshmemx_ulong_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_put_block, nvshmemx_ulonglong_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_put_block, nvshmemx_int8_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_put_block, nvshmemx_int16_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_put_block, nvshmemx_int32_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_put_block, nvshmemx_int64_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_put_block, nvshmemx_uint8_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_put_block, nvshmemx_uint16_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_put_block, nvshmemx_uint32_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_put_block, nvshmemx_uint64_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_put_block, nvshmemx_size_put_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_put_block, nvshmemx_ptrdiff_put_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_put_block, nvshmemx_float_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_put_block, 
nvshmemx_double_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_put_block, nvshmemx_char_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_put_block, nvshmemx_schar_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_put_block, nvshmemx_short_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_put_block, nvshmemx_int_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_put_block, nvshmemx_long_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_put_block, nvshmemx_longlong_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_put_block, nvshmemx_uchar_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_put_block, nvshmemx_ushort_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_put_block, nvshmemx_uint_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_put_block, nvshmemx_ulong_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_put_block, nvshmemx_ulonglong_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_put_block, nvshmemx_int8_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_put_block, nvshmemx_int16_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_put_block, nvshmemx_int32_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_put_block, nvshmemx_int64_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_put_block, nvshmemx_uint8_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_put_block, nvshmemx_uint16_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_put_block, nvshmemx_uint32_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_put_block, nvshmemx_uint64_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_put_block, nvshmemx_size_put_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_put_block, nvshmemx_ptrdiff_put_block, true, 
NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_put_warp -ENTRY(nvshmemx_float_put_warp, nvshmemx_float_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_put_warp, nvshmemx_double_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_put_warp, nvshmemx_char_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_put_warp, nvshmemx_schar_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_put_warp, nvshmemx_short_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_put_warp, nvshmemx_int_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_put_warp, nvshmemx_long_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_put_warp, nvshmemx_longlong_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_put_warp, nvshmemx_uchar_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_put_warp, nvshmemx_ushort_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_put_warp, nvshmemx_uint_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_put_warp, nvshmemx_ulong_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_put_warp, nvshmemx_ulonglong_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_put_warp, nvshmemx_int8_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_put_warp, nvshmemx_int16_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_put_warp, nvshmemx_int32_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_put_warp, nvshmemx_int64_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_put_warp, nvshmemx_uint8_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_put_warp, nvshmemx_uint16_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_put_warp, nvshmemx_uint32_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_put_warp, nvshmemx_uint64_put_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_put_warp, nvshmemx_size_put_warp, false, NO_FLAG, P4, 
"comment") -ENTRY(nvshmemx_ptrdiff_put_warp, nvshmemx_ptrdiff_put_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_put_warp, nvshmemx_float_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_put_warp, nvshmemx_double_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_put_warp, nvshmemx_char_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_put_warp, nvshmemx_schar_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_put_warp, nvshmemx_short_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_put_warp, nvshmemx_int_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_put_warp, nvshmemx_long_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_put_warp, nvshmemx_longlong_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_put_warp, nvshmemx_uchar_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_put_warp, nvshmemx_ushort_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_put_warp, nvshmemx_uint_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_put_warp, nvshmemx_ulong_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_put_warp, nvshmemx_ulonglong_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_put_warp, nvshmemx_int8_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_put_warp, nvshmemx_int16_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_put_warp, nvshmemx_int32_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_put_warp, nvshmemx_int64_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_put_warp, nvshmemx_uint8_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_put_warp, nvshmemx_uint16_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_put_warp, nvshmemx_uint32_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_put_warp, nvshmemx_uint64_put_warp, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_put_warp, nvshmemx_size_put_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_put_warp, nvshmemx_ptrdiff_put_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_putSIZE -ENTRY(nvshmem_put8, nvshmem_put8, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put16, nvshmem_put16, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put32, nvshmem_put32, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put64, nvshmem_put64, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put128, nvshmem_put128, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_put8, nvshmem_put8, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put16, nvshmem_put16, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put32, nvshmem_put32, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put64, nvshmem_put64, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put128, nvshmem_put128, true, NO_FLAG, P4, "Succeessful") // nvshmemx_putSIZE_on_stream -ENTRY(nvshmemx_put8_on_stream, nvshmemx_put8_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put16_on_stream, nvshmemx_put16_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put32_on_stream, nvshmemx_put32_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put64_on_stream, nvshmemx_put64_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put128_on_stream, nvshmemx_put128_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_put8_on_stream, nvshmemx_put8_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put16_on_stream, nvshmemx_put16_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put32_on_stream, nvshmemx_put32_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put64_on_stream, nvshmemx_put64_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put128_on_stream, nvshmemx_put128_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_putSIZE_block -ENTRY(nvshmemx_put8_block, nvshmemx_put8_block, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_put16_block, nvshmemx_put16_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put32_block, nvshmemx_put32_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put64_block, nvshmemx_put64_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put128_block, nvshmemx_put128_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_put8_block, nvshmemx_put8_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put16_block, nvshmemx_put16_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put32_block, nvshmemx_put32_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put64_block, nvshmemx_put64_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put128_block, nvshmemx_put128_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_putSIZE_warp -ENTRY(nvshmemx_put8_warp, nvshmemx_put8_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put16_warp, nvshmemx_put16_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put32_warp, nvshmemx_put32_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put64_warp, nvshmemx_put64_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put128_warp, nvshmemx_put128_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_put8_warp, nvshmemx_put8_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put16_warp, nvshmemx_put16_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put32_warp, nvshmemx_put32_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put64_warp, nvshmemx_put64_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put128_warp, nvshmemx_put128_warp, true, NO_FLAG, P4, "Succeessful") -ENTRY(nvshmem_putmem, nvshmem_putmem, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_putmem_on_stream, nvshmemx_putmem_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_putmem_block, nvshmemx_putmem_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_putmem_warp, nvshmemx_putmem_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_putmem, nvshmem_putmem, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_putmem_on_stream, nvshmemx_putmem_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_putmem_block, nvshmemx_putmem_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_putmem_warp, nvshmemx_putmem_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_TYPENAME_p -ENTRY(nvshmem_float_p, nvshmem_float_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_p, nvshmem_double_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_p, nvshmem_char_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_p, nvshmem_schar_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_p, nvshmem_short_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_p, nvshmem_int_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_p, nvshmem_long_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_p, nvshmem_longlong_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_p, nvshmem_uchar_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_p, nvshmem_ushort_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_p, nvshmem_uint_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_p, nvshmem_ulong_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_p, nvshmem_ulonglong_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_p, nvshmem_int8_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_p, nvshmem_int16_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_p, nvshmem_int32_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_p, nvshmem_int64_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_p, nvshmem_uint8_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_p, nvshmem_uint16_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_p, nvshmem_uint32_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_p, nvshmem_uint64_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_p, nvshmem_size_p, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_p, nvshmem_ptrdiff_p, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_p, nvshmem_float_p, 
true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_p, nvshmem_double_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_p, nvshmem_char_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_p, nvshmem_schar_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_p, nvshmem_short_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_p, nvshmem_int_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_p, nvshmem_long_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_p, nvshmem_longlong_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_p, nvshmem_uchar_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_p, nvshmem_ushort_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_p, nvshmem_uint_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_p, nvshmem_ulong_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_p, nvshmem_ulonglong_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_p, nvshmem_int8_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_p, nvshmem_int16_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_p, nvshmem_int32_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_p, nvshmem_int64_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_p, nvshmem_uint8_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_p, nvshmem_uint16_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_p, nvshmem_uint32_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_p, nvshmem_uint64_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_p, nvshmem_size_p, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_p, nvshmem_ptrdiff_p, true, NO_FLAG, P4, "Succeessful") // nvshmem_TYPENAME_iput -ENTRY(nvshmem_float_iput, nvshmem_float_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_iput, nvshmem_double_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_iput, nvshmem_char_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_iput, 
nvshmem_schar_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_iput, nvshmem_short_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_iput, nvshmem_int_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_iput, nvshmem_long_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_iput, nvshmem_longlong_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_iput, nvshmem_uchar_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_iput, nvshmem_ushort_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_iput, nvshmem_uint_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_iput, nvshmem_ulong_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_iput, nvshmem_ulonglong_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_iput, nvshmem_int8_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_iput, nvshmem_int16_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_iput, nvshmem_int32_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_iput, nvshmem_int64_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_iput, nvshmem_uint8_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_iput, nvshmem_uint16_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_iput, nvshmem_uint32_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_iput, nvshmem_uint64_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_iput, nvshmem_size_iput, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_iput, nvshmem_ptrdiff_iput, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_iput, nvshmem_float_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_iput, nvshmem_double_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_iput, nvshmem_char_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_iput, nvshmem_schar_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_iput, nvshmem_short_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_iput, nvshmem_int_iput, 
true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_iput, nvshmem_long_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_iput, nvshmem_longlong_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_iput, nvshmem_uchar_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_iput, nvshmem_ushort_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_iput, nvshmem_uint_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_iput, nvshmem_ulong_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_iput, nvshmem_ulonglong_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_iput, nvshmem_int8_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_iput, nvshmem_int16_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_iput, nvshmem_int32_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_iput, nvshmem_int64_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_iput, nvshmem_uint8_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_iput, nvshmem_uint16_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_iput, nvshmem_uint32_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_iput, nvshmem_uint64_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_iput, nvshmem_size_iput, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_iput, nvshmem_ptrdiff_iput, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_iput_on_stream -ENTRY(nvshmemx_float_iput_on_stream, nvshmemx_float_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_iput_on_stream, nvshmemx_double_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_iput_on_stream, nvshmemx_char_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_iput_on_stream, nvshmemx_schar_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_iput_on_stream, nvshmemx_short_iput_on_stream, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_int_iput_on_stream, nvshmemx_int_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_iput_on_stream, nvshmemx_long_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_iput_on_stream, nvshmemx_longlong_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_iput_on_stream, nvshmemx_uchar_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_iput_on_stream, nvshmemx_ushort_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_iput_on_stream, nvshmemx_uint_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_iput_on_stream, nvshmemx_ulong_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_iput_on_stream, nvshmemx_ulonglong_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_iput_on_stream, nvshmemx_int8_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_iput_on_stream, nvshmemx_int16_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_iput_on_stream, nvshmemx_int32_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_iput_on_stream, nvshmemx_int64_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_iput_on_stream, nvshmemx_uint8_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_iput_on_stream, nvshmemx_uint16_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_iput_on_stream, nvshmemx_uint32_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_iput_on_stream, nvshmemx_uint64_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_iput_on_stream, nvshmemx_size_iput_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_iput_on_stream, nvshmemx_ptrdiff_iput_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_iput_on_stream, nvshmemx_float_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_iput_on_stream, nvshmemx_double_iput_on_stream, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_iput_on_stream, nvshmemx_char_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_iput_on_stream, nvshmemx_schar_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_iput_on_stream, nvshmemx_short_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_iput_on_stream, nvshmemx_int_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_iput_on_stream, nvshmemx_long_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_iput_on_stream, nvshmemx_longlong_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_iput_on_stream, nvshmemx_uchar_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_iput_on_stream, nvshmemx_ushort_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_iput_on_stream, nvshmemx_uint_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_iput_on_stream, nvshmemx_ulong_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_iput_on_stream, nvshmemx_ulonglong_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_iput_on_stream, nvshmemx_int8_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_iput_on_stream, nvshmemx_int16_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_iput_on_stream, nvshmemx_int32_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_iput_on_stream, nvshmemx_int64_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_iput_on_stream, nvshmemx_uint8_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_iput_on_stream, nvshmemx_uint16_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_iput_on_stream, nvshmemx_uint32_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_iput_on_stream, nvshmemx_uint64_iput_on_stream, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_size_iput_on_stream, nvshmemx_size_iput_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_iput_on_stream, nvshmemx_ptrdiff_iput_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_iput_block -ENTRY(nvshmemx_float_iput_block, nvshmemx_float_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_iput_block, nvshmemx_double_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_iput_block, nvshmemx_char_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_iput_block, nvshmemx_schar_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_iput_block, nvshmemx_short_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_iput_block, nvshmemx_int_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_iput_block, nvshmemx_long_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_iput_block, nvshmemx_longlong_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_iput_block, nvshmemx_uchar_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_iput_block, nvshmemx_ushort_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_iput_block, nvshmemx_uint_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_iput_block, nvshmemx_ulong_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_iput_block, nvshmemx_ulonglong_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_iput_block, nvshmemx_int8_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_iput_block, nvshmemx_int16_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_iput_block, nvshmemx_int32_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_iput_block, nvshmemx_int64_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_iput_block, nvshmemx_uint8_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_iput_block, nvshmemx_uint16_iput_block, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_uint32_iput_block, nvshmemx_uint32_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_iput_block, nvshmemx_uint64_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_iput_block, nvshmemx_size_iput_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_iput_block, nvshmemx_ptrdiff_iput_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_iput_block, nvshmemx_float_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_iput_block, nvshmemx_double_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_iput_block, nvshmemx_char_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_iput_block, nvshmemx_schar_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_iput_block, nvshmemx_short_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_iput_block, nvshmemx_int_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_iput_block, nvshmemx_long_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_iput_block, nvshmemx_longlong_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_iput_block, nvshmemx_uchar_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_iput_block, nvshmemx_ushort_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_iput_block, nvshmemx_uint_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_iput_block, nvshmemx_ulong_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_iput_block, nvshmemx_ulonglong_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_iput_block, nvshmemx_int8_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_iput_block, nvshmemx_int16_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_iput_block, nvshmemx_int32_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_iput_block, nvshmemx_int64_iput_block, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_uint8_iput_block, nvshmemx_uint8_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_iput_block, nvshmemx_uint16_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_iput_block, nvshmemx_uint32_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_iput_block, nvshmemx_uint64_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_iput_block, nvshmemx_size_iput_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_iput_block, nvshmemx_ptrdiff_iput_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_iput_warp -ENTRY(nvshmemx_float_iput_warp, nvshmemx_float_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_iput_warp, nvshmemx_double_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_iput_warp, nvshmemx_char_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_iput_warp, nvshmemx_schar_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_iput_warp, nvshmemx_short_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_iput_warp, nvshmemx_int_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_iput_warp, nvshmemx_long_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_iput_warp, nvshmemx_longlong_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_iput_warp, nvshmemx_uchar_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_iput_warp, nvshmemx_ushort_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_iput_warp, nvshmemx_uint_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_iput_warp, nvshmemx_ulong_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_iput_warp, nvshmemx_ulonglong_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_iput_warp, nvshmemx_int8_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_iput_warp, nvshmemx_int16_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_iput_warp, 
nvshmemx_int32_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_iput_warp, nvshmemx_int64_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_iput_warp, nvshmemx_uint8_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_iput_warp, nvshmemx_uint16_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_iput_warp, nvshmemx_uint32_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_iput_warp, nvshmemx_uint64_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_iput_warp, nvshmemx_size_iput_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_iput_warp, nvshmemx_ptrdiff_iput_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_iput_warp, nvshmemx_float_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_iput_warp, nvshmemx_double_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_iput_warp, nvshmemx_char_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_iput_warp, nvshmemx_schar_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_iput_warp, nvshmemx_short_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_iput_warp, nvshmemx_int_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_iput_warp, nvshmemx_long_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_iput_warp, nvshmemx_longlong_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_iput_warp, nvshmemx_uchar_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_iput_warp, nvshmemx_ushort_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_iput_warp, nvshmemx_uint_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_iput_warp, nvshmemx_ulong_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_iput_warp, nvshmemx_ulonglong_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_iput_warp, nvshmemx_int8_iput_warp, true, NO_FLAG, P4, 
"Succeessful") +ENTRY(nvshmemx_int16_iput_warp, nvshmemx_int16_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_iput_warp, nvshmemx_int32_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_iput_warp, nvshmemx_int64_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_iput_warp, nvshmemx_uint8_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_iput_warp, nvshmemx_uint16_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_iput_warp, nvshmemx_uint32_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_iput_warp, nvshmemx_uint64_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_iput_warp, nvshmemx_size_iput_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_iput_warp, nvshmemx_ptrdiff_iput_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_iputSIZE -ENTRY(nvshmem_iput8, nvshmem_iput8, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iput16, nvshmem_iput16, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iput32, nvshmem_iput32, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iput64, nvshmem_iput64, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iput128, nvshmem_iput128, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_iput8, nvshmem_iput8, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iput16, nvshmem_iput16, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iput32, nvshmem_iput32, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iput64, nvshmem_iput64, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iput128, nvshmem_iput128, true, NO_FLAG, P4, "Succeessful") // nvshmemx_iputSIZE_on_stream -ENTRY(nvshmemx_iput8_on_stream, nvshmemx_iput8_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput16_on_stream, nvshmemx_iput16_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput32_on_stream, nvshmemx_iput32_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput64_on_stream, nvshmemx_iput64_on_stream, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_iput128_on_stream, nvshmemx_iput128_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_iput8_on_stream, nvshmemx_iput8_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput16_on_stream, nvshmemx_iput16_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput32_on_stream, nvshmemx_iput32_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput64_on_stream, nvshmemx_iput64_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput128_on_stream, nvshmemx_iput128_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_iputSIZE_block -ENTRY(nvshmemx_iput8_block, nvshmemx_iput8_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput16_block, nvshmemx_iput16_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput32_block, nvshmemx_iput32_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput64_block, nvshmemx_iput64_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput128_block, nvshmemx_iput128_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_iput8_block, nvshmemx_iput8_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput16_block, nvshmemx_iput16_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput32_block, nvshmemx_iput32_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput64_block, nvshmemx_iput64_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput128_block, nvshmemx_iput128_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_iputSIZE_warp -ENTRY(nvshmemx_iput8_warp, nvshmemx_iput8_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput16_warp, nvshmemx_iput16_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput32_warp, nvshmemx_iput32_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput64_warp, nvshmemx_iput64_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iput128_warp, nvshmemx_iput128_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_iput8_warp, nvshmemx_iput8_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput16_warp, 
nvshmemx_iput16_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput32_warp, nvshmemx_iput32_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput64_warp, nvshmemx_iput64_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iput128_warp, nvshmemx_iput128_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_TYPENAME_get -ENTRY(nvshmem_float_get, nvshmem_float_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_get, nvshmem_double_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_get, nvshmem_char_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_get, nvshmem_schar_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_get, nvshmem_short_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_get, nvshmem_int_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_get, nvshmem_long_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_get, nvshmem_longlong_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_get, nvshmem_uchar_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_get, nvshmem_ushort_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_get, nvshmem_uint_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_get, nvshmem_ulong_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_get, nvshmem_ulonglong_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_get, nvshmem_int8_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_get, nvshmem_int16_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_get, nvshmem_int32_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_get, nvshmem_int64_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_get, nvshmem_uint8_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_get, nvshmem_uint16_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_get, nvshmem_uint32_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_get, nvshmem_uint64_get, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_get, nvshmem_size_get, false, 
NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_get, nvshmem_ptrdiff_get, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_get, nvshmem_float_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_get, nvshmem_double_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_get, nvshmem_char_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_get, nvshmem_schar_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_get, nvshmem_short_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_get, nvshmem_int_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_get, nvshmem_long_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_get, nvshmem_longlong_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_get, nvshmem_uchar_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_get, nvshmem_ushort_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_get, nvshmem_uint_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_get, nvshmem_ulong_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_get, nvshmem_ulonglong_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_get, nvshmem_int8_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_get, nvshmem_int16_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_get, nvshmem_int32_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_get, nvshmem_int64_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_get, nvshmem_uint8_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_get, nvshmem_uint16_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_get, nvshmem_uint32_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_get, nvshmem_uint64_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_get, nvshmem_size_get, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_get, nvshmem_ptrdiff_get, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_get_on_stream 
-ENTRY(nvshmemx_float_get_on_stream, nvshmemx_float_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_get_on_stream, nvshmemx_double_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_get_on_stream, nvshmemx_char_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_get_on_stream, nvshmemx_schar_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_get_on_stream, nvshmemx_short_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_get_on_stream, nvshmemx_int_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_get_on_stream, nvshmemx_long_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_get_on_stream, nvshmemx_longlong_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_get_on_stream, nvshmemx_uchar_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_get_on_stream, nvshmemx_ushort_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_get_on_stream, nvshmemx_uint_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_get_on_stream, nvshmemx_ulong_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_get_on_stream, nvshmemx_ulonglong_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_get_on_stream, nvshmemx_int8_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_get_on_stream, nvshmemx_int16_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_get_on_stream, nvshmemx_int32_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_get_on_stream, nvshmemx_int64_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_get_on_stream, nvshmemx_uint8_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_get_on_stream, nvshmemx_uint16_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_get_on_stream, nvshmemx_uint32_get_on_stream, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_uint64_get_on_stream, nvshmemx_uint64_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_get_on_stream, nvshmemx_size_get_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_get_on_stream, nvshmemx_ptrdiff_get_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_get_on_stream, nvshmemx_float_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_get_on_stream, nvshmemx_double_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_get_on_stream, nvshmemx_char_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_get_on_stream, nvshmemx_schar_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_get_on_stream, nvshmemx_short_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_get_on_stream, nvshmemx_int_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_get_on_stream, nvshmemx_long_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_get_on_stream, nvshmemx_longlong_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_get_on_stream, nvshmemx_uchar_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_get_on_stream, nvshmemx_ushort_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_get_on_stream, nvshmemx_uint_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_get_on_stream, nvshmemx_ulong_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_get_on_stream, nvshmemx_ulonglong_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_get_on_stream, nvshmemx_int8_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_get_on_stream, nvshmemx_int16_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_get_on_stream, nvshmemx_int32_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_get_on_stream, nvshmemx_int64_get_on_stream, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_get_on_stream, nvshmemx_uint8_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_get_on_stream, nvshmemx_uint16_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_get_on_stream, nvshmemx_uint32_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_get_on_stream, nvshmemx_uint64_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_get_on_stream, nvshmemx_size_get_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_get_on_stream, nvshmemx_ptrdiff_get_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_get_block -ENTRY(nvshmemx_float_get_block, nvshmemx_float_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_get_block, nvshmemx_double_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_get_block, nvshmemx_char_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_get_block, nvshmemx_schar_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_get_block, nvshmemx_short_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_get_block, nvshmemx_int_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_get_block, nvshmemx_long_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_get_block, nvshmemx_longlong_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_get_block, nvshmemx_uchar_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_get_block, nvshmemx_ushort_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_get_block, nvshmemx_uint_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_get_block, nvshmemx_ulong_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_get_block, nvshmemx_ulonglong_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_get_block, nvshmemx_int8_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_get_block, nvshmemx_int16_get_block, 
false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_get_block, nvshmemx_int32_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_get_block, nvshmemx_int64_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_get_block, nvshmemx_uint8_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_get_block, nvshmemx_uint16_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_get_block, nvshmemx_uint32_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_get_block, nvshmemx_uint64_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_get_block, nvshmemx_size_get_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_get_block, nvshmemx_ptrdiff_get_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_get_block, nvshmemx_float_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_get_block, nvshmemx_double_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_get_block, nvshmemx_char_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_get_block, nvshmemx_schar_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_get_block, nvshmemx_short_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_get_block, nvshmemx_int_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_get_block, nvshmemx_long_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_get_block, nvshmemx_longlong_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_get_block, nvshmemx_uchar_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_get_block, nvshmemx_ushort_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_get_block, nvshmemx_uint_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_get_block, nvshmemx_ulong_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_get_block, nvshmemx_ulonglong_get_block, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_int8_get_block, nvshmemx_int8_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_get_block, nvshmemx_int16_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_get_block, nvshmemx_int32_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_get_block, nvshmemx_int64_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_get_block, nvshmemx_uint8_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_get_block, nvshmemx_uint16_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_get_block, nvshmemx_uint32_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_get_block, nvshmemx_uint64_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_get_block, nvshmemx_size_get_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_get_block, nvshmemx_ptrdiff_get_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_get_warp -ENTRY(nvshmemx_float_get_warp, nvshmemx_float_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_get_warp, nvshmemx_double_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_get_warp, nvshmemx_char_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_get_warp, nvshmemx_schar_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_get_warp, nvshmemx_short_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_get_warp, nvshmemx_int_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_get_warp, nvshmemx_long_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_get_warp, nvshmemx_longlong_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_get_warp, nvshmemx_uchar_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_get_warp, nvshmemx_ushort_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_get_warp, nvshmemx_uint_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_get_warp, nvshmemx_ulong_get_warp, false, 
NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_get_warp, nvshmemx_ulonglong_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_get_warp, nvshmemx_int8_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_get_warp, nvshmemx_int16_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_get_warp, nvshmemx_int32_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_get_warp, nvshmemx_int64_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_get_warp, nvshmemx_uint8_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_get_warp, nvshmemx_uint16_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_get_warp, nvshmemx_uint32_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_get_warp, nvshmemx_uint64_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_get_warp, nvshmemx_size_get_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_get_warp, nvshmemx_ptrdiff_get_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_get_warp, nvshmemx_float_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_get_warp, nvshmemx_double_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_get_warp, nvshmemx_char_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_get_warp, nvshmemx_schar_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_get_warp, nvshmemx_short_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_get_warp, nvshmemx_int_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_get_warp, nvshmemx_long_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_get_warp, nvshmemx_longlong_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_get_warp, nvshmemx_uchar_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_get_warp, nvshmemx_ushort_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_get_warp, nvshmemx_uint_get_warp, true, NO_FLAG, P4, 
"Succeessful") +ENTRY(nvshmemx_ulong_get_warp, nvshmemx_ulong_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_get_warp, nvshmemx_ulonglong_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_get_warp, nvshmemx_int8_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_get_warp, nvshmemx_int16_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_get_warp, nvshmemx_int32_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_get_warp, nvshmemx_int64_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_get_warp, nvshmemx_uint8_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_get_warp, nvshmemx_uint16_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_get_warp, nvshmemx_uint32_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_get_warp, nvshmemx_uint64_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_get_warp, nvshmemx_size_get_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_get_warp, nvshmemx_ptrdiff_get_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_getSIZE -ENTRY(nvshmem_get8, nvshmem_get8, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get16, nvshmem_get16, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get32, nvshmem_get32, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get64, nvshmem_get64, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get128, nvshmem_get128, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_get8, nvshmem_get8, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get16, nvshmem_get16, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get32, nvshmem_get32, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get64, nvshmem_get64, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get128, nvshmem_get128, true, NO_FLAG, P4, "Succeessful") // nvshmemx_getSIZE_on_stream -ENTRY(nvshmemx_get8_on_stream, nvshmemx_get8_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get16_on_stream, 
nvshmemx_get16_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get32_on_stream, nvshmemx_get32_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get64_on_stream, nvshmemx_get64_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get128_on_stream, nvshmemx_get128_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_get8_on_stream, nvshmemx_get8_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get16_on_stream, nvshmemx_get16_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get32_on_stream, nvshmemx_get32_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get64_on_stream, nvshmemx_get64_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get128_on_stream, nvshmemx_get128_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_getSIZE_block -ENTRY(nvshmemx_get8_block, nvshmemx_get8_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get16_block, nvshmemx_get16_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get32_block, nvshmemx_get32_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get64_block, nvshmemx_get64_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get128_block, nvshmemx_get128_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_get8_block, nvshmemx_get8_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get16_block, nvshmemx_get16_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get32_block, nvshmemx_get32_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get64_block, nvshmemx_get64_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get128_block, nvshmemx_get128_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_getSIZE_warp -ENTRY(nvshmemx_get8_warp, nvshmemx_get8_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get16_warp, nvshmemx_get16_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get32_warp, nvshmemx_get32_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get64_warp, nvshmemx_get64_warp, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_get128_warp, nvshmemx_get128_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_get8_warp, nvshmemx_get8_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get16_warp, nvshmemx_get16_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get32_warp, nvshmemx_get32_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get64_warp, nvshmemx_get64_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get128_warp, nvshmemx_get128_warp, true, NO_FLAG, P4, "Succeessful") -ENTRY(nvshmem_getmem, nvshmem_getmem, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_getmem_on_stream, nvshmemx_getmem_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_getmem_block, nvshmemx_getmem_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_getmem_warp, nvshmemx_getmem_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_getmem, nvshmem_getmem, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_getmem_on_stream, nvshmemx_getmem_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_getmem_block, nvshmemx_getmem_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_getmem_warp, nvshmemx_getmem_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_TYPENAME_g -ENTRY(nvshmem_float_g, nvshmem_float_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_g, nvshmem_double_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_g, nvshmem_char_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_g, nvshmem_schar_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_g, nvshmem_short_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_g, nvshmem_int_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_g, nvshmem_long_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_g, nvshmem_longlong_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_g, nvshmem_uchar_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_g, nvshmem_ushort_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_g, nvshmem_uint_g, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmem_ulong_g, nvshmem_ulong_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_g, nvshmem_ulonglong_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_g, nvshmem_int8_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_g, nvshmem_int16_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_g, nvshmem_int32_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_g, nvshmem_int64_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_g, nvshmem_uint8_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_g, nvshmem_uint16_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_g, nvshmem_uint32_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_g, nvshmem_uint64_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_g, nvshmem_size_g, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_g, nvshmem_ptrdiff_g, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_g, nvshmem_float_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_g, nvshmem_double_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_g, nvshmem_char_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_g, nvshmem_schar_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_g, nvshmem_short_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_g, nvshmem_int_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_g, nvshmem_long_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_g, nvshmem_longlong_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_g, nvshmem_uchar_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_g, nvshmem_ushort_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_g, nvshmem_uint_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_g, nvshmem_ulong_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_g, nvshmem_ulonglong_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_g, nvshmem_int8_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_g, nvshmem_int16_g, true, NO_FLAG, P4, 
"Succeessful") +ENTRY(nvshmem_int32_g, nvshmem_int32_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_g, nvshmem_int64_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_g, nvshmem_uint8_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_g, nvshmem_uint16_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_g, nvshmem_uint32_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_g, nvshmem_uint64_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_g, nvshmem_size_g, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_g, nvshmem_ptrdiff_g, true, NO_FLAG, P4, "Succeessful") // nvshmem_TYPENAME_iget -ENTRY(nvshmem_float_iget, nvshmem_float_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_iget, nvshmem_double_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_iget, nvshmem_char_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_iget, nvshmem_schar_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_iget, nvshmem_short_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_iget, nvshmem_int_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_iget, nvshmem_long_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_iget, nvshmem_longlong_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_iget, nvshmem_uchar_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_iget, nvshmem_ushort_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_iget, nvshmem_uint_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_iget, nvshmem_ulong_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_iget, nvshmem_ulonglong_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_iget, nvshmem_int8_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_iget, nvshmem_int16_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_iget, nvshmem_int32_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_iget, nvshmem_int64_iget, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmem_uint8_iget, nvshmem_uint8_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_iget, nvshmem_uint16_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_iget, nvshmem_uint32_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_iget, nvshmem_uint64_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_iget, nvshmem_size_iget, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_iget, nvshmem_ptrdiff_iget, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_iget, nvshmem_float_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_iget, nvshmem_double_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_iget, nvshmem_char_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_iget, nvshmem_schar_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_iget, nvshmem_short_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_iget, nvshmem_int_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_iget, nvshmem_long_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_iget, nvshmem_longlong_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_iget, nvshmem_uchar_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_iget, nvshmem_ushort_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_iget, nvshmem_uint_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_iget, nvshmem_ulong_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_iget, nvshmem_ulonglong_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_iget, nvshmem_int8_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_iget, nvshmem_int16_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_iget, nvshmem_int32_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_iget, nvshmem_int64_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_iget, nvshmem_uint8_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_iget, nvshmem_uint16_iget, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_iget, nvshmem_uint32_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_iget, nvshmem_uint64_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_iget, nvshmem_size_iget, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_iget, nvshmem_ptrdiff_iget, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_iget_on_stream -ENTRY(nvshmemx_float_iget_on_stream, nvshmemx_float_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_iget_on_stream, nvshmemx_double_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_iget_on_stream, nvshmemx_char_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_iget_on_stream, nvshmemx_schar_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_iget_on_stream, nvshmemx_short_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_iget_on_stream, nvshmemx_int_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_iget_on_stream, nvshmemx_long_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_iget_on_stream, nvshmemx_longlong_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_iget_on_stream, nvshmemx_uchar_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_iget_on_stream, nvshmemx_ushort_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_iget_on_stream, nvshmemx_uint_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_iget_on_stream, nvshmemx_ulong_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_iget_on_stream, nvshmemx_ulonglong_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_iget_on_stream, nvshmemx_int8_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_iget_on_stream, nvshmemx_int16_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_iget_on_stream, nvshmemx_int32_iget_on_stream, false, NO_FLAG, P4, "comment") 
-ENTRY(nvshmemx_int64_iget_on_stream, nvshmemx_int64_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_iget_on_stream, nvshmemx_uint8_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_iget_on_stream, nvshmemx_uint16_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_iget_on_stream, nvshmemx_uint32_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_iget_on_stream, nvshmemx_uint64_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_iget_on_stream, nvshmemx_size_iget_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_iget_on_stream, nvshmemx_ptrdiff_iget_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_iget_on_stream, nvshmemx_float_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_iget_on_stream, nvshmemx_double_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_iget_on_stream, nvshmemx_char_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_iget_on_stream, nvshmemx_schar_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_iget_on_stream, nvshmemx_short_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_iget_on_stream, nvshmemx_int_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_iget_on_stream, nvshmemx_long_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_iget_on_stream, nvshmemx_longlong_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_iget_on_stream, nvshmemx_uchar_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_iget_on_stream, nvshmemx_ushort_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_iget_on_stream, nvshmemx_uint_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_iget_on_stream, nvshmemx_ulong_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_iget_on_stream, 
nvshmemx_ulonglong_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_iget_on_stream, nvshmemx_int8_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_iget_on_stream, nvshmemx_int16_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_iget_on_stream, nvshmemx_int32_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_iget_on_stream, nvshmemx_int64_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_iget_on_stream, nvshmemx_uint8_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_iget_on_stream, nvshmemx_uint16_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_iget_on_stream, nvshmemx_uint32_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_iget_on_stream, nvshmemx_uint64_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_iget_on_stream, nvshmemx_size_iget_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_iget_on_stream, nvshmemx_ptrdiff_iget_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_iget_block -ENTRY(nvshmemx_float_iget_block, nvshmemx_float_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_iget_block, nvshmemx_double_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_iget_block, nvshmemx_char_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_iget_block, nvshmemx_schar_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_iget_block, nvshmemx_short_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_iget_block, nvshmemx_int_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_iget_block, nvshmemx_long_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_iget_block, nvshmemx_longlong_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_iget_block, nvshmemx_uchar_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_iget_block, 
nvshmemx_ushort_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_iget_block, nvshmemx_uint_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_iget_block, nvshmemx_ulong_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_iget_block, nvshmemx_ulonglong_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_iget_block, nvshmemx_int8_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_iget_block, nvshmemx_int16_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_iget_block, nvshmemx_int32_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_iget_block, nvshmemx_int64_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_iget_block, nvshmemx_uint8_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_iget_block, nvshmemx_uint16_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_iget_block, nvshmemx_uint32_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_iget_block, nvshmemx_uint64_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_iget_block, nvshmemx_size_iget_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_iget_block, nvshmemx_ptrdiff_iget_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_iget_block, nvshmemx_float_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_iget_block, nvshmemx_double_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_iget_block, nvshmemx_char_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_iget_block, nvshmemx_schar_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_iget_block, nvshmemx_short_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_iget_block, nvshmemx_int_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_iget_block, nvshmemx_long_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_iget_block, nvshmemx_longlong_iget_block, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_iget_block, nvshmemx_uchar_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_iget_block, nvshmemx_ushort_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_iget_block, nvshmemx_uint_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_iget_block, nvshmemx_ulong_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_iget_block, nvshmemx_ulonglong_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_iget_block, nvshmemx_int8_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_iget_block, nvshmemx_int16_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_iget_block, nvshmemx_int32_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_iget_block, nvshmemx_int64_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_iget_block, nvshmemx_uint8_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_iget_block, nvshmemx_uint16_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_iget_block, nvshmemx_uint32_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_iget_block, nvshmemx_uint64_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_iget_block, nvshmemx_size_iget_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_iget_block, nvshmemx_ptrdiff_iget_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_iget_warp -ENTRY(nvshmemx_float_iget_warp, nvshmemx_float_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_iget_warp, nvshmemx_double_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_iget_warp, nvshmemx_char_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_iget_warp, nvshmemx_schar_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_iget_warp, nvshmemx_short_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_iget_warp, 
nvshmemx_int_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_iget_warp, nvshmemx_long_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_iget_warp, nvshmemx_longlong_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_iget_warp, nvshmemx_uchar_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_iget_warp, nvshmemx_ushort_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_iget_warp, nvshmemx_uint_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_iget_warp, nvshmemx_ulong_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_iget_warp, nvshmemx_ulonglong_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_iget_warp, nvshmemx_int8_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_iget_warp, nvshmemx_int16_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_iget_warp, nvshmemx_int32_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_iget_warp, nvshmemx_int64_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_iget_warp, nvshmemx_uint8_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_iget_warp, nvshmemx_uint16_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_iget_warp, nvshmemx_uint32_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_iget_warp, nvshmemx_uint64_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_iget_warp, nvshmemx_size_iget_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_iget_warp, nvshmemx_ptrdiff_iget_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_iget_warp, nvshmemx_float_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_iget_warp, nvshmemx_double_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_iget_warp, nvshmemx_char_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_iget_warp, nvshmemx_schar_iget_warp, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_short_iget_warp, nvshmemx_short_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_iget_warp, nvshmemx_int_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_iget_warp, nvshmemx_long_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_iget_warp, nvshmemx_longlong_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_iget_warp, nvshmemx_uchar_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_iget_warp, nvshmemx_ushort_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_iget_warp, nvshmemx_uint_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_iget_warp, nvshmemx_ulong_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_iget_warp, nvshmemx_ulonglong_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_iget_warp, nvshmemx_int8_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_iget_warp, nvshmemx_int16_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_iget_warp, nvshmemx_int32_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_iget_warp, nvshmemx_int64_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_iget_warp, nvshmemx_uint8_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_iget_warp, nvshmemx_uint16_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_iget_warp, nvshmemx_uint32_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_iget_warp, nvshmemx_uint64_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_iget_warp, nvshmemx_size_iget_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_iget_warp, nvshmemx_ptrdiff_iget_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_igetSIZE -ENTRY(nvshmem_iget8, nvshmem_iget8, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iget16, nvshmem_iget16, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iget32, nvshmem_iget32, false, NO_FLAG, P4, 
"comment") -ENTRY(nvshmem_iget64, nvshmem_iget64, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_iget128, nvshmem_iget128, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_iget8, nvshmem_iget8, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iget16, nvshmem_iget16, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iget32, nvshmem_iget32, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iget64, nvshmem_iget64, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_iget128, nvshmem_iget128, true, NO_FLAG, P4, "Succeessful") // nvshmemx_igetSIZE_on_stream -ENTRY(nvshmemx_iget8_on_stream, nvshmemx_iget8_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget16_on_stream, nvshmemx_iget16_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget32_on_stream, nvshmemx_iget32_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget64_on_stream, nvshmemx_iget64_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget128_on_stream, nvshmemx_iget128_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_iget8_on_stream, nvshmemx_iget8_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget16_on_stream, nvshmemx_iget16_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget32_on_stream, nvshmemx_iget32_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget64_on_stream, nvshmemx_iget64_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget128_on_stream, nvshmemx_iget128_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_igetSIZE_block -ENTRY(nvshmemx_iget8_block, nvshmemx_iget8_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget16_block, nvshmemx_iget16_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget32_block, nvshmemx_iget32_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget64_block, nvshmemx_iget64_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget128_block, nvshmemx_iget128_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_iget8_block, nvshmemx_iget8_block, true, NO_FLAG, 
P4, "Succeessful") +ENTRY(nvshmemx_iget16_block, nvshmemx_iget16_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget32_block, nvshmemx_iget32_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget64_block, nvshmemx_iget64_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget128_block, nvshmemx_iget128_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_igetSIZE_warp -ENTRY(nvshmemx_iget8_warp, nvshmemx_iget8_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget16_warp, nvshmemx_iget16_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget32_warp, nvshmemx_iget32_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget64_warp, nvshmemx_iget64_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_iget128_warp, nvshmemx_iget128_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_iget8_warp, nvshmemx_iget8_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget16_warp, nvshmemx_iget16_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget32_warp, nvshmemx_iget32_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget64_warp, nvshmemx_iget64_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_iget128_warp, nvshmemx_iget128_warp, true, NO_FLAG, P4, "Succeessful") // Nonblocking RMA // nvshmem_TYPENAME_put_nbi -ENTRY(nvshmem_float_put_nbi, nvshmem_float_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_put_nbi, nvshmem_double_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_put_nbi, nvshmem_char_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_put_nbi, nvshmem_schar_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_put_nbi, nvshmem_short_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_put_nbi, nvshmem_int_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_put_nbi, nvshmem_long_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_put_nbi, nvshmem_longlong_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_put_nbi, 
nvshmem_uchar_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_put_nbi, nvshmem_ushort_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_put_nbi, nvshmem_uint_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_put_nbi, nvshmem_ulong_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_put_nbi, nvshmem_ulonglong_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_put_nbi, nvshmem_int8_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_put_nbi, nvshmem_int16_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_put_nbi, nvshmem_int32_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_put_nbi, nvshmem_int64_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_put_nbi, nvshmem_uint8_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_put_nbi, nvshmem_uint16_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_put_nbi, nvshmem_uint32_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_put_nbi, nvshmem_uint64_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_put_nbi, nvshmem_size_put_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_put_nbi, nvshmem_ptrdiff_put_nbi, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_put_nbi, nvshmem_float_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_put_nbi, nvshmem_double_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_put_nbi, nvshmem_char_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_put_nbi, nvshmem_schar_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_put_nbi, nvshmem_short_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_put_nbi, nvshmem_int_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_put_nbi, nvshmem_long_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_put_nbi, nvshmem_longlong_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_put_nbi, nvshmem_uchar_put_nbi, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_put_nbi, nvshmem_ushort_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_put_nbi, nvshmem_uint_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_put_nbi, nvshmem_ulong_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_put_nbi, nvshmem_ulonglong_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_put_nbi, nvshmem_int8_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_put_nbi, nvshmem_int16_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_put_nbi, nvshmem_int32_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_put_nbi, nvshmem_int64_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_put_nbi, nvshmem_uint8_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_put_nbi, nvshmem_uint16_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_put_nbi, nvshmem_uint32_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_put_nbi, nvshmem_uint64_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_put_nbi, nvshmem_size_put_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_put_nbi, nvshmem_ptrdiff_put_nbi, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_put_nbi_on_stream -ENTRY(nvshmemx_float_put_nbi_on_stream, nvshmemx_float_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_put_nbi_on_stream, nvshmemx_double_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_put_nbi_on_stream, nvshmemx_char_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_put_nbi_on_stream, nvshmemx_schar_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_put_nbi_on_stream, nvshmemx_short_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_put_nbi_on_stream, nvshmemx_int_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_put_nbi_on_stream, 
nvshmemx_long_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_put_nbi_on_stream, nvshmemx_longlong_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_put_nbi_on_stream, nvshmemx_uchar_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_put_nbi_on_stream, nvshmemx_ushort_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_put_nbi_on_stream, nvshmemx_uint_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_put_nbi_on_stream, nvshmemx_ulong_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_put_nbi_on_stream, nvshmemx_ulonglong_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_put_nbi_on_stream, nvshmemx_int8_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_put_nbi_on_stream, nvshmemx_int16_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_put_nbi_on_stream, nvshmemx_int32_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_put_nbi_on_stream, nvshmemx_int64_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_put_nbi_on_stream, nvshmemx_uint8_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_put_nbi_on_stream, nvshmemx_uint16_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_put_nbi_on_stream, nvshmemx_uint32_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_put_nbi_on_stream, nvshmemx_uint64_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_put_nbi_on_stream, nvshmemx_size_put_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_put_nbi_on_stream, nvshmemx_ptrdiff_put_nbi_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_put_nbi_on_stream, nvshmemx_float_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_put_nbi_on_stream, nvshmemx_double_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_char_put_nbi_on_stream, nvshmemx_char_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_put_nbi_on_stream, nvshmemx_schar_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_put_nbi_on_stream, nvshmemx_short_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_put_nbi_on_stream, nvshmemx_int_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_put_nbi_on_stream, nvshmemx_long_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_put_nbi_on_stream, nvshmemx_longlong_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_put_nbi_on_stream, nvshmemx_uchar_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_put_nbi_on_stream, nvshmemx_ushort_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_put_nbi_on_stream, nvshmemx_uint_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_put_nbi_on_stream, nvshmemx_ulong_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_put_nbi_on_stream, nvshmemx_ulonglong_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_put_nbi_on_stream, nvshmemx_int8_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_put_nbi_on_stream, nvshmemx_int16_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_put_nbi_on_stream, nvshmemx_int32_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_put_nbi_on_stream, nvshmemx_int64_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_put_nbi_on_stream, nvshmemx_uint8_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_put_nbi_on_stream, nvshmemx_uint16_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_put_nbi_on_stream, nvshmemx_uint32_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_uint64_put_nbi_on_stream, nvshmemx_uint64_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_put_nbi_on_stream, nvshmemx_size_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_put_nbi_on_stream, nvshmemx_ptrdiff_put_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_put_nbi_block -ENTRY(nvshmemx_float_put_nbi_block, nvshmemx_float_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_put_nbi_block, nvshmemx_double_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_put_nbi_block, nvshmemx_char_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_put_nbi_block, nvshmemx_schar_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_put_nbi_block, nvshmemx_short_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_put_nbi_block, nvshmemx_int_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_put_nbi_block, nvshmemx_long_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_put_nbi_block, nvshmemx_longlong_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_put_nbi_block, nvshmemx_uchar_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_put_nbi_block, nvshmemx_ushort_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_put_nbi_block, nvshmemx_uint_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_put_nbi_block, nvshmemx_ulong_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_put_nbi_block, nvshmemx_ulonglong_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_put_nbi_block, nvshmemx_int8_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_put_nbi_block, nvshmemx_int16_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_put_nbi_block, nvshmemx_int32_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_put_nbi_block, 
nvshmemx_int64_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_put_nbi_block, nvshmemx_uint8_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_put_nbi_block, nvshmemx_uint16_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_put_nbi_block, nvshmemx_uint32_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_put_nbi_block, nvshmemx_uint64_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_put_nbi_block, nvshmemx_size_put_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_put_nbi_block, nvshmemx_ptrdiff_put_nbi_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_put_nbi_block, nvshmemx_float_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_put_nbi_block, nvshmemx_double_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_put_nbi_block, nvshmemx_char_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_put_nbi_block, nvshmemx_schar_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_put_nbi_block, nvshmemx_short_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_put_nbi_block, nvshmemx_int_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_put_nbi_block, nvshmemx_long_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_put_nbi_block, nvshmemx_longlong_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_put_nbi_block, nvshmemx_uchar_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_put_nbi_block, nvshmemx_ushort_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_put_nbi_block, nvshmemx_uint_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_put_nbi_block, nvshmemx_ulong_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_put_nbi_block, nvshmemx_ulonglong_put_nbi_block, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_int8_put_nbi_block, nvshmemx_int8_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_put_nbi_block, nvshmemx_int16_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_put_nbi_block, nvshmemx_int32_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_put_nbi_block, nvshmemx_int64_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_put_nbi_block, nvshmemx_uint8_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_put_nbi_block, nvshmemx_uint16_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_put_nbi_block, nvshmemx_uint32_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_put_nbi_block, nvshmemx_uint64_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_put_nbi_block, nvshmemx_size_put_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_put_nbi_block, nvshmemx_ptrdiff_put_nbi_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_put_nbi_warp -ENTRY(nvshmemx_float_put_nbi_warp, nvshmemx_float_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_put_nbi_warp, nvshmemx_double_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_put_nbi_warp, nvshmemx_char_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_put_nbi_warp, nvshmemx_schar_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_put_nbi_warp, nvshmemx_short_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_put_nbi_warp, nvshmemx_int_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_put_nbi_warp, nvshmemx_long_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_put_nbi_warp, nvshmemx_longlong_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_put_nbi_warp, nvshmemx_uchar_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_put_nbi_warp, nvshmemx_ushort_put_nbi_warp, false, NO_FLAG, 
P4, "comment") -ENTRY(nvshmemx_uint_put_nbi_warp, nvshmemx_uint_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_put_nbi_warp, nvshmemx_ulong_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_put_nbi_warp, nvshmemx_ulonglong_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_put_nbi_warp, nvshmemx_int8_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_put_nbi_warp, nvshmemx_int16_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_put_nbi_warp, nvshmemx_int32_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_put_nbi_warp, nvshmemx_int64_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_put_nbi_warp, nvshmemx_uint8_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_put_nbi_warp, nvshmemx_uint16_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_put_nbi_warp, nvshmemx_uint32_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_put_nbi_warp, nvshmemx_uint64_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_put_nbi_warp, nvshmemx_size_put_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_put_nbi_warp, nvshmemx_ptrdiff_put_nbi_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_put_nbi_warp, nvshmemx_float_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_put_nbi_warp, nvshmemx_double_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_put_nbi_warp, nvshmemx_char_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_put_nbi_warp, nvshmemx_schar_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_put_nbi_warp, nvshmemx_short_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_put_nbi_warp, nvshmemx_int_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_put_nbi_warp, nvshmemx_long_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_longlong_put_nbi_warp, nvshmemx_longlong_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_put_nbi_warp, nvshmemx_uchar_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_put_nbi_warp, nvshmemx_ushort_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_put_nbi_warp, nvshmemx_uint_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_put_nbi_warp, nvshmemx_ulong_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_put_nbi_warp, nvshmemx_ulonglong_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_put_nbi_warp, nvshmemx_int8_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_put_nbi_warp, nvshmemx_int16_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_put_nbi_warp, nvshmemx_int32_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_put_nbi_warp, nvshmemx_int64_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_put_nbi_warp, nvshmemx_uint8_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_put_nbi_warp, nvshmemx_uint16_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_put_nbi_warp, nvshmemx_uint32_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_put_nbi_warp, nvshmemx_uint64_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_put_nbi_warp, nvshmemx_size_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_put_nbi_warp, nvshmemx_ptrdiff_put_nbi_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_putSIZE_nbi -ENTRY(nvshmem_put8_nbi, nvshmem_put8_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put16_nbi, nvshmem_put16_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put32_nbi, nvshmem_put32_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put64_nbi, nvshmem_put64_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_put128_nbi, nvshmem_put128_nbi, false, NO_FLAG, P4, 
"comment") +ENTRY(nvshmem_put8_nbi, nvshmem_put8_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put16_nbi, nvshmem_put16_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put32_nbi, nvshmem_put32_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put64_nbi, nvshmem_put64_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_put128_nbi, nvshmem_put128_nbi, true, NO_FLAG, P4, "Succeessful") // nvshmemx_putSIZE_nbi_on_stream -ENTRY(nvshmemx_put8_nbi_on_stream, nvshmemx_put8_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put16_nbi_on_stream, nvshmemx_put16_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put32_nbi_on_stream, nvshmemx_put32_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put64_nbi_on_stream, nvshmemx_put64_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put128_nbi_on_stream, nvshmemx_put128_nbi_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_put8_nbi_on_stream, nvshmemx_put8_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put16_nbi_on_stream, nvshmemx_put16_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put32_nbi_on_stream, nvshmemx_put32_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put64_nbi_on_stream, nvshmemx_put64_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put128_nbi_on_stream, nvshmemx_put128_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_putSIZE_nbi_block -ENTRY(nvshmemx_put8_nbi_block, nvshmemx_put8_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put16_nbi_block, nvshmemx_put16_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put32_nbi_block, nvshmemx_put32_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put64_nbi_block, nvshmemx_put64_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put128_nbi_block, nvshmemx_put128_nbi_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_put8_nbi_block, nvshmemx_put8_nbi_block, true, NO_FLAG, P4, 
"Succeessful") +ENTRY(nvshmemx_put16_nbi_block, nvshmemx_put16_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put32_nbi_block, nvshmemx_put32_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put64_nbi_block, nvshmemx_put64_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put128_nbi_block, nvshmemx_put128_nbi_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_putSIZE_nbi_warp -ENTRY(nvshmemx_put8_nbi_warp, nvshmemx_put8_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put16_nbi_warp, nvshmemx_put16_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put32_nbi_warp, nvshmemx_put32_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put64_nbi_warp, nvshmemx_put64_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_put128_nbi_warp, nvshmemx_put128_nbi_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_put8_nbi_warp, nvshmemx_put8_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put16_nbi_warp, nvshmemx_put16_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put32_nbi_warp, nvshmemx_put32_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put64_nbi_warp, nvshmemx_put64_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_put128_nbi_warp, nvshmemx_put128_nbi_warp, true, NO_FLAG, P4, "Succeessful") ENTRY(nvshmem_putmem_nbi, nvshmem_putmem_nbi, true, NO_FLAG, P4, "Succeessful") -ENTRY(nvshmemx_putmem_nbi_on_stream, nvshmemx_putmem_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_putmem_nbi_block, nvshmemx_putmem_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_putmem_nbi_warp, nvshmemx_putmem_nbi_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_putmem_nbi_on_stream, nvshmemx_putmem_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_putmem_nbi_block, nvshmemx_putmem_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_putmem_nbi_warp, nvshmemx_putmem_nbi_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_TYPENAME_get_nbi 
-ENTRY(nvshmem_float_get_nbi, nvshmem_float_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_double_get_nbi, nvshmem_double_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_char_get_nbi, nvshmem_char_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_schar_get_nbi, nvshmem_schar_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_short_get_nbi, nvshmem_short_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int_get_nbi, nvshmem_int_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_long_get_nbi, nvshmem_long_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_longlong_get_nbi, nvshmem_longlong_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uchar_get_nbi, nvshmem_uchar_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ushort_get_nbi, nvshmem_ushort_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint_get_nbi, nvshmem_uint_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulong_get_nbi, nvshmem_ulong_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ulonglong_get_nbi, nvshmem_ulonglong_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int8_get_nbi, nvshmem_int8_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int16_get_nbi, nvshmem_int16_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int32_get_nbi, nvshmem_int32_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_int64_get_nbi, nvshmem_int64_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint8_get_nbi, nvshmem_uint8_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint16_get_nbi, nvshmem_uint16_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint32_get_nbi, nvshmem_uint32_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_uint64_get_nbi, nvshmem_uint64_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_size_get_nbi, nvshmem_size_get_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_ptrdiff_get_nbi, nvshmem_ptrdiff_get_nbi, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_float_get_nbi, nvshmem_float_get_nbi, 
true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_double_get_nbi, nvshmem_double_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_char_get_nbi, nvshmem_char_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_schar_get_nbi, nvshmem_schar_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_short_get_nbi, nvshmem_short_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int_get_nbi, nvshmem_int_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_long_get_nbi, nvshmem_long_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_longlong_get_nbi, nvshmem_longlong_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uchar_get_nbi, nvshmem_uchar_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ushort_get_nbi, nvshmem_ushort_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint_get_nbi, nvshmem_uint_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulong_get_nbi, nvshmem_ulong_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ulonglong_get_nbi, nvshmem_ulonglong_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int8_get_nbi, nvshmem_int8_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int16_get_nbi, nvshmem_int16_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int32_get_nbi, nvshmem_int32_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_int64_get_nbi, nvshmem_int64_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint8_get_nbi, nvshmem_uint8_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint16_get_nbi, nvshmem_uint16_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint32_get_nbi, nvshmem_uint32_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_uint64_get_nbi, nvshmem_uint64_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_size_get_nbi, nvshmem_size_get_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_ptrdiff_get_nbi, nvshmem_ptrdiff_get_nbi, true, NO_FLAG, P4, "Succeessful") // 
nvshmemx_TYPENAME_get_nbi_on_stream -ENTRY(nvshmemx_float_get_nbi_on_stream, nvshmemx_float_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_get_nbi_on_stream, nvshmemx_double_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_get_nbi_on_stream, nvshmemx_char_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_get_nbi_on_stream, nvshmemx_schar_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_get_nbi_on_stream, nvshmemx_short_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_get_nbi_on_stream, nvshmemx_int_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_get_nbi_on_stream, nvshmemx_long_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_get_nbi_on_stream, nvshmemx_longlong_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_get_nbi_on_stream, nvshmemx_uchar_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_get_nbi_on_stream, nvshmemx_ushort_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_get_nbi_on_stream, nvshmemx_uint_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_get_nbi_on_stream, nvshmemx_ulong_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_get_nbi_on_stream, nvshmemx_ulonglong_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_get_nbi_on_stream, nvshmemx_int8_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_get_nbi_on_stream, nvshmemx_int16_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_get_nbi_on_stream, nvshmemx_int32_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_get_nbi_on_stream, nvshmemx_int64_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_get_nbi_on_stream, nvshmemx_uint8_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_get_nbi_on_stream, 
nvshmemx_uint16_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_get_nbi_on_stream, nvshmemx_uint32_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_get_nbi_on_stream, nvshmemx_uint64_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_get_nbi_on_stream, nvshmemx_size_get_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_get_nbi_on_stream, nvshmemx_ptrdiff_get_nbi_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_get_nbi_on_stream, nvshmemx_float_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_get_nbi_on_stream, nvshmemx_double_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_get_nbi_on_stream, nvshmemx_char_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_get_nbi_on_stream, nvshmemx_schar_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_get_nbi_on_stream, nvshmemx_short_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_get_nbi_on_stream, nvshmemx_int_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_get_nbi_on_stream, nvshmemx_long_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_get_nbi_on_stream, nvshmemx_longlong_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_get_nbi_on_stream, nvshmemx_uchar_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_get_nbi_on_stream, nvshmemx_ushort_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_get_nbi_on_stream, nvshmemx_uint_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_get_nbi_on_stream, nvshmemx_ulong_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_get_nbi_on_stream, nvshmemx_ulonglong_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_get_nbi_on_stream, nvshmemx_int8_get_nbi_on_stream, true, 
NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_get_nbi_on_stream, nvshmemx_int16_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_get_nbi_on_stream, nvshmemx_int32_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_get_nbi_on_stream, nvshmemx_int64_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_get_nbi_on_stream, nvshmemx_uint8_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_get_nbi_on_stream, nvshmemx_uint16_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_get_nbi_on_stream, nvshmemx_uint32_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_get_nbi_on_stream, nvshmemx_uint64_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_get_nbi_on_stream, nvshmemx_size_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_get_nbi_on_stream, nvshmemx_ptrdiff_get_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_get_nbi_block -ENTRY(nvshmemx_float_get_nbi_block, nvshmemx_float_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_get_nbi_block, nvshmemx_double_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_get_nbi_block, nvshmemx_char_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_get_nbi_block, nvshmemx_schar_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_get_nbi_block, nvshmemx_short_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_get_nbi_block, nvshmemx_int_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_get_nbi_block, nvshmemx_long_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_get_nbi_block, nvshmemx_longlong_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_get_nbi_block, nvshmemx_uchar_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_get_nbi_block, 
nvshmemx_ushort_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_get_nbi_block, nvshmemx_uint_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_get_nbi_block, nvshmemx_ulong_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_get_nbi_block, nvshmemx_ulonglong_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_get_nbi_block, nvshmemx_int8_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_get_nbi_block, nvshmemx_int16_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_get_nbi_block, nvshmemx_int32_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_get_nbi_block, nvshmemx_int64_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_get_nbi_block, nvshmemx_uint8_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_get_nbi_block, nvshmemx_uint16_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_get_nbi_block, nvshmemx_uint32_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_get_nbi_block, nvshmemx_uint64_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_get_nbi_block, nvshmemx_size_get_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_get_nbi_block, nvshmemx_ptrdiff_get_nbi_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_get_nbi_block, nvshmemx_float_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_get_nbi_block, nvshmemx_double_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_get_nbi_block, nvshmemx_char_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_get_nbi_block, nvshmemx_schar_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_get_nbi_block, nvshmemx_short_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_get_nbi_block, nvshmemx_int_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_get_nbi_block, 
nvshmemx_long_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_get_nbi_block, nvshmemx_longlong_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_get_nbi_block, nvshmemx_uchar_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_get_nbi_block, nvshmemx_ushort_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_get_nbi_block, nvshmemx_uint_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_get_nbi_block, nvshmemx_ulong_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_get_nbi_block, nvshmemx_ulonglong_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_get_nbi_block, nvshmemx_int8_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_get_nbi_block, nvshmemx_int16_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_get_nbi_block, nvshmemx_int32_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_get_nbi_block, nvshmemx_int64_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_get_nbi_block, nvshmemx_uint8_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_get_nbi_block, nvshmemx_uint16_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_get_nbi_block, nvshmemx_uint32_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_get_nbi_block, nvshmemx_uint64_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_get_nbi_block, nvshmemx_size_get_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_get_nbi_block, nvshmemx_ptrdiff_get_nbi_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_TYPENAME_get_nbi_warp -ENTRY(nvshmemx_float_get_nbi_warp, nvshmemx_float_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_double_get_nbi_warp, nvshmemx_double_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_char_get_nbi_warp, nvshmemx_char_get_nbi_warp, false, 
NO_FLAG, P4, "comment") -ENTRY(nvshmemx_schar_get_nbi_warp, nvshmemx_schar_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_short_get_nbi_warp, nvshmemx_short_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int_get_nbi_warp, nvshmemx_int_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_long_get_nbi_warp, nvshmemx_long_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_longlong_get_nbi_warp, nvshmemx_longlong_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uchar_get_nbi_warp, nvshmemx_uchar_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ushort_get_nbi_warp, nvshmemx_ushort_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint_get_nbi_warp, nvshmemx_uint_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulong_get_nbi_warp, nvshmemx_ulong_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ulonglong_get_nbi_warp, nvshmemx_ulonglong_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int8_get_nbi_warp, nvshmemx_int8_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int16_get_nbi_warp, nvshmemx_int16_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int32_get_nbi_warp, nvshmemx_int32_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_int64_get_nbi_warp, nvshmemx_int64_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint8_get_nbi_warp, nvshmemx_uint8_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint16_get_nbi_warp, nvshmemx_uint16_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint32_get_nbi_warp, nvshmemx_uint32_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_uint64_get_nbi_warp, nvshmemx_uint64_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_size_get_nbi_warp, nvshmemx_size_get_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_ptrdiff_get_nbi_warp, nvshmemx_ptrdiff_get_nbi_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_float_get_nbi_warp, 
nvshmemx_float_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_double_get_nbi_warp, nvshmemx_double_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_char_get_nbi_warp, nvshmemx_char_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_schar_get_nbi_warp, nvshmemx_schar_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_short_get_nbi_warp, nvshmemx_short_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int_get_nbi_warp, nvshmemx_int_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_long_get_nbi_warp, nvshmemx_long_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_longlong_get_nbi_warp, nvshmemx_longlong_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uchar_get_nbi_warp, nvshmemx_uchar_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ushort_get_nbi_warp, nvshmemx_ushort_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint_get_nbi_warp, nvshmemx_uint_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulong_get_nbi_warp, nvshmemx_ulong_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ulonglong_get_nbi_warp, nvshmemx_ulonglong_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int8_get_nbi_warp, nvshmemx_int8_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int16_get_nbi_warp, nvshmemx_int16_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int32_get_nbi_warp, nvshmemx_int32_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_int64_get_nbi_warp, nvshmemx_int64_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint8_get_nbi_warp, nvshmemx_uint8_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint16_get_nbi_warp, nvshmemx_uint16_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint32_get_nbi_warp, nvshmemx_uint32_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_uint64_get_nbi_warp, 
nvshmemx_uint64_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_size_get_nbi_warp, nvshmemx_size_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_ptrdiff_get_nbi_warp, nvshmemx_ptrdiff_get_nbi_warp, true, NO_FLAG, P4, "Succeessful") // nvshmem_getSIZE_nbi -ENTRY(nvshmem_get8_nbi, nvshmem_get8_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get16_nbi, nvshmem_get16_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get32_nbi, nvshmem_get32_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get64_nbi, nvshmem_get64_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_get128_nbi, nvshmem_get128_nbi, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_get8_nbi, nvshmem_get8_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get16_nbi, nvshmem_get16_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get32_nbi, nvshmem_get32_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get64_nbi, nvshmem_get64_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_get128_nbi, nvshmem_get128_nbi, true, NO_FLAG, P4, "Succeessful") // nvshmemx_getSIZE_nbi_on_stream -ENTRY(nvshmemx_get8_nbi_on_stream, nvshmemx_get8_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get16_nbi_on_stream, nvshmemx_get16_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get32_nbi_on_stream, nvshmemx_get32_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get64_nbi_on_stream, nvshmemx_get64_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get128_nbi_on_stream, nvshmemx_get128_nbi_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_get8_nbi_on_stream, nvshmemx_get8_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get16_nbi_on_stream, nvshmemx_get16_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get32_nbi_on_stream, nvshmemx_get32_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get64_nbi_on_stream, nvshmemx_get64_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") 
+ENTRY(nvshmemx_get128_nbi_on_stream, nvshmemx_get128_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") // nvshmemx_getSIZE_nbi_block -ENTRY(nvshmemx_get8_nbi_block, nvshmemx_get8_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get16_nbi_block, nvshmemx_get16_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get32_nbi_block, nvshmemx_get32_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get64_nbi_block, nvshmemx_get64_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get128_nbi_block, nvshmemx_get128_nbi_block, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_get8_nbi_block, nvshmemx_get8_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get16_nbi_block, nvshmemx_get16_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get32_nbi_block, nvshmemx_get32_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get64_nbi_block, nvshmemx_get64_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get128_nbi_block, nvshmemx_get128_nbi_block, true, NO_FLAG, P4, "Succeessful") // nvshmemx_getSIZE_nbi_warp -ENTRY(nvshmemx_get8_nbi_warp, nvshmemx_get8_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get16_nbi_warp, nvshmemx_get16_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get32_nbi_warp, nvshmemx_get32_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get64_nbi_warp, nvshmemx_get64_nbi_warp, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_get128_nbi_warp, nvshmemx_get128_nbi_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_get8_nbi_warp, nvshmemx_get8_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get16_nbi_warp, nvshmemx_get16_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get32_nbi_warp, nvshmemx_get32_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get64_nbi_warp, nvshmemx_get64_nbi_warp, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_get128_nbi_warp, nvshmemx_get128_nbi_warp, true, NO_FLAG, P4, "Succeessful") -ENTRY(nvshmem_getmem_nbi, 
nvshmem_getmem_nbi, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_getmem_nbi_on_stream, nvshmemx_getmem_nbi_on_stream, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_getmem_nbi_block, nvshmemx_getmem_nbi_block, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_getmem_nbi_warp, nvshmemx_getmem_nbi_warp, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_getmem_nbi, nvshmem_getmem_nbi, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_getmem_nbi_on_stream, nvshmemx_getmem_nbi_on_stream, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_getmem_nbi_block, nvshmemx_getmem_nbi_block, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_getmem_nbi_warp, nvshmemx_getmem_nbi_warp, true, NO_FLAG, P4, "Succeessful") // Atomic Memory Operations (AMO) @@ -1328,7 +1328,7 @@ ENTRY(nvshmemx_signal_op, nvshmemx_signal_op, true, NO_FLAG, P4, "Succeessful") // Collective Operations ENTRY(nvshmem_barrier_all, nvshmem_barrier_all, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_barrier_all_on_stream, nvshmemx_barrier_all_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmemx_barrier_all_on_stream, nvshmemx_barrier_all_on_stream, true, NO_FLAG, P4, "Succeessful") ENTRY(nvshmemx_barrier_all_block, nvshmemx_barrier_all_block, false, NO_FLAG, P4, "comment") ENTRY(nvshmemx_barrier_all_warp, nvshmemx_barrier_all_warp, false, NO_FLAG, P4, "comment") @@ -2538,8 +2538,8 @@ ENTRY(nvshmem_signal_wait_until, nvshmem_signal_wait_until, true, NO_FLAG, P4, " // Memory Ordering -ENTRY(nvshmem_fence, nvshmem_fence, false, NO_FLAG, P4, "comment") -ENTRY(nvshmem_quiet, nvshmem_quiet, false, NO_FLAG, P4, "comment") -ENTRY(nvshmemx_quiet_on_stream, nvshmemx_quiet_on_stream, false, NO_FLAG, P4, "comment") +ENTRY(nvshmem_fence, nvshmem_fence, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmem_quiet, nvshmem_quiet, true, NO_FLAG, P4, "Succeessful") +ENTRY(nvshmemx_quiet_on_stream, nvshmemx_quiet_on_stream, true, NO_FLAG, P4, "Succeessful") // clang-format on diff --git a/clang/test/dpct/nvshmem/coll_ops.cu 
b/clang/test/dpct/nvshmem/coll_ops.cu new file mode 100644 index 000000000000..a3e0f7b60c32 --- /dev/null +++ b/clang/test/dpct/nvshmem/coll_ops.cu @@ -0,0 +1,16 @@ +// REQUIRES: system-linux +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1 +// RUN: dpct --format-range=none -out-root %T/nvshmem %s --cuda-include-path="%cuda-path/include" +// RUN: FileCheck %s --match-full-lines --input-file %T/nvshmem/coll_ops.dp.cpp +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/nvshmem/coll_ops.dp.cpp -o %T/nvshmem/coll_ops.dp.o %} +#include +#include + +int main() { + cudaStream_t stream = 0; + + // CHECK: ishmemx_barrier_all_on_queue(*stream); + nvshmemx_barrier_all_on_stream(stream); + + return 0; +} diff --git a/clang/test/dpct/nvshmem/mem_order.cu b/clang/test/dpct/nvshmem/mem_order.cu new file mode 100644 index 000000000000..ef643d6469b5 --- /dev/null +++ b/clang/test/dpct/nvshmem/mem_order.cu @@ -0,0 +1,30 @@ +// REQUIRES: system-linux +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1 +// RUN: dpct --format-range=none -out-root %T/nvshmem %s --cuda-include-path="%cuda-path/include" +// RUN: FileCheck %s --match-full-lines --input-file %T/nvshmem/mem_order.dp.cpp +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/nvshmem/mem_order.dp.cpp -o %T/nvshmem/mem_order.dp.o %} +#include +#include + +__host__ __device__ void test() { + // CHECK: ishmem_fence(); + nvshmem_fence(); + + // CHECK: ishmem_quiet(); + nvshmem_quiet(); +} + +int main() { + cudaStream_t stream; + + // CHECK: ishmem_fence(); + nvshmem_fence(); + + // CHECK: ishmem_quiet(); + nvshmem_quiet(); + + // CHECK: ishmemx_quiet_on_queue(*stream); + nvshmemx_quiet_on_stream(stream); + + return 0; +} diff --git a/clang/test/dpct/nvshmem/rma.cu b/clang/test/dpct/nvshmem/rma.cu new file mode 100644 index 000000000000..76c16adbc568 --- /dev/null +++ b/clang/test/dpct/nvshmem/rma.cu @@ -0,0 +1,1653 @@ +// REQUIRES: system-linux +// UNSUPPORTED: cuda-8.0,
cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1 +// RUN: dpct --format-range=none -out-root %T/nvshmem %s --cuda-include-path="%cuda-path/include" +// RUN: FileCheck %s --match-full-lines --input-file %T/nvshmem/rma.dp.cpp +// RUN: %if build_lit %{icpx -c -fsycl -DNO_BUILD_TEST %T/nvshmem/rma.dp.cpp -o %T/nvshmem/rma.dp.o %} +#include +#include + + +__host__ __device__ void test(int target_pe) { + const void *src_void; + void *dst_void; + + // Standard RMA types + float *src_float; + float *dst_float; + + double *src_double; + double *dst_double; + + char *src_char; + char *dst_char; + + signed char *src_schar; + signed char *dst_schar; + + short *src_short; + short *dst_short; + + int *src_int; + int *dst_int; + + long *src_long; + long *dst_long; + + long long *src_longlong; + long long *dst_longlong; + + unsigned char *src_uchar; + unsigned char *dst_uchar; + + unsigned short *src_ushort; + unsigned short *dst_ushort; + + unsigned int *src_uint; + unsigned int *dst_uint; + + unsigned long *src_ulong; + unsigned long *dst_ulong; + + unsigned long long *src_ulonglong; + unsigned long long *dst_ulonglong; + + int8_t *src_int8; + int8_t *dst_int8; + + int16_t *src_int16; + int16_t *dst_int16; + + int32_t *src_int32; + int32_t *dst_int32; + + int64_t *src_int64; + int64_t *dst_int64; + + uint8_t *src_uint8; + uint8_t *dst_uint8; + + uint16_t *src_uint16; + uint16_t *dst_uint16; + + uint32_t *src_uint32; + uint32_t *dst_uint32; + + uint64_t *src_uint64; + uint64_t *dst_uint64; + + size_t *src_size; + size_t *dst_size; + + ptrdiff_t *src_ptrdiff; + ptrdiff_t *dst_ptrdiff; + + const int count = 10; + + // nvshmem_TYPENAME_put + // ishmem_TYPENAME_put + // CHECK: ishmem_put(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_short, src_short, 
count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_put(dst_float, src_float, count, target_pe); + nvshmem_double_put(dst_double, src_double, count, target_pe); + nvshmem_char_put(dst_char, src_char, count, target_pe); + nvshmem_schar_put(dst_schar, src_schar, count, target_pe); + nvshmem_short_put(dst_short, src_short, count, target_pe); + nvshmem_int_put(dst_int, src_int, count, target_pe); + nvshmem_long_put(dst_long, src_long, count, target_pe); + nvshmem_longlong_put(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_put(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_put(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_put(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_put(dst_ulong, src_ulong, count, 
target_pe); + nvshmem_ulonglong_put(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_put(dst_int8, src_int8, count, target_pe); + nvshmem_int16_put(dst_int16, src_int16, count, target_pe); + nvshmem_int32_put(dst_int32, src_int32, count, target_pe); + nvshmem_int64_put(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_put(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_put(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_put(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_put(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_put(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_put(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_put_block + // ishmemx_TYPENAME_put_work_group + // CHECK: ishmemx_put_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uchar, src_uchar, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + 
nvshmemx_float_put_block(dst_float, src_float, count, target_pe); + nvshmemx_double_put_block(dst_double, src_double, count, target_pe); + nvshmemx_char_put_block(dst_char, src_char, count, target_pe); + nvshmemx_schar_put_block(dst_schar, src_schar, count, target_pe); + nvshmemx_short_put_block(dst_short, src_short, count, target_pe); + nvshmemx_int_put_block(dst_int, src_int, count, target_pe); + nvshmemx_long_put_block(dst_long, src_long, count, target_pe); + nvshmemx_longlong_put_block(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_put_block(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_put_block(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_put_block(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_put_block(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_put_block(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_put_block(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_put_block(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_put_block(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_put_block(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_put_block(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_put_block(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_put_block(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_put_block(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_put_block(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_put_block(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_put_warp + // ishmemx_TYPENAME_put_work_group + // CHECK: ishmemx_put_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_char, src_char, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint8, src_uint8, count, 
target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_float_put_warp(dst_float, src_float, count, target_pe); + nvshmemx_double_put_warp(dst_double, src_double, count, target_pe); + nvshmemx_char_put_warp(dst_char, src_char, count, target_pe); + nvshmemx_schar_put_warp(dst_schar, src_schar, count, target_pe); + nvshmemx_short_put_warp(dst_short, src_short, count, target_pe); + nvshmemx_int_put_warp(dst_int, src_int, count, target_pe); + nvshmemx_long_put_warp(dst_long, src_long, count, target_pe); + nvshmemx_longlong_put_warp(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_put_warp(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_put_warp(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_put_warp(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_put_warp(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_put_warp(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_put_warp(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_put_warp(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_put_warp(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_put_warp(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_put_warp(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_put_warp(dst_uint16, 
src_uint16, count, target_pe); + nvshmemx_uint32_put_warp(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_put_warp(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_put_warp(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_put_warp(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmem_putSIZE + // ishmem_putSIZE + // CHECK: ishmem_put8(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put16(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put32(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put64(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put128(dst_void, src_void, count, target_pe); + nvshmem_put8(dst_void, src_void, count, target_pe); + nvshmem_put16(dst_void, src_void, count, target_pe); + nvshmem_put32(dst_void, src_void, count, target_pe); + nvshmem_put64(dst_void, src_void, count, target_pe); + nvshmem_put128(dst_void, src_void, count, target_pe); + + // nvshmemx_putSIZE_block + // ishmemx_putSIZE_work_group + // CHECK: ishmemx_put8_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put16_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put32_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put64_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put128_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_put8_block(dst_void, src_void, count, target_pe); + nvshmemx_put16_block(dst_void, src_void, count, target_pe); + nvshmemx_put32_block(dst_void, src_void, count, target_pe); + nvshmemx_put64_block(dst_void, src_void, count, target_pe); + nvshmemx_put128_block(dst_void, src_void, 
count, target_pe); + + // nvshmemx_putSIZE_warp + // ishmemx_putSIZE_work_group + // CHECK: ishmemx_put8_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put16_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put32_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put64_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put128_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_put8_warp(dst_void, src_void, count, target_pe); + nvshmemx_put16_warp(dst_void, src_void, count, target_pe); + nvshmemx_put32_warp(dst_void, src_void, count, target_pe); + nvshmemx_put64_warp(dst_void, src_void, count, target_pe); + nvshmemx_put128_warp(dst_void, src_void, count, target_pe); + + // nvshmem_TYPENAME_iput + // ishmem_TYPENAME_iput + // CHECK: ishmem_iput(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: 
ishmem_iput(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_float_iput(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_double_iput(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_char_iput(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_schar_iput(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_short_iput(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int_iput(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_long_iput(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + 
nvshmem_longlong_iput(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uchar_iput(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ushort_iput(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint_iput(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulong_iput(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulonglong_iput(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int8_iput(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int16_iput(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int32_iput(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int64_iput(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint8_iput(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint16_iput(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint32_iput(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint64_iput(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_size_iput(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ptrdiff_iput(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_iput_block + // ishmemx_TYPENAME_iput_work_group + // CHECK: ishmemx_iput_work_group(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_float_iput_block(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_double_iput_block(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_char_iput_block(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_schar_iput_block(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_short_iput_block(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int_iput_block(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + 
nvshmemx_long_iput_block(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_longlong_iput_block(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uchar_iput_block(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ushort_iput_block(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint_iput_block(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulong_iput_block(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulonglong_iput_block(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int8_iput_block(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int16_iput_block(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int32_iput_block(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int64_iput_block(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint8_iput_block(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint16_iput_block(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint32_iput_block(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint64_iput_block(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_size_iput_block(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ptrdiff_iput_block(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_iput_warp + // ishmemx_TYPENAME_iput_work_group + // CHECK: ishmemx_iput_work_group(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, 
count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput_work_group(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_float_iput_warp(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_double_iput_warp(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_char_iput_warp(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_schar_iput_warp(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_short_iput_warp(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); 
+ nvshmemx_int_iput_warp(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_long_iput_warp(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_longlong_iput_warp(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uchar_iput_warp(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ushort_iput_warp(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint_iput_warp(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulong_iput_warp(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulonglong_iput_warp(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int8_iput_warp(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int16_iput_warp(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int32_iput_warp(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int64_iput_warp(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint8_iput_warp(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint16_iput_warp(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint32_iput_warp(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint64_iput_warp(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_size_iput_warp(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ptrdiff_iput_warp(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmem_iputSIZE + // ishmem_iputSIZE + // CHECK: ishmem_iput8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // 
CHECK-NEXT: ishmem_iput32(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput32(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_iputSIZE_block + // ishmemx_iputSIZE_work_group + // CHECK: ishmemx_iput8_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput16_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput32_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput64_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iput128_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_iput8_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput16_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput32_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput64_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput128_block(dst_void, 
src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_iputSIZE_warp + // ishmemx_iputSIZE_work_group + // CHECK: ishmemx_iput8_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput16_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput32_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput64_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iput128_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_iput8_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput16_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput32_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput64_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iput128_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmem_putmem + // ishmem_putmem + // CHECK: ishmem_putmem(dst_void, src_void, count, target_pe); + nvshmem_putmem(dst_void, src_void, count, target_pe); + + // nvshmemx_putmem_block + // ishmemx_putmem_work_group + // CHECK: ishmemx_putmem_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_putmem_block(dst_void, src_void, count, target_pe); + + // nvshmemx_putmem_warp + // ishmemx_putmem_work_group + // CHECK: ishmemx_putmem_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + 
nvshmemx_putmem_warp(dst_void, src_void, count, target_pe); + + // nvshmem_TYPENAME_p + // ishmem_TYPENAME_p + // CHECK: ishmem_p(dst_float, *src_float, target_pe); + // CHECK-NEXT: ishmem_p(dst_double, *src_double, target_pe); + // CHECK-NEXT: ishmem_p(dst_char, *src_char, target_pe); + // CHECK-NEXT: ishmem_p(dst_schar, *src_schar, target_pe); + // CHECK-NEXT: ishmem_p(dst_short, *src_short, target_pe); + // CHECK-NEXT: ishmem_p(dst_int, *src_int, target_pe); + // CHECK-NEXT: ishmem_p(dst_long, *src_long, target_pe); + // CHECK-NEXT: ishmem_p(dst_longlong, *src_longlong, target_pe); + // CHECK-NEXT: ishmem_p(dst_uchar, *src_uchar, target_pe); + // CHECK-NEXT: ishmem_p(dst_ushort, *src_ushort, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint, *src_uint, target_pe); + // CHECK-NEXT: ishmem_p(dst_ulong, *src_ulong, target_pe); + // CHECK-NEXT: ishmem_p(dst_ulonglong, *src_ulonglong, target_pe); + // CHECK-NEXT: ishmem_p(dst_int8, *src_int8, target_pe); + // CHECK-NEXT: ishmem_p(dst_int16, *src_int16, target_pe); + // CHECK-NEXT: ishmem_p(dst_int32, *src_int32, target_pe); + // CHECK-NEXT: ishmem_p(dst_int64, *src_int64, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint8, *src_uint8, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint16, *src_uint16, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint32, *src_uint32, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint64, *src_uint64, target_pe); + // CHECK-NEXT: ishmem_p(dst_size, *src_size, target_pe); + // CHECK-NEXT: ishmem_p(dst_ptrdiff, *src_ptrdiff, target_pe); + nvshmem_float_p(dst_float, *src_float, target_pe); + nvshmem_double_p(dst_double, *src_double, target_pe); + nvshmem_char_p(dst_char, *src_char, target_pe); + nvshmem_schar_p(dst_schar, *src_schar, target_pe); + nvshmem_short_p(dst_short, *src_short, target_pe); + nvshmem_int_p(dst_int, *src_int, target_pe); + nvshmem_long_p(dst_long, *src_long, target_pe); + nvshmem_longlong_p(dst_longlong, *src_longlong, target_pe); + nvshmem_uchar_p(dst_uchar, *src_uchar, target_pe); + 
nvshmem_ushort_p(dst_ushort, *src_ushort, target_pe); + nvshmem_uint_p(dst_uint, *src_uint, target_pe); + nvshmem_ulong_p(dst_ulong, *src_ulong, target_pe); + nvshmem_ulonglong_p(dst_ulonglong, *src_ulonglong, target_pe); + nvshmem_int8_p(dst_int8, *src_int8, target_pe); + nvshmem_int16_p(dst_int16, *src_int16, target_pe); + nvshmem_int32_p(dst_int32, *src_int32, target_pe); + nvshmem_int64_p(dst_int64, *src_int64, target_pe); + nvshmem_uint8_p(dst_uint8, *src_uint8, target_pe); + nvshmem_uint16_p(dst_uint16, *src_uint16, target_pe); + nvshmem_uint32_p(dst_uint32, *src_uint32, target_pe); + nvshmem_uint64_p(dst_uint64, *src_uint64, target_pe); + nvshmem_size_p(dst_size, *src_size, target_pe); + nvshmem_ptrdiff_p(dst_ptrdiff, *src_ptrdiff, target_pe); + + // nvshmem_TYPENAME_get + // ishmem_TYPENAME_get + // CHECK: ishmem_get(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int64, src_int64, count, target_pe); + // 
CHECK-NEXT: ishmem_get(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_get(dst_float, src_float, count, target_pe); + nvshmem_double_get(dst_double, src_double, count, target_pe); + nvshmem_char_get(dst_char, src_char, count, target_pe); + nvshmem_schar_get(dst_schar, src_schar, count, target_pe); + nvshmem_short_get(dst_short, src_short, count, target_pe); + nvshmem_int_get(dst_int, src_int, count, target_pe); + nvshmem_long_get(dst_long, src_long, count, target_pe); + nvshmem_longlong_get(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_get(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_get(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_get(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_get(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_get(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_get(dst_int8, src_int8, count, target_pe); + nvshmem_int16_get(dst_int16, src_int16, count, target_pe); + nvshmem_int32_get(dst_int32, src_int32, count, target_pe); + nvshmem_int64_get(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_get(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_get(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_get(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_get(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_get(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_get(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_get_block + // ishmemx_TYPENAME_get_work_group + // CHECK: ishmemx_get_work_group(dst_float, src_float, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: 
ishmemx_get_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_float_get_block(dst_float, src_float, count, target_pe); + nvshmemx_double_get_block(dst_double, src_double, count, target_pe); + nvshmemx_char_get_block(dst_char, src_char, count, target_pe); + nvshmemx_schar_get_block(dst_schar, src_schar, count, target_pe); + nvshmemx_short_get_block(dst_short, src_short, count, target_pe); + nvshmemx_int_get_block(dst_int, src_int, count, target_pe); + nvshmemx_long_get_block(dst_long, src_long, count, target_pe); + nvshmemx_longlong_get_block(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_get_block(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_get_block(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_get_block(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_get_block(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_get_block(dst_ulonglong, src_ulonglong, count, target_pe); + 
nvshmemx_int8_get_block(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_get_block(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_get_block(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_get_block(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_get_block(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_get_block(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_get_block(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_get_block(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_get_block(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_get_block(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_get_warp + // ishmemx_TYPENAME_get_work_group + // CHECK: ishmemx_get_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: 
ishmemx_get_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_float_get_warp(dst_float, src_float, count, target_pe); + nvshmemx_double_get_warp(dst_double, src_double, count, target_pe); + 
nvshmemx_char_get_warp(dst_char, src_char, count, target_pe); + nvshmemx_schar_get_warp(dst_schar, src_schar, count, target_pe); + nvshmemx_short_get_warp(dst_short, src_short, count, target_pe); + nvshmemx_int_get_warp(dst_int, src_int, count, target_pe); + nvshmemx_long_get_warp(dst_long, src_long, count, target_pe); + nvshmemx_longlong_get_warp(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_get_warp(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_get_warp(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_get_warp(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_get_warp(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_get_warp(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_get_warp(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_get_warp(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_get_warp(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_get_warp(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_get_warp(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_get_warp(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_get_warp(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_get_warp(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_get_warp(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_get_warp(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmem_getSIZE + // ishmem_getSIZE + // CHECK: ishmem_get8(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get16(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get32(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get64(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get128(dst_void, src_void, count, target_pe); + nvshmem_get8(dst_void, src_void, count, target_pe); + nvshmem_get16(dst_void, src_void, count, target_pe); + nvshmem_get32(dst_void, src_void, count, target_pe); + nvshmem_get64(dst_void, 
src_void, count, target_pe); + nvshmem_get128(dst_void, src_void, count, target_pe); + + // nvshmemx_getSIZE_block + // ishmemx_getSIZE_work_group + // CHECK: ishmemx_get8_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get16_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get32_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get64_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get128_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_get8_block(dst_void, src_void, count, target_pe); + nvshmemx_get16_block(dst_void, src_void, count, target_pe); + nvshmemx_get32_block(dst_void, src_void, count, target_pe); + nvshmemx_get64_block(dst_void, src_void, count, target_pe); + nvshmemx_get128_block(dst_void, src_void, count, target_pe); + + // nvshmemx_getSIZE_warp + // ishmemx_getSIZE_work_group + // CHECK: ishmemx_get8_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get16_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get32_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get64_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get128_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_get8_warp(dst_void, src_void, count, target_pe); + nvshmemx_get16_warp(dst_void, src_void, count, target_pe); + 
nvshmemx_get32_warp(dst_void, src_void, count, target_pe); + nvshmemx_get64_warp(dst_void, src_void, count, target_pe); + nvshmemx_get128_warp(dst_void, src_void, count, target_pe); + + // nvshmem_TYPENAME_iget + // ishmem_TYPENAME_iget + // CHECK: ishmem_iget(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, 
target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_float_iget(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_double_iget(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_char_iget(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_schar_iget(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_short_iget(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int_iget(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_long_iget(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_longlong_iget(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uchar_iget(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ushort_iget(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint_iget(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulong_iget(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulonglong_iget(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int8_iget(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int16_iget(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int32_iget(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int64_iget(dst_int64, 
src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint8_iget(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint16_iget(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint32_iget(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint64_iget(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_size_iget(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ptrdiff_iget(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_iget_block + // ishmemx_TYPENAME_iget_work_group + // CHECK: ishmemx_iget_work_group(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, 
target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_float_iget_block(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_double_iget_block(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_char_iget_block(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_schar_iget_block(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_short_iget_block(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int_iget_block(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_long_iget_block(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_longlong_iget_block(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uchar_iget_block(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ushort_iget_block(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint_iget_block(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulong_iget_block(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulonglong_iget_block(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int8_iget_block(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int16_iget_block(dst_int16, src_int16, 
*dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int32_iget_block(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int64_iget_block(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint8_iget_block(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint16_iget_block(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint32_iget_block(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint64_iget_block(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_size_iget_block(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ptrdiff_iget_block(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_iget_warp + // ishmemx_TYPENAME_iget_work_group + // CHECK: ishmemx_iget_work_group(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget_work_group(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_float_iget_warp(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_double_iget_warp(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_char_iget_warp(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_schar_iget_warp(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_short_iget_warp(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int_iget_warp(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_long_iget_warp(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_longlong_iget_warp(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uchar_iget_warp(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ushort_iget_warp(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint_iget_warp(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulong_iget_warp(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ulonglong_iget_warp(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + 
nvshmemx_int8_iget_warp(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int16_iget_warp(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int32_iget_warp(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_int64_iget_warp(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint8_iget_warp(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint16_iget_warp(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint32_iget_warp(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_uint64_iget_warp(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_size_iget_warp(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_ptrdiff_iget_warp(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmem_igetSIZE + // ishmem_igetSIZE + // CHECK: ishmem_iget8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget32(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget32(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_igetSIZE_block + // ishmemx_igetSIZE_work_group + // CHECK: 
ishmemx_iget8_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget16_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget32_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget64_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_iget128_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_iget8_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget16_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget32_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget64_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget128_block(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_igetSIZE_warp + // ishmemx_igetSIZE_work_group + // CHECK: ishmemx_iget8_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget16_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget32_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_iget64_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: 
ishmemx_iget128_work_group(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_iget8_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget16_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget32_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget64_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmemx_iget128_warp(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmem_getmem + // ishmem_getmem + // CHECK: ishmem_getmem(dst_void, src_void, count, target_pe); + nvshmem_getmem(dst_void, src_void, count, target_pe); + + // nvshmemx_getmem_block + // ishmemx_getmem_work_group + // CHECK: ishmemx_getmem_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_getmem_block(dst_void, src_void, count, target_pe); + + // nvshmemx_getmem_warp + // ishmemx_getmem_work_group + // CHECK: ishmemx_getmem_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_getmem_warp(dst_void, src_void, count, target_pe); + + // nvshmem_TYPENAME_g + // ishmem_TYPENAME_g + // CHECK: *dst_float = ishmem_g(src_float, target_pe); + // CHECK-NEXT: *dst_double = ishmem_g(src_double, target_pe); + // CHECK-NEXT: *dst_char = ishmem_g(src_char, target_pe); + // CHECK-NEXT: *dst_schar = ishmem_g(src_schar, target_pe); + // CHECK-NEXT: *dst_short = ishmem_g(src_short, target_pe); + // CHECK-NEXT: *dst_int = ishmem_g(src_int, target_pe); + // CHECK-NEXT: *dst_long = ishmem_g(src_long, target_pe); + // CHECK-NEXT: *dst_longlong = ishmem_g(src_longlong, target_pe); + // CHECK-NEXT: *dst_uchar = ishmem_g(src_uchar, target_pe); + // CHECK-NEXT: *dst_ushort = ishmem_g(src_ushort, target_pe); + // CHECK-NEXT: *dst_uint = ishmem_g(src_uint, target_pe); + // 
CHECK-NEXT: *dst_ulong = ishmem_g(src_ulong, target_pe); + // CHECK-NEXT: *dst_ulonglong = ishmem_g(src_ulonglong, target_pe); + // CHECK-NEXT: *dst_int8 = ishmem_g(src_int8, target_pe); + // CHECK-NEXT: *dst_int16 = ishmem_g(src_int16, target_pe); + // CHECK-NEXT: *dst_int32 = ishmem_g(src_int32, target_pe); + // CHECK-NEXT: *dst_int64 = ishmem_g(src_int64, target_pe); + // CHECK-NEXT: *dst_uint8 = ishmem_g(src_uint8, target_pe); + // CHECK-NEXT: *dst_uint16 = ishmem_g(src_uint16, target_pe); + // CHECK-NEXT: *dst_uint32 = ishmem_g(src_uint32, target_pe); + // CHECK-NEXT: *dst_uint64 = ishmem_g(src_uint64, target_pe); + // CHECK-NEXT: *dst_size = ishmem_g(src_size, target_pe); + // CHECK-NEXT: *dst_ptrdiff = ishmem_g(src_ptrdiff, target_pe); + *dst_float = nvshmem_float_g(src_float, target_pe); + *dst_double = nvshmem_double_g(src_double, target_pe); + *dst_char = nvshmem_char_g(src_char, target_pe); + *dst_schar = nvshmem_schar_g(src_schar, target_pe); + *dst_short = nvshmem_short_g(src_short, target_pe); + *dst_int = nvshmem_int_g(src_int, target_pe); + *dst_long = nvshmem_long_g(src_long, target_pe); + *dst_longlong = nvshmem_longlong_g(src_longlong, target_pe); + *dst_uchar = nvshmem_uchar_g(src_uchar, target_pe); + *dst_ushort = nvshmem_ushort_g(src_ushort, target_pe); + *dst_uint = nvshmem_uint_g(src_uint, target_pe); + *dst_ulong = nvshmem_ulong_g(src_ulong, target_pe); + *dst_ulonglong = nvshmem_ulonglong_g(src_ulonglong, target_pe); + *dst_int8 = nvshmem_int8_g(src_int8, target_pe); + *dst_int16 = nvshmem_int16_g(src_int16, target_pe); + *dst_int32 = nvshmem_int32_g(src_int32, target_pe); + *dst_int64 = nvshmem_int64_g(src_int64, target_pe); + *dst_uint8 = nvshmem_uint8_g(src_uint8, target_pe); + *dst_uint16 = nvshmem_uint16_g(src_uint16, target_pe); + *dst_uint32 = nvshmem_uint32_g(src_uint32, target_pe); + *dst_uint64 = nvshmem_uint64_g(src_uint64, target_pe); + *dst_size = nvshmem_size_g(src_size, target_pe); + *dst_ptrdiff = 
nvshmem_ptrdiff_g(src_ptrdiff, target_pe); +} + + +int main() { + const void *src_void; + void *dst_void; + + // Standard RMA types + float *src_float; + float *dst_float; + + double *src_double; + double *dst_double; + + char *src_char; + char *dst_char; + + signed char *src_schar; + signed char *dst_schar; + + short *src_short; + short *dst_short; + + int *src_int; + int *dst_int; + + long *src_long; + long *dst_long; + + long long *src_longlong; + long long *dst_longlong; + + unsigned char *src_uchar; + unsigned char *dst_uchar; + + unsigned short *src_ushort; + unsigned short *dst_ushort; + + unsigned int *src_uint; + unsigned int *dst_uint; + + unsigned long *src_ulong; + unsigned long *dst_ulong; + + unsigned long long *src_ulonglong; + unsigned long long *dst_ulonglong; + + int8_t *src_int8; + int8_t *dst_int8; + + int16_t *src_int16; + int16_t *dst_int16; + + int32_t *src_int32; + int32_t *dst_int32; + + int64_t *src_int64; + int64_t *dst_int64; + + uint8_t *src_uint8; + uint8_t *dst_uint8; + + uint16_t *src_uint16; + uint16_t *dst_uint16; + + uint32_t *src_uint32; + uint32_t *dst_uint32; + + uint64_t *src_uint64; + uint64_t *dst_uint64; + + size_t *src_size; + size_t *dst_size; + + ptrdiff_t *src_ptrdiff; + ptrdiff_t *dst_ptrdiff; + + int target_pe = 0; + const int count = 10; + cudaStream_t stream = 0; + + // nvshmem_TYPENAME_put + // ishmem_TYPENAME_put + // CHECK: ishmem_put(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uchar, src_uchar, count, 
target_pe); + // CHECK-NEXT: ishmem_put(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_put(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_put(dst_float, src_float, count, target_pe); + nvshmem_double_put(dst_double, src_double, count, target_pe); + nvshmem_char_put(dst_char, src_char, count, target_pe); + nvshmem_schar_put(dst_schar, src_schar, count, target_pe); + nvshmem_short_put(dst_short, src_short, count, target_pe); + nvshmem_int_put(dst_int, src_int, count, target_pe); + nvshmem_long_put(dst_long, src_long, count, target_pe); + nvshmem_longlong_put(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_put(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_put(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_put(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_put(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_put(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_put(dst_int8, src_int8, count, target_pe); + nvshmem_int16_put(dst_int16, src_int16, count, target_pe); + nvshmem_int32_put(dst_int32, src_int32, count, target_pe); + 
nvshmem_int64_put(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_put(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_put(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_put(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_put(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_put(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_put(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_put_on_stream + // ishmemx_TYPENAME_put_on_queue + // CHECK: ishmemx_put_on_queue(dst_float, src_float, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_double, src_double, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_char, src_char, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_schar, src_schar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_short, src_short, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_int, src_int, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_long, src_long, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_longlong, src_longlong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_uchar, src_uchar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_ushort, src_ushort, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_uint, src_uint, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_ulong, src_ulong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_ulonglong, src_ulonglong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_int8, src_int8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_int16, src_int16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_int32, src_int32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_int64, src_int64, count, target_pe, 
*stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_uint8, src_uint8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_uint16, src_uint16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_uint32, src_uint32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_uint64, src_uint64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_size, src_size, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_on_queue(dst_ptrdiff, src_ptrdiff, count, target_pe, *stream); + nvshmemx_float_put_on_stream(dst_float, src_float, count, target_pe, stream); + nvshmemx_double_put_on_stream(dst_double, src_double, count, target_pe, stream); + nvshmemx_char_put_on_stream(dst_char, src_char, count, target_pe, stream); + nvshmemx_schar_put_on_stream(dst_schar, src_schar, count, target_pe, stream); + nvshmemx_short_put_on_stream(dst_short, src_short, count, target_pe, stream); + nvshmemx_int_put_on_stream(dst_int, src_int, count, target_pe, stream); + nvshmemx_long_put_on_stream(dst_long, src_long, count, target_pe, stream); + nvshmemx_longlong_put_on_stream(dst_longlong, src_longlong, count, target_pe, stream); + nvshmemx_uchar_put_on_stream(dst_uchar, src_uchar, count, target_pe, stream); + nvshmemx_ushort_put_on_stream(dst_ushort, src_ushort, count, target_pe, stream); + nvshmemx_uint_put_on_stream(dst_uint, src_uint, count, target_pe, stream); + nvshmemx_ulong_put_on_stream(dst_ulong, src_ulong, count, target_pe, stream); + nvshmemx_ulonglong_put_on_stream(dst_ulonglong, src_ulonglong, count, target_pe, stream); + nvshmemx_int8_put_on_stream(dst_int8, src_int8, count, target_pe, stream); + nvshmemx_int16_put_on_stream(dst_int16, src_int16, count, target_pe, stream); + nvshmemx_int32_put_on_stream(dst_int32, src_int32, count, target_pe, stream); + nvshmemx_int64_put_on_stream(dst_int64, src_int64, count, target_pe, stream); + nvshmemx_uint8_put_on_stream(dst_uint8, src_uint8, count, target_pe, stream); + 
nvshmemx_uint16_put_on_stream(dst_uint16, src_uint16, count, target_pe, stream); + nvshmemx_uint32_put_on_stream(dst_uint32, src_uint32, count, target_pe, stream); + nvshmemx_uint64_put_on_stream(dst_uint64, src_uint64, count, target_pe, stream); + nvshmemx_size_put_on_stream(dst_size, src_size, count, target_pe, stream); + nvshmemx_ptrdiff_put_on_stream(dst_ptrdiff, src_ptrdiff, count, target_pe, stream); + + // nvshmem_putSIZE + // ishmem_putSIZE + // CHECK: ishmem_put8(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put16(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put32(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put64(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put128(dst_void, src_void, count, target_pe); + nvshmem_put8(dst_void, src_void, count, target_pe); + nvshmem_put16(dst_void, src_void, count, target_pe); + nvshmem_put32(dst_void, src_void, count, target_pe); + nvshmem_put64(dst_void, src_void, count, target_pe); + nvshmem_put128(dst_void, src_void, count, target_pe); + + // nvshmemx_putSIZE_on_stream + // ishmemx_putSIZE_on_queue + // CHECK: ishmemx_put8_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put16_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put32_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put64_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put128_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_put8_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put16_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put32_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put64_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put128_on_stream(dst_void, src_void, count, target_pe, stream); + + // nvshmem_TYPENAME_iput + // ishmem_TYPENAME_iput + // 
CHECK: ishmem_iput(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: 
ishmem_iput(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_float_iput(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_double_iput(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_char_iput(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_schar_iput(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_short_iput(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int_iput(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_long_iput(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_longlong_iput(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uchar_iput(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ushort_iput(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint_iput(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulong_iput(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulonglong_iput(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int8_iput(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int16_iput(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int32_iput(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int64_iput(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint8_iput(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint16_iput(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, 
target_pe); + nvshmem_uint32_iput(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint64_iput(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_size_iput(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ptrdiff_iput(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_iput_on_stream + // ishmemx_TYPENAME_iput_on_queue + // CHECK: ishmemx_iput_on_queue(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: 
ishmemx_iput_on_queue(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput_on_queue(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + nvshmemx_float_iput_on_stream(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_double_iput_on_stream(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_char_iput_on_stream(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_schar_iput_on_stream(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_short_iput_on_stream(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int_iput_on_stream(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_long_iput_on_stream(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_longlong_iput_on_stream(dst_longlong, src_longlong, *dst_ptrdiff, 
*src_ptrdiff, count, target_pe, stream); + nvshmemx_uchar_iput_on_stream(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ushort_iput_on_stream(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint_iput_on_stream(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ulong_iput_on_stream(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ulonglong_iput_on_stream(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int8_iput_on_stream(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int16_iput_on_stream(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int32_iput_on_stream(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int64_iput_on_stream(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint8_iput_on_stream(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint16_iput_on_stream(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint32_iput_on_stream(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint64_iput_on_stream(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_size_iput_on_stream(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ptrdiff_iput_on_stream(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + + // nvshmem_iputSIZE + // ishmem_iputSIZE + // CHECK: ishmem_iput8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput32(dst_void, src_void, *dst_ptrdiff, 
*src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iput128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput32(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iput128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_iputSIZE_on_stream + // ishmemx_iputSIZE_on_queue + // CHECK: ishmemx_iput8_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput16_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput32_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput64_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iput128_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + nvshmemx_iput8_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iput16_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iput32_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iput64_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iput128_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + + // nvshmem_putmem + // ishmem_putmem + // CHECK: ishmem_putmem(dst_void, src_void, count, target_pe); + nvshmem_putmem(dst_void, src_void, count, target_pe); + + // 
nvshmemx_putmem_on_stream + // ishmemx_putmem_on_queue + // CHECK: ishmemx_putmem_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_putmem_on_stream(dst_void, src_void, count, target_pe, stream); + + // nvshmem_TYPENAME_p + // ishmem_TYPENAME_p + // CHECK: ishmem_p(dst_float, *src_float, target_pe); + // CHECK-NEXT: ishmem_p(dst_double, *src_double, target_pe); + // CHECK-NEXT: ishmem_p(dst_char, *src_char, target_pe); + // CHECK-NEXT: ishmem_p(dst_schar, *src_schar, target_pe); + // CHECK-NEXT: ishmem_p(dst_short, *src_short, target_pe); + // CHECK-NEXT: ishmem_p(dst_int, *src_int, target_pe); + // CHECK-NEXT: ishmem_p(dst_long, *src_long, target_pe); + // CHECK-NEXT: ishmem_p(dst_longlong, *src_longlong, target_pe); + // CHECK-NEXT: ishmem_p(dst_uchar, *src_uchar, target_pe); + // CHECK-NEXT: ishmem_p(dst_ushort, *src_ushort, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint, *src_uint, target_pe); + // CHECK-NEXT: ishmem_p(dst_ulong, *src_ulong, target_pe); + // CHECK-NEXT: ishmem_p(dst_ulonglong, *src_ulonglong, target_pe); + // CHECK-NEXT: ishmem_p(dst_int8, *src_int8, target_pe); + // CHECK-NEXT: ishmem_p(dst_int16, *src_int16, target_pe); + // CHECK-NEXT: ishmem_p(dst_int32, *src_int32, target_pe); + // CHECK-NEXT: ishmem_p(dst_int64, *src_int64, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint8, *src_uint8, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint16, *src_uint16, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint32, *src_uint32, target_pe); + // CHECK-NEXT: ishmem_p(dst_uint64, *src_uint64, target_pe); + // CHECK-NEXT: ishmem_p(dst_size, *src_size, target_pe); + // CHECK-NEXT: ishmem_p(dst_ptrdiff, *src_ptrdiff, target_pe); + nvshmem_float_p(dst_float, *src_float, target_pe); + nvshmem_double_p(dst_double, *src_double, target_pe); + nvshmem_char_p(dst_char, *src_char, target_pe); + nvshmem_schar_p(dst_schar, *src_schar, target_pe); + nvshmem_short_p(dst_short, *src_short, target_pe); + nvshmem_int_p(dst_int, *src_int, target_pe); + 
nvshmem_long_p(dst_long, *src_long, target_pe); + nvshmem_longlong_p(dst_longlong, *src_longlong, target_pe); + nvshmem_uchar_p(dst_uchar, *src_uchar, target_pe); + nvshmem_ushort_p(dst_ushort, *src_ushort, target_pe); + nvshmem_uint_p(dst_uint, *src_uint, target_pe); + nvshmem_ulong_p(dst_ulong, *src_ulong, target_pe); + nvshmem_ulonglong_p(dst_ulonglong, *src_ulonglong, target_pe); + nvshmem_int8_p(dst_int8, *src_int8, target_pe); + nvshmem_int16_p(dst_int16, *src_int16, target_pe); + nvshmem_int32_p(dst_int32, *src_int32, target_pe); + nvshmem_int64_p(dst_int64, *src_int64, target_pe); + nvshmem_uint8_p(dst_uint8, *src_uint8, target_pe); + nvshmem_uint16_p(dst_uint16, *src_uint16, target_pe); + nvshmem_uint32_p(dst_uint32, *src_uint32, target_pe); + nvshmem_uint64_p(dst_uint64, *src_uint64, target_pe); + nvshmem_size_p(dst_size, *src_size, target_pe); + nvshmem_ptrdiff_p(dst_ptrdiff, *src_ptrdiff, target_pe); + + // nvshmem_TYPENAME_get + // ishmem_TYPENAME_get + // CHECK: ishmem_get(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int16, src_int16, 
count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_get(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_get(dst_float, src_float, count, target_pe); + nvshmem_double_get(dst_double, src_double, count, target_pe); + nvshmem_char_get(dst_char, src_char, count, target_pe); + nvshmem_schar_get(dst_schar, src_schar, count, target_pe); + nvshmem_short_get(dst_short, src_short, count, target_pe); + nvshmem_int_get(dst_int, src_int, count, target_pe); + nvshmem_long_get(dst_long, src_long, count, target_pe); + nvshmem_longlong_get(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_get(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_get(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_get(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_get(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_get(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_get(dst_int8, src_int8, count, target_pe); + nvshmem_int16_get(dst_int16, src_int16, count, target_pe); + nvshmem_int32_get(dst_int32, src_int32, count, target_pe); + nvshmem_int64_get(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_get(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_get(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_get(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_get(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_get(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_get(dst_ptrdiff, src_ptrdiff, count, 
target_pe); + + // nvshmemx_TYPENAME_get_on_stream + // ishmemx_TYPENAME_get_on_queue + // CHECK: ishmemx_get_on_queue(dst_float, src_float, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_double, src_double, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_char, src_char, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_schar, src_schar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_short, src_short, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_int, src_int, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_long, src_long, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_longlong, src_longlong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_uchar, src_uchar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_ushort, src_ushort, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_uint, src_uint, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_ulong, src_ulong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_ulonglong, src_ulonglong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_int8, src_int8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_int16, src_int16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_int32, src_int32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_int64, src_int64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_uint8, src_uint8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_uint16, src_uint16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_uint32, src_uint32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_uint64, src_uint64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_size, src_size, 
count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_on_queue(dst_ptrdiff, src_ptrdiff, count, target_pe, *stream); + nvshmemx_float_get_on_stream(dst_float, src_float, count, target_pe, stream); + nvshmemx_double_get_on_stream(dst_double, src_double, count, target_pe, stream); + nvshmemx_char_get_on_stream(dst_char, src_char, count, target_pe, stream); + nvshmemx_schar_get_on_stream(dst_schar, src_schar, count, target_pe, stream); + nvshmemx_short_get_on_stream(dst_short, src_short, count, target_pe, stream); + nvshmemx_int_get_on_stream(dst_int, src_int, count, target_pe, stream); + nvshmemx_long_get_on_stream(dst_long, src_long, count, target_pe, stream); + nvshmemx_longlong_get_on_stream(dst_longlong, src_longlong, count, target_pe, stream); + nvshmemx_uchar_get_on_stream(dst_uchar, src_uchar, count, target_pe, stream); + nvshmemx_ushort_get_on_stream(dst_ushort, src_ushort, count, target_pe, stream); + nvshmemx_uint_get_on_stream(dst_uint, src_uint, count, target_pe, stream); + nvshmemx_ulong_get_on_stream(dst_ulong, src_ulong, count, target_pe, stream); + nvshmemx_ulonglong_get_on_stream(dst_ulonglong, src_ulonglong, count, target_pe, stream); + nvshmemx_int8_get_on_stream(dst_int8, src_int8, count, target_pe, stream); + nvshmemx_int16_get_on_stream(dst_int16, src_int16, count, target_pe, stream); + nvshmemx_int32_get_on_stream(dst_int32, src_int32, count, target_pe, stream); + nvshmemx_int64_get_on_stream(dst_int64, src_int64, count, target_pe, stream); + nvshmemx_uint8_get_on_stream(dst_uint8, src_uint8, count, target_pe, stream); + nvshmemx_uint16_get_on_stream(dst_uint16, src_uint16, count, target_pe, stream); + nvshmemx_uint32_get_on_stream(dst_uint32, src_uint32, count, target_pe, stream); + nvshmemx_uint64_get_on_stream(dst_uint64, src_uint64, count, target_pe, stream); + nvshmemx_size_get_on_stream(dst_size, src_size, count, target_pe, stream); + nvshmemx_ptrdiff_get_on_stream(dst_ptrdiff, src_ptrdiff, count, target_pe, stream); + + // 
nvshmem_getSIZE + // ishmem_getSIZE + // CHECK: ishmem_get8(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get16(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get32(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get64(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get128(dst_void, src_void, count, target_pe); + nvshmem_get8(dst_void, src_void, count, target_pe); + nvshmem_get16(dst_void, src_void, count, target_pe); + nvshmem_get32(dst_void, src_void, count, target_pe); + nvshmem_get64(dst_void, src_void, count, target_pe); + nvshmem_get128(dst_void, src_void, count, target_pe); + + // nvshmemx_getSIZE_on_stream + // ishmemx_getSIZE_on_queue + // CHECK: ishmemx_get8_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get16_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get32_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get64_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get128_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_get8_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get16_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get32_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get64_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get128_on_stream(dst_void, src_void, count, target_pe, stream); + + // nvshmem_TYPENAME_iget + // ishmem_TYPENAME_iget + // CHECK: ishmem_iget(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: 
ishmem_iget(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_float_iget(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_double_iget(dst_double, src_double, 
*dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_char_iget(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_schar_iget(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_short_iget(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int_iget(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_long_iget(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_longlong_iget(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uchar_iget(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ushort_iget(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint_iget(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulong_iget(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ulonglong_iget(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int8_iget(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int16_iget(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int32_iget(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_int64_iget(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint8_iget(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint16_iget(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint32_iget(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_uint64_iget(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_size_iget(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_ptrdiff_iget(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_iget_on_stream + // 
ishmemx_TYPENAME_iget_on_queue + // CHECK: ishmemx_iget_on_queue(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_ulong, src_ulong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, 
target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget_on_queue(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + nvshmemx_float_iget_on_stream(dst_float, src_float, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_double_iget_on_stream(dst_double, src_double, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_char_iget_on_stream(dst_char, src_char, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_schar_iget_on_stream(dst_schar, src_schar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_short_iget_on_stream(dst_short, src_short, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int_iget_on_stream(dst_int, src_int, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_long_iget_on_stream(dst_long, src_long, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_longlong_iget_on_stream(dst_longlong, src_longlong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uchar_iget_on_stream(dst_uchar, src_uchar, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ushort_iget_on_stream(dst_ushort, src_ushort, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint_iget_on_stream(dst_uint, src_uint, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ulong_iget_on_stream(dst_ulong, src_ulong, *dst_ptrdiff, 
*src_ptrdiff, count, target_pe, stream); + nvshmemx_ulonglong_iget_on_stream(dst_ulonglong, src_ulonglong, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int8_iget_on_stream(dst_int8, src_int8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int16_iget_on_stream(dst_int16, src_int16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int32_iget_on_stream(dst_int32, src_int32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_int64_iget_on_stream(dst_int64, src_int64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint8_iget_on_stream(dst_uint8, src_uint8, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint16_iget_on_stream(dst_uint16, src_uint16, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint32_iget_on_stream(dst_uint32, src_uint32, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_uint64_iget_on_stream(dst_uint64, src_uint64, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_size_iget_on_stream(dst_size, src_size, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_ptrdiff_iget_on_stream(dst_ptrdiff, src_ptrdiff, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + + // nvshmem_igetSIZE + // ishmem_igetSIZE + // CHECK: ishmem_iget8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget32(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + // CHECK-NEXT: ishmem_iget128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget8(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget16(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget32(dst_void, src_void, 
*dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget64(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + nvshmem_iget128(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe); + + // nvshmemx_igetSIZE_on_stream + // ishmemx_igetSIZE_on_queue + // CHECK: ishmemx_iget8_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget16_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget32_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget64_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_iget128_on_queue(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, *stream); + nvshmemx_iget8_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iget16_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iget32_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iget64_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + nvshmemx_iget128_on_stream(dst_void, src_void, *dst_ptrdiff, *src_ptrdiff, count, target_pe, stream); + + // nvshmem_getmem + // ishmem_getmem + // CHECK: ishmem_getmem(dst_void, src_void, count, target_pe); + nvshmem_getmem(dst_void, src_void, count, target_pe); + + // nvshmemx_getmem_on_stream + // ishmemx_getmem_on_queue + // CHECK: ishmemx_getmem_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_getmem_on_stream(dst_void, src_void, count, target_pe, stream); + + // nvshmem_TYPENAME_g + // ishmem_TYPENAME_g + // CHECK: *dst_float = ishmem_g(src_float, target_pe); + // CHECK-NEXT: *dst_double = ishmem_g(src_double, target_pe); + // CHECK-NEXT: *dst_char = ishmem_g(src_char, target_pe); + // 
CHECK-NEXT: *dst_schar = ishmem_g(src_schar, target_pe); + // CHECK-NEXT: *dst_short = ishmem_g(src_short, target_pe); + // CHECK-NEXT: *dst_int = ishmem_g(src_int, target_pe); + // CHECK-NEXT: *dst_long = ishmem_g(src_long, target_pe); + // CHECK-NEXT: *dst_longlong = ishmem_g(src_longlong, target_pe); + // CHECK-NEXT: *dst_uchar = ishmem_g(src_uchar, target_pe); + // CHECK-NEXT: *dst_ushort = ishmem_g(src_ushort, target_pe); + // CHECK-NEXT: *dst_uint = ishmem_g(src_uint, target_pe); + // CHECK-NEXT: *dst_ulong = ishmem_g(src_ulong, target_pe); + // CHECK-NEXT: *dst_ulonglong = ishmem_g(src_ulonglong, target_pe); + // CHECK-NEXT: *dst_int8 = ishmem_g(src_int8, target_pe); + // CHECK-NEXT: *dst_int16 = ishmem_g(src_int16, target_pe); + // CHECK-NEXT: *dst_int32 = ishmem_g(src_int32, target_pe); + // CHECK-NEXT: *dst_int64 = ishmem_g(src_int64, target_pe); + // CHECK-NEXT: *dst_uint8 = ishmem_g(src_uint8, target_pe); + // CHECK-NEXT: *dst_uint16 = ishmem_g(src_uint16, target_pe); + // CHECK-NEXT: *dst_uint32 = ishmem_g(src_uint32, target_pe); + // CHECK-NEXT: *dst_uint64 = ishmem_g(src_uint64, target_pe); + // CHECK-NEXT: *dst_size = ishmem_g(src_size, target_pe); + // CHECK-NEXT: *dst_ptrdiff = ishmem_g(src_ptrdiff, target_pe); + *dst_float = nvshmem_float_g(src_float, target_pe); + *dst_double = nvshmem_double_g(src_double, target_pe); + *dst_char = nvshmem_char_g(src_char, target_pe); + *dst_schar = nvshmem_schar_g(src_schar, target_pe); + *dst_short = nvshmem_short_g(src_short, target_pe); + *dst_int = nvshmem_int_g(src_int, target_pe); + *dst_long = nvshmem_long_g(src_long, target_pe); + *dst_longlong = nvshmem_longlong_g(src_longlong, target_pe); + *dst_uchar = nvshmem_uchar_g(src_uchar, target_pe); + *dst_ushort = nvshmem_ushort_g(src_ushort, target_pe); + *dst_uint = nvshmem_uint_g(src_uint, target_pe); + *dst_ulong = nvshmem_ulong_g(src_ulong, target_pe); + *dst_ulonglong = nvshmem_ulonglong_g(src_ulonglong, target_pe); + *dst_int8 = 
nvshmem_int8_g(src_int8, target_pe); + *dst_int16 = nvshmem_int16_g(src_int16, target_pe); + *dst_int32 = nvshmem_int32_g(src_int32, target_pe); + *dst_int64 = nvshmem_int64_g(src_int64, target_pe); + *dst_uint8 = nvshmem_uint8_g(src_uint8, target_pe); + *dst_uint16 = nvshmem_uint16_g(src_uint16, target_pe); + *dst_uint32 = nvshmem_uint32_g(src_uint32, target_pe); + *dst_uint64 = nvshmem_uint64_g(src_uint64, target_pe); + *dst_size = nvshmem_size_g(src_size, target_pe); + *dst_ptrdiff = nvshmem_ptrdiff_g(src_ptrdiff, target_pe); + + return 0; +} diff --git a/clang/test/dpct/nvshmem/rma_nbi.cu b/clang/test/dpct/nvshmem/rma_nbi.cu index 0cd04a507878..9a5a74d6c0f7 100644 --- a/clang/test/dpct/nvshmem/rma_nbi.cu +++ b/clang/test/dpct/nvshmem/rma_nbi.cu @@ -11,23 +11,827 @@ __host__ __device__ void test(int target_pe) { const void *src_void; void *dst_void; + // Standard RMA types + float *src_float; + float *dst_float; + + double *src_double; + double *dst_double; + + char *src_char; + char *dst_char; + + signed char *src_schar; + signed char *dst_schar; + + short *src_short; + short *dst_short; + + int *src_int; + int *dst_int; + + long *src_long; + long *dst_long; + + long long *src_longlong; + long long *dst_longlong; + + unsigned char *src_uchar; + unsigned char *dst_uchar; + + unsigned short *src_ushort; + unsigned short *dst_ushort; + + unsigned int *src_uint; + unsigned int *dst_uint; + + unsigned long *src_ulong; + unsigned long *dst_ulong; + + unsigned long long *src_ulonglong; + unsigned long long *dst_ulonglong; + + int8_t *src_int8; + int8_t *dst_int8; + + int16_t *src_int16; + int16_t *dst_int16; + + int32_t *src_int32; + int32_t *dst_int32; + + int64_t *src_int64; + int64_t *dst_int64; + + uint8_t *src_uint8; + uint8_t *dst_uint8; + + uint16_t *src_uint16; + uint16_t *dst_uint16; + + uint32_t *src_uint32; + uint32_t *dst_uint32; + + uint64_t *src_uint64; + uint64_t *dst_uint64; + + size_t *src_size; + size_t *dst_size; + + ptrdiff_t *src_ptrdiff; + 
ptrdiff_t *dst_ptrdiff; + const int count = 10; + // nvshmem_TYPENAME_put_nbi + // ishmem_TYPENAME_put_nbi + // CHECK: ishmem_put_nbi(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_put_nbi(dst_float, src_float, count, target_pe); + nvshmem_double_put_nbi(dst_double, src_double, count, target_pe); + nvshmem_char_put_nbi(dst_char, src_char, count, target_pe); + 
nvshmem_schar_put_nbi(dst_schar, src_schar, count, target_pe); + nvshmem_short_put_nbi(dst_short, src_short, count, target_pe); + nvshmem_int_put_nbi(dst_int, src_int, count, target_pe); + nvshmem_long_put_nbi(dst_long, src_long, count, target_pe); + nvshmem_longlong_put_nbi(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_put_nbi(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_put_nbi(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_put_nbi(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_put_nbi(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_put_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_put_nbi(dst_int8, src_int8, count, target_pe); + nvshmem_int16_put_nbi(dst_int16, src_int16, count, target_pe); + nvshmem_int32_put_nbi(dst_int32, src_int32, count, target_pe); + nvshmem_int64_put_nbi(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_put_nbi(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_put_nbi(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_put_nbi(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_put_nbi(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_put_nbi(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_put_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_put_nbi_block + // ishmemx_TYPENAME_put_nbi_work_group + // CHECK: ishmemx_put_nbi_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: 
ishmemx_put_nbi_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: 
ishmemx_put_nbi_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_float_put_nbi_block(dst_float, src_float, count, target_pe); + nvshmemx_double_put_nbi_block(dst_double, src_double, count, target_pe); + nvshmemx_char_put_nbi_block(dst_char, src_char, count, target_pe); + nvshmemx_schar_put_nbi_block(dst_schar, src_schar, count, target_pe); + nvshmemx_short_put_nbi_block(dst_short, src_short, count, target_pe); + nvshmemx_int_put_nbi_block(dst_int, src_int, count, target_pe); + nvshmemx_long_put_nbi_block(dst_long, src_long, count, target_pe); + nvshmemx_longlong_put_nbi_block(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_put_nbi_block(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_put_nbi_block(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_put_nbi_block(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_put_nbi_block(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_put_nbi_block(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_put_nbi_block(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_put_nbi_block(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_put_nbi_block(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_put_nbi_block(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_put_nbi_block(dst_uint8, src_uint8, count, 
target_pe); + nvshmemx_uint16_put_nbi_block(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_put_nbi_block(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_put_nbi_block(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_put_nbi_block(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_put_nbi_block(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_put_nbi_warp + // ishmemx_TYPENAME_put_nbi_work_group + // CHECK: ishmemx_put_nbi_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: 
ishmemx_put_nbi_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put_nbi_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_float_put_nbi_warp(dst_float, src_float, count, target_pe); + nvshmemx_double_put_nbi_warp(dst_double, src_double, count, target_pe); + nvshmemx_char_put_nbi_warp(dst_char, src_char, count, target_pe); + nvshmemx_schar_put_nbi_warp(dst_schar, src_schar, count, target_pe); + nvshmemx_short_put_nbi_warp(dst_short, src_short, count, target_pe); + 
nvshmemx_int_put_nbi_warp(dst_int, src_int, count, target_pe); + nvshmemx_long_put_nbi_warp(dst_long, src_long, count, target_pe); + nvshmemx_longlong_put_nbi_warp(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_put_nbi_warp(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_put_nbi_warp(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_put_nbi_warp(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_put_nbi_warp(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_put_nbi_warp(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_put_nbi_warp(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_put_nbi_warp(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_put_nbi_warp(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_put_nbi_warp(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_put_nbi_warp(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_put_nbi_warp(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_put_nbi_warp(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_put_nbi_warp(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_put_nbi_warp(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_put_nbi_warp(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmem_putSIZE_nbi + // ishmem_putSIZE_nbi + // CHECK: ishmem_put8_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put16_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put32_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put64_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put128_nbi(dst_void, src_void, count, target_pe); + nvshmem_put8_nbi(dst_void, src_void, count, target_pe); + nvshmem_put16_nbi(dst_void, src_void, count, target_pe); + nvshmem_put32_nbi(dst_void, src_void, count, target_pe); + nvshmem_put64_nbi(dst_void, src_void, count, target_pe); + nvshmem_put128_nbi(dst_void, src_void, count, 
target_pe); + + // nvshmemx_putSIZE_nbi_block + // ishmemx_putSIZE_nbi_work_group + // CHECK: ishmemx_put8_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put16_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put32_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put64_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_put128_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_put8_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_put16_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_put32_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_put64_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_put128_nbi_block(dst_void, src_void, count, target_pe); + + // nvshmemx_putSIZE_nbi_warp + // ishmemx_putSIZE_nbi_work_group + // CHECK: ishmemx_put8_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put16_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put32_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put64_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_put128_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_put8_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_put16_nbi_warp(dst_void, src_void, count, target_pe); + 
nvshmemx_put32_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_put64_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_put128_nbi_warp(dst_void, src_void, count, target_pe); + // nvshmem_putmem_nbi // ishmem_putmem_nbi // CHECK: ishmem_putmem_nbi(dst_void, src_void, count, target_pe); nvshmem_putmem_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_putmem_nbi_block + // ishmemx_putmem_nbi_work_group + // CHECK: ishmemx_putmem_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_putmem_nbi_block(dst_void, src_void, count, target_pe); + + // nvshmemx_putmem_nbi_warp + // ishmemx_putmem_nbi_work_group + // CHECK: ishmemx_putmem_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_putmem_nbi_warp(dst_void, src_void, count, target_pe); + + // nvshmem_TYPENAME_get_nbi + // ishmem_TYPENAME_get_nbi + // CHECK: ishmem_get_nbi(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int8, src_int8, count, target_pe); + // 
CHECK-NEXT: ishmem_get_nbi(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_get_nbi(dst_float, src_float, count, target_pe); + nvshmem_double_get_nbi(dst_double, src_double, count, target_pe); + nvshmem_char_get_nbi(dst_char, src_char, count, target_pe); + nvshmem_schar_get_nbi(dst_schar, src_schar, count, target_pe); + nvshmem_short_get_nbi(dst_short, src_short, count, target_pe); + nvshmem_int_get_nbi(dst_int, src_int, count, target_pe); + nvshmem_long_get_nbi(dst_long, src_long, count, target_pe); + nvshmem_longlong_get_nbi(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_get_nbi(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_get_nbi(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_get_nbi(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_get_nbi(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_get_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_get_nbi(dst_int8, src_int8, count, target_pe); + nvshmem_int16_get_nbi(dst_int16, src_int16, count, target_pe); + nvshmem_int32_get_nbi(dst_int32, src_int32, count, target_pe); + nvshmem_int64_get_nbi(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_get_nbi(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_get_nbi(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_get_nbi(dst_uint32, src_uint32, count, target_pe); + 
nvshmem_uint64_get_nbi(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_get_nbi(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_get_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_get_nbi_block + // ishmemx_TYPENAME_get_nbi_work_group + // CHECK: ishmemx_get_nbi_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_short, src_short, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: 
ishmemx_get_nbi_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_float_get_nbi_block(dst_float, src_float, count, target_pe); + nvshmemx_double_get_nbi_block(dst_double, src_double, count, target_pe); + nvshmemx_char_get_nbi_block(dst_char, src_char, count, target_pe); + nvshmemx_schar_get_nbi_block(dst_schar, src_schar, count, target_pe); + nvshmemx_short_get_nbi_block(dst_short, src_short, count, target_pe); + nvshmemx_int_get_nbi_block(dst_int, src_int, count, target_pe); + 
nvshmemx_long_get_nbi_block(dst_long, src_long, count, target_pe); + nvshmemx_longlong_get_nbi_block(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_get_nbi_block(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_get_nbi_block(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_get_nbi_block(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_get_nbi_block(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_get_nbi_block(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_get_nbi_block(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_get_nbi_block(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_get_nbi_block(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_get_nbi_block(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_get_nbi_block(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_get_nbi_block(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_get_nbi_block(dst_uint32, src_uint32, count, target_pe); + nvshmemx_uint64_get_nbi_block(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_get_nbi_block(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_get_nbi_block(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_get_nbi_warp + // ishmemx_TYPENAME_get_nbi_work_group + // CHECK: ishmemx_get_nbi_work_group(dst_float, src_float, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_double, src_double, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_char, src_char, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_schar, src_schar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_short, src_short, count, target_pe, 
sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int, src_int, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_long, src_long, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_longlong, src_longlong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uchar, src_uchar, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ushort, src_ushort, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint, src_uint, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ulong, src_ulong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ulonglong, src_ulonglong, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int8, src_int8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int16, src_int16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int32, src_int32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_int64, src_int64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint8, src_uint8, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint16, src_uint16, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: 
ishmemx_get_nbi_work_group(dst_uint32, src_uint32, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_uint64, src_uint64, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_size, src_size, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get_nbi_work_group(dst_ptrdiff, src_ptrdiff, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_float_get_nbi_warp(dst_float, src_float, count, target_pe); + nvshmemx_double_get_nbi_warp(dst_double, src_double, count, target_pe); + nvshmemx_char_get_nbi_warp(dst_char, src_char, count, target_pe); + nvshmemx_schar_get_nbi_warp(dst_schar, src_schar, count, target_pe); + nvshmemx_short_get_nbi_warp(dst_short, src_short, count, target_pe); + nvshmemx_int_get_nbi_warp(dst_int, src_int, count, target_pe); + nvshmemx_long_get_nbi_warp(dst_long, src_long, count, target_pe); + nvshmemx_longlong_get_nbi_warp(dst_longlong, src_longlong, count, target_pe); + nvshmemx_uchar_get_nbi_warp(dst_uchar, src_uchar, count, target_pe); + nvshmemx_ushort_get_nbi_warp(dst_ushort, src_ushort, count, target_pe); + nvshmemx_uint_get_nbi_warp(dst_uint, src_uint, count, target_pe); + nvshmemx_ulong_get_nbi_warp(dst_ulong, src_ulong, count, target_pe); + nvshmemx_ulonglong_get_nbi_warp(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmemx_int8_get_nbi_warp(dst_int8, src_int8, count, target_pe); + nvshmemx_int16_get_nbi_warp(dst_int16, src_int16, count, target_pe); + nvshmemx_int32_get_nbi_warp(dst_int32, src_int32, count, target_pe); + nvshmemx_int64_get_nbi_warp(dst_int64, src_int64, count, target_pe); + nvshmemx_uint8_get_nbi_warp(dst_uint8, src_uint8, count, target_pe); + nvshmemx_uint16_get_nbi_warp(dst_uint16, src_uint16, count, target_pe); + nvshmemx_uint32_get_nbi_warp(dst_uint32, src_uint32, count, target_pe); + 
nvshmemx_uint64_get_nbi_warp(dst_uint64, src_uint64, count, target_pe); + nvshmemx_size_get_nbi_warp(dst_size, src_size, count, target_pe); + nvshmemx_ptrdiff_get_nbi_warp(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmem_getSIZE_nbi + // ishmem_getSIZE_nbi + // CHECK: ishmem_get8_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get16_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get32_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get64_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get128_nbi(dst_void, src_void, count, target_pe); + nvshmem_get8_nbi(dst_void, src_void, count, target_pe); + nvshmem_get16_nbi(dst_void, src_void, count, target_pe); + nvshmem_get32_nbi(dst_void, src_void, count, target_pe); + nvshmem_get64_nbi(dst_void, src_void, count, target_pe); + nvshmem_get128_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_getSIZE_nbi_block + // ishmemx_getSIZE_nbi_work_group + // CHECK: ishmemx_get8_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get16_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get32_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get64_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + // CHECK-NEXT: ishmemx_get128_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_get8_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_get16_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_get32_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_get64_nbi_block(dst_void, src_void, count, target_pe); + nvshmemx_get128_nbi_block(dst_void, 
src_void, count, target_pe); + + // nvshmemx_getSIZE_nbi_warp + // ishmemx_getSIZE_nbi_work_group + // CHECK: ishmemx_get8_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get16_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get32_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get64_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + // CHECK-NEXT: ishmemx_get128_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_get8_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_get16_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_get32_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_get64_nbi_warp(dst_void, src_void, count, target_pe); + nvshmemx_get128_nbi_warp(dst_void, src_void, count, target_pe); + + // nvshmem_getmem_nbi + // ishmem_getmem_nbi + // CHECK: ishmem_getmem_nbi(dst_void, src_void, count, target_pe); + nvshmem_getmem_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_getmem_nbi_block + // ishmemx_getmem_nbi_work_group + // CHECK: ishmemx_getmem_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_work_group<3>()); + nvshmemx_getmem_nbi_block(dst_void, src_void, count, target_pe); + + // nvshmemx_getmem_nbi_warp + // ishmemx_getmem_nbi_work_group + // CHECK: ishmemx_getmem_nbi_work_group(dst_void, src_void, count, target_pe, sycl::ext::oneapi::this_work_item::get_sub_group()); + nvshmemx_getmem_nbi_warp(dst_void, src_void, count, target_pe); } + int main() { const void *src_void; void *dst_void; + // Standard RMA types + float *src_float; + float *dst_float; + + double *src_double; + double *dst_double; + + char 
*src_char; + char *dst_char; + + signed char *src_schar; + signed char *dst_schar; + + short *src_short; + short *dst_short; + + int *src_int; + int *dst_int; + + long *src_long; + long *dst_long; + + long long *src_longlong; + long long *dst_longlong; + + unsigned char *src_uchar; + unsigned char *dst_uchar; + + unsigned short *src_ushort; + unsigned short *dst_ushort; + + unsigned int *src_uint; + unsigned int *dst_uint; + + unsigned long *src_ulong; + unsigned long *dst_ulong; + + unsigned long long *src_ulonglong; + unsigned long long *dst_ulonglong; + + int8_t *src_int8; + int8_t *dst_int8; + + int16_t *src_int16; + int16_t *dst_int16; + + int32_t *src_int32; + int32_t *dst_int32; + + int64_t *src_int64; + int64_t *dst_int64; + + uint8_t *src_uint8; + uint8_t *dst_uint8; + + uint16_t *src_uint16; + uint16_t *dst_uint16; + + uint32_t *src_uint32; + uint32_t *dst_uint32; + + uint64_t *src_uint64; + uint64_t *dst_uint64; + + size_t *src_size; + size_t *dst_size; + + ptrdiff_t *src_ptrdiff; + ptrdiff_t *dst_ptrdiff; + int target_pe = 0; const int count = 10; + cudaStream_t stream = 0; + + // nvshmem_TYPENAME_put_nbi + // ishmem_TYPENAME_put_nbi + // CHECK: ishmem_put_nbi(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: 
ishmem_put_nbi(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint8, src_uint8, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_put_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_put_nbi(dst_float, src_float, count, target_pe); + nvshmem_double_put_nbi(dst_double, src_double, count, target_pe); + nvshmem_char_put_nbi(dst_char, src_char, count, target_pe); + nvshmem_schar_put_nbi(dst_schar, src_schar, count, target_pe); + nvshmem_short_put_nbi(dst_short, src_short, count, target_pe); + nvshmem_int_put_nbi(dst_int, src_int, count, target_pe); + nvshmem_long_put_nbi(dst_long, src_long, count, target_pe); + nvshmem_longlong_put_nbi(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_put_nbi(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_put_nbi(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_put_nbi(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_put_nbi(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_put_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_put_nbi(dst_int8, src_int8, count, target_pe); + nvshmem_int16_put_nbi(dst_int16, src_int16, count, target_pe); + nvshmem_int32_put_nbi(dst_int32, src_int32, count, target_pe); + nvshmem_int64_put_nbi(dst_int64, src_int64, count, target_pe); + 
nvshmem_uint8_put_nbi(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_put_nbi(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_put_nbi(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_put_nbi(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_put_nbi(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_put_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_put_nbi_on_stream + // ishmemx_TYPENAME_put_nbi_on_queue + // CHECK: ishmemx_put_nbi_on_queue(dst_float, src_float, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_double, src_double, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_char, src_char, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_schar, src_schar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_short, src_short, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_int, src_int, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_long, src_long, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_longlong, src_longlong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_uchar, src_uchar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_ushort, src_ushort, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_uint, src_uint, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_ulong, src_ulong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_ulonglong, src_ulonglong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_int8, src_int8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_int16, src_int16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_int32, src_int32, count, target_pe, *stream); + // CHECK-NEXT: 
ishmemx_put_nbi_on_queue(dst_int64, src_int64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_uint8, src_uint8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_uint16, src_uint16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_uint32, src_uint32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_uint64, src_uint64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_size, src_size, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put_nbi_on_queue(dst_ptrdiff, src_ptrdiff, count, target_pe, *stream); + nvshmemx_float_put_nbi_on_stream(dst_float, src_float, count, target_pe, stream); + nvshmemx_double_put_nbi_on_stream(dst_double, src_double, count, target_pe, stream); + nvshmemx_char_put_nbi_on_stream(dst_char, src_char, count, target_pe, stream); + nvshmemx_schar_put_nbi_on_stream(dst_schar, src_schar, count, target_pe, stream); + nvshmemx_short_put_nbi_on_stream(dst_short, src_short, count, target_pe, stream); + nvshmemx_int_put_nbi_on_stream(dst_int, src_int, count, target_pe, stream); + nvshmemx_long_put_nbi_on_stream(dst_long, src_long, count, target_pe, stream); + nvshmemx_longlong_put_nbi_on_stream(dst_longlong, src_longlong, count, target_pe, stream); + nvshmemx_uchar_put_nbi_on_stream(dst_uchar, src_uchar, count, target_pe, stream); + nvshmemx_ushort_put_nbi_on_stream(dst_ushort, src_ushort, count, target_pe, stream); + nvshmemx_uint_put_nbi_on_stream(dst_uint, src_uint, count, target_pe, stream); + nvshmemx_ulong_put_nbi_on_stream(dst_ulong, src_ulong, count, target_pe, stream); + nvshmemx_ulonglong_put_nbi_on_stream(dst_ulonglong, src_ulonglong, count, target_pe, stream); + nvshmemx_int8_put_nbi_on_stream(dst_int8, src_int8, count, target_pe, stream); + nvshmemx_int16_put_nbi_on_stream(dst_int16, src_int16, count, target_pe, stream); + nvshmemx_int32_put_nbi_on_stream(dst_int32, src_int32, count, target_pe, stream); + 
nvshmemx_int64_put_nbi_on_stream(dst_int64, src_int64, count, target_pe, stream); + nvshmemx_uint8_put_nbi_on_stream(dst_uint8, src_uint8, count, target_pe, stream); + nvshmemx_uint16_put_nbi_on_stream(dst_uint16, src_uint16, count, target_pe, stream); + nvshmemx_uint32_put_nbi_on_stream(dst_uint32, src_uint32, count, target_pe, stream); + nvshmemx_uint64_put_nbi_on_stream(dst_uint64, src_uint64, count, target_pe, stream); + nvshmemx_size_put_nbi_on_stream(dst_size, src_size, count, target_pe, stream); + nvshmemx_ptrdiff_put_nbi_on_stream(dst_ptrdiff, src_ptrdiff, count, target_pe, stream); + + // nvshmem_putSIZE_nbi + // ishmem_putSIZE_nbi + // CHECK: ishmem_put8_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put16_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put32_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put64_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_put128_nbi(dst_void, src_void, count, target_pe); + nvshmem_put8_nbi(dst_void, src_void, count, target_pe); + nvshmem_put16_nbi(dst_void, src_void, count, target_pe); + nvshmem_put32_nbi(dst_void, src_void, count, target_pe); + nvshmem_put64_nbi(dst_void, src_void, count, target_pe); + nvshmem_put128_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_putSIZE_nbi_on_stream + // ishmemx_putSIZE_nbi_on_queue + // CHECK: ishmemx_put8_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put16_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put32_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put64_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_put128_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_put8_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put16_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + 
nvshmemx_put32_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put64_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_put128_nbi_on_stream(dst_void, src_void, count, target_pe, stream); // nvshmem_putmem_nbi // ishmem_putmem_nbi // CHECK: ishmem_putmem_nbi(dst_void, src_void, count, target_pe); nvshmem_putmem_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_putmem_nbi_on_stream + // ishmemx_putmem_nbi_on_queue + // CHECK: ishmemx_putmem_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_putmem_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + + // nvshmem_TYPENAME_get_nbi + // ishmem_TYPENAME_get_nbi + // CHECK: ishmem_get_nbi(dst_float, src_float, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_double, src_double, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_char, src_char, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_schar, src_schar, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_short, src_short, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int, src_int, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_long, src_long, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_longlong, src_longlong, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uchar, src_uchar, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ushort, src_ushort, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint, src_uint, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ulong, src_ulong, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int8, src_int8, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int16, src_int16, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int32, src_int32, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_int64, src_int64, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint8, src_uint8, 
count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint16, src_uint16, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint32, src_uint32, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_uint64, src_uint64, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_size, src_size, count, target_pe); + // CHECK-NEXT: ishmem_get_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + nvshmem_float_get_nbi(dst_float, src_float, count, target_pe); + nvshmem_double_get_nbi(dst_double, src_double, count, target_pe); + nvshmem_char_get_nbi(dst_char, src_char, count, target_pe); + nvshmem_schar_get_nbi(dst_schar, src_schar, count, target_pe); + nvshmem_short_get_nbi(dst_short, src_short, count, target_pe); + nvshmem_int_get_nbi(dst_int, src_int, count, target_pe); + nvshmem_long_get_nbi(dst_long, src_long, count, target_pe); + nvshmem_longlong_get_nbi(dst_longlong, src_longlong, count, target_pe); + nvshmem_uchar_get_nbi(dst_uchar, src_uchar, count, target_pe); + nvshmem_ushort_get_nbi(dst_ushort, src_ushort, count, target_pe); + nvshmem_uint_get_nbi(dst_uint, src_uint, count, target_pe); + nvshmem_ulong_get_nbi(dst_ulong, src_ulong, count, target_pe); + nvshmem_ulonglong_get_nbi(dst_ulonglong, src_ulonglong, count, target_pe); + nvshmem_int8_get_nbi(dst_int8, src_int8, count, target_pe); + nvshmem_int16_get_nbi(dst_int16, src_int16, count, target_pe); + nvshmem_int32_get_nbi(dst_int32, src_int32, count, target_pe); + nvshmem_int64_get_nbi(dst_int64, src_int64, count, target_pe); + nvshmem_uint8_get_nbi(dst_uint8, src_uint8, count, target_pe); + nvshmem_uint16_get_nbi(dst_uint16, src_uint16, count, target_pe); + nvshmem_uint32_get_nbi(dst_uint32, src_uint32, count, target_pe); + nvshmem_uint64_get_nbi(dst_uint64, src_uint64, count, target_pe); + nvshmem_size_get_nbi(dst_size, src_size, count, target_pe); + nvshmem_ptrdiff_get_nbi(dst_ptrdiff, src_ptrdiff, count, target_pe); + + // nvshmemx_TYPENAME_get_nbi_on_stream + // ishmemx_TYPENAME_get_nbi_on_queue + // 
CHECK: ishmemx_get_nbi_on_queue(dst_float, src_float, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_double, src_double, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_char, src_char, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_schar, src_schar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_short, src_short, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_int, src_int, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_long, src_long, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_longlong, src_longlong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_uchar, src_uchar, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_ushort, src_ushort, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_uint, src_uint, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_ulong, src_ulong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_ulonglong, src_ulonglong, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_int8, src_int8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_int16, src_int16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_int32, src_int32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_int64, src_int64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_uint8, src_uint8, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_uint16, src_uint16, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_uint32, src_uint32, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_uint64, src_uint64, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_size, src_size, 
count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get_nbi_on_queue(dst_ptrdiff, src_ptrdiff, count, target_pe, *stream); + nvshmemx_float_get_nbi_on_stream(dst_float, src_float, count, target_pe, stream); + nvshmemx_double_get_nbi_on_stream(dst_double, src_double, count, target_pe, stream); + nvshmemx_char_get_nbi_on_stream(dst_char, src_char, count, target_pe, stream); + nvshmemx_schar_get_nbi_on_stream(dst_schar, src_schar, count, target_pe, stream); + nvshmemx_short_get_nbi_on_stream(dst_short, src_short, count, target_pe, stream); + nvshmemx_int_get_nbi_on_stream(dst_int, src_int, count, target_pe, stream); + nvshmemx_long_get_nbi_on_stream(dst_long, src_long, count, target_pe, stream); + nvshmemx_longlong_get_nbi_on_stream(dst_longlong, src_longlong, count, target_pe, stream); + nvshmemx_uchar_get_nbi_on_stream(dst_uchar, src_uchar, count, target_pe, stream); + nvshmemx_ushort_get_nbi_on_stream(dst_ushort, src_ushort, count, target_pe, stream); + nvshmemx_uint_get_nbi_on_stream(dst_uint, src_uint, count, target_pe, stream); + nvshmemx_ulong_get_nbi_on_stream(dst_ulong, src_ulong, count, target_pe, stream); + nvshmemx_ulonglong_get_nbi_on_stream(dst_ulonglong, src_ulonglong, count, target_pe, stream); + nvshmemx_int8_get_nbi_on_stream(dst_int8, src_int8, count, target_pe, stream); + nvshmemx_int16_get_nbi_on_stream(dst_int16, src_int16, count, target_pe, stream); + nvshmemx_int32_get_nbi_on_stream(dst_int32, src_int32, count, target_pe, stream); + nvshmemx_int64_get_nbi_on_stream(dst_int64, src_int64, count, target_pe, stream); + nvshmemx_uint8_get_nbi_on_stream(dst_uint8, src_uint8, count, target_pe, stream); + nvshmemx_uint16_get_nbi_on_stream(dst_uint16, src_uint16, count, target_pe, stream); + nvshmemx_uint32_get_nbi_on_stream(dst_uint32, src_uint32, count, target_pe, stream); + nvshmemx_uint64_get_nbi_on_stream(dst_uint64, src_uint64, count, target_pe, stream); + nvshmemx_size_get_nbi_on_stream(dst_size, src_size, count, target_pe, stream); + 
nvshmemx_ptrdiff_get_nbi_on_stream(dst_ptrdiff, src_ptrdiff, count, target_pe, stream); + + // nvshmem_getSIZE_nbi + // ishmem_getSIZE_nbi + // CHECK: ishmem_get8_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get16_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get32_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get64_nbi(dst_void, src_void, count, target_pe); + // CHECK-NEXT: ishmem_get128_nbi(dst_void, src_void, count, target_pe); + nvshmem_get8_nbi(dst_void, src_void, count, target_pe); + nvshmem_get16_nbi(dst_void, src_void, count, target_pe); + nvshmem_get32_nbi(dst_void, src_void, count, target_pe); + nvshmem_get64_nbi(dst_void, src_void, count, target_pe); + nvshmem_get128_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_getSIZE_nbi_on_stream + // ishmemx_getSIZE_nbi_on_queue + // CHECK: ishmemx_get8_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get16_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get32_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get64_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + // CHECK-NEXT: ishmemx_get128_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_get8_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get16_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get32_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get64_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + nvshmemx_get128_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + + // nvshmem_getmem_nbi + // ishmem_getmem_nbi + // CHECK: ishmem_getmem_nbi(dst_void, src_void, count, target_pe); + nvshmem_getmem_nbi(dst_void, src_void, count, target_pe); + + // nvshmemx_getmem_nbi_on_stream + // ishmemx_getmem_nbi_on_queue + // CHECK: 
ishmemx_getmem_nbi_on_queue(dst_void, src_void, count, target_pe, *stream); + nvshmemx_getmem_nbi_on_stream(dst_void, src_void, count, target_pe, stream); + + return 0; }