diff --git a/projects/miopen/addkernels/addkernels.cpp b/projects/miopen/addkernels/addkernels.cpp index 074ae60f95ce..c9615c94d2f9 100644 --- a/projects/miopen/addkernels/addkernels.cpp +++ b/projects/miopen/addkernels/addkernels.cpp @@ -41,16 +41,16 @@ void Bin2Hex(std::istream& source, while(blockStart < sourceSize) { - source.read(reinterpret_cast(buffer.get()), bufferSize); + source.read(reinterpret_cast(buffer.get()), std::streamsize(bufferSize)); - const std::streamoff pos = source.tellg(); - const std::streamoff blockSize = (pos < 0 ? sourceSize : pos) - blockStart; - std::streamoff i = 0; + const std::streamoff pos = source.tellg(); + const auto blockSize = (pos < 0 ? sourceSize : pos) - blockStart; + size_t i = 0; while(i < blockSize) { - size_t j = i; - const size_t end = std::min(i + lineSize, blockSize); + size_t j = i; + const auto end = std::min(i + lineSize, size_t(blockSize)); for(; j < end; j++) target << "0x" << std::setw(2) << static_cast(buffer[j]) << ","; @@ -98,7 +98,7 @@ void Bin2Asm(std::istream& source, // Write binary data for(std::streamoff blockStart = 0; blockStart < sourceSize; blockStart += bufferSize) { - source.read(buffer.get(), bufferSize); + source.read(buffer.get(), std::streamsize(bufferSize)); const auto pos = source.tellg(); const auto blockSize = (pos < 0 ? sourceSize : pos) - blockStart; @@ -329,11 +329,11 @@ int main(int argc, char* argv[]) } else if(arg == "-l" || arg == "-line-size") { - lineSize = std::stol(argv[++i]); + lineSize = std::stoul(argv[++i]); } else if(arg == "-b" || arg == "-buffer") { - bufferSize = std::stol(argv[++i]); + bufferSize = std::stoul(argv[++i]); } else if(arg == "-g" || arg == "-guard") { @@ -446,7 +446,7 @@ int main(int argc, char* argv[]) std::cerr << "failure opening file: " << targetFile << "\n"; return 1; } - file.write(sourceCode.data(), sourceCode.length()); + file.write(sourceCode.data(), std::streamsize(sourceCode.length())); } return 0; diff --git a/projects/miopen/addkernels/include_inliner.cpp b/projects/miopen/addkernels/include_inliner.cpp index 9fe80cd8bafe..6745ed1e4776 100644 --- a/projects/miopen/addkernels/include_inliner.cpp +++ b/projects/miopen/addkernels/include_inliner.cpp @@ -89,7 +89,7 @@ void IncludeInliner::ProcessCore(std::istream& input, if(!word.empty() && word == directive && recurse) { - auto first_quote_pos = line.find('"', static_cast(line_parser.tellg()) + 1); + auto first_quote_pos = line.find('"', static_cast(line_parser.tellg()) + 1); std::string::size_type second_quote_pos; if(first_quote_pos != std::string::npos) @@ -103,7 +103,7 @@ void IncludeInliner::ProcessCore(std::istream& input, if(!allow_angle_brackets) throw WrongInlineDirectiveException(GetIncludeStackTrace(current_line)); - first_quote_pos = line.find('<', static_cast(line_parser.tellg()) + 1); + first_quote_pos = line.find('<', static_cast(line_parser.tellg()) + 1); if(first_quote_pos == std::string::npos) throw WrongInlineDirectiveException(GetIncludeStackTrace(current_line)); diff --git a/projects/miopen/cmake/EnableCompilerWarnings.cmake b/projects/miopen/cmake/EnableCompilerWarnings.cmake index 10a610f1c4a7..e515da9b9f33 100644 --- a/projects/miopen/cmake/EnableCompilerWarnings.cmake +++ b/projects/miopen/cmake/EnableCompilerWarnings.cmake @@ -46,7 +46,6 @@ set(__clang_cxx_compile_options -Weverything -Wno-c++98-compat -Wno-c++98-compat-pedantic - -Wno-conversion -Wno-double-promotion -Wno-exit-time-destructors -Wno-extra-semi @@ -71,7 +70,6 @@ set(__clang_cxx_compile_options if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "19") list(APPEND __clang_cxx_compile_options - -Wno-unique-object-duplication -Wno-switch-default) endif() @@ -85,6 +83,8 @@ endif() if(WIN32) list(APPEND __clang_cxx_compile_options + -Wno-ignored-attributes + -Wno-language-extension-token -fms-extensions -fms-compatibility) endif() diff --git a/projects/miopen/driver/layernorm_driver.hpp b/projects/miopen/driver/layernorm_driver.hpp index 6f6662f202f6..08473b515a03 100644 --- a/projects/miopen/driver/layernorm_driver.hpp +++ b/projects/miopen/driver/layernorm_driver.hpp @@ -498,8 +498,9 @@ int LayerNormDriver::RunBackwardGPU() std::cout << "Wall-clock Time Backward LayerNorm Elapsed: " << t.gettime_ms() / iter << " ms\n"; - float kernel_average_time = - iter > 1 ? (kernel_total_time - kernel_first_time) / (iter - 1) : kernel_first_time; + float kernel_average_time = iter > 1 + ? (kernel_total_time - kernel_first_time) / float(iter - 1) + : kernel_first_time; std::cout << "GPU Kernel Time Backward LayerNorm Elapsed: " << kernel_average_time << " ms\n"; } diff --git a/projects/miopen/driver/timer.hpp b/projects/miopen/driver/timer.hpp index 4af4c9a2ab9f..897ca9d91ee7 100644 --- a/projects/miopen/driver/timer.hpp +++ b/projects/miopen/driver/timer.hpp @@ -47,7 +47,7 @@ class Timer { public: - Timer(){}; + Timer() {}; void start(const bool enabled = true) { if(!enabled) @@ -80,7 +80,7 @@ class Timer class Timer2 { public: - Timer2(){}; + Timer2() {}; void start(const bool enabled = true) { if(!enabled) @@ -262,7 +262,7 @@ class RNNCombTimeLogger printf("GPU Kernel Time Elapsed: %f ms\n", n_iter > 1 ? gpu_avg / (n_iter - 1) : gpu_time); printf("Wall-clock Time Elapsed: %f ms\n", - n_iter > 1 ? host_avg / (n_iter - 1) : hostTimePerLaunch[0]); + n_iter > 1 ? host_avg / float(n_iter - 1) : hostTimePerLaunch[0]); } enum class ClockMode diff --git a/projects/miopen/src/activ/problem_description.cpp b/projects/miopen/src/activ/problem_description.cpp index 9bc484259dc7..1be3a181e855 100644 --- a/projects/miopen/src/activ/problem_description.cpp +++ b/projects/miopen/src/activ/problem_description.cpp @@ -54,7 +54,7 @@ NetworkConfig ProblemDescription::MakeNetworkConfig() const const auto read_len = (packed) ? x_elem_sz : x_width2D; - const auto read_unit = (read_len % 4 == 0) ? 4 : (read_len % 2 == 0) ? 2 : 1; + const auto read_unit = (read_len % 4u == 0) ? 4u : (read_len % 2u == 0) ? 2u : 1u; const auto MAP_RD = read_len / read_unit; std::ostringstream ss; diff --git a/projects/miopen/src/api/find2_0_commons.cpp b/projects/miopen/src/api/find2_0_commons.cpp index 3a4fc617ee96..10def302cb56 100644 --- a/projects/miopen/src/api/find2_0_commons.cpp +++ b/projects/miopen/src/api/find2_0_commons.cpp @@ -272,7 +272,7 @@ miopenStatus_t miopenFindSolutions(miopenHandle_t handle, }, problem_deref); - for(auto i = 0; i < solutions_deref.size(); ++i) + for(size_t i = 0; i < solutions_deref.size(); ++i) { auto& theSolution = miopen::deref(solutions + i); theSolution = new miopen::Solution{std::move(solutions_deref[i])}; diff --git a/projects/miopen/src/base64.cpp b/projects/miopen/src/base64.cpp index f0f734a9f779..7a37315d0882 100644 --- a/projects/miopen/src/base64.cpp +++ b/projects/miopen/src/base64.cpp @@ -55,8 +55,8 @@ std::string base64Encode(const uint8_t* data, std::size_t length) if(i == 3) { charArray4[0] = (charArray3[0] & 0xfc) >> 2; - charArray4[1] = ((charArray3[0] & 0x03) << 4) + ((charArray3[1] & 0xf0) >> 4); - charArray4[2] = ((charArray3[1] & 0x0f) << 2) + ((charArray3[2] & 0xc0) >> 6); + charArray4[1] = uint8_t((charArray3[0] & 0x03) << 4) + uint8_t((charArray3[1] & 0xf0) >> 4); + charArray4[2] = uint8_t((charArray3[1] & 0x0f) << 2) + uint8_t((charArray3[2] & 0xc0) >> 6); charArray4[3] = charArray3[2] & 0x3f; std::transform(std::begin(charArray4), @@ -75,8 +75,8 @@ std::string base64Encode(const uint8_t* data, std::size_t length) } charArray4[0] = (charArray3[0] & 0xfc) >> 2; - charArray4[1] = ((charArray3[0] & 0x03) << 4) + ((charArray3[1] & 0xf0) >> 4); - charArray4[2] = ((charArray3[1] & 0x0f) << 2) + ((charArray3[2] & 0xc0) >> 6); + charArray4[1] = uint8_t((charArray3[0] & 0x03) << 4) + uint8_t((charArray3[1] & 0xf0) >> 4); + charArray4[2] = uint8_t((charArray3[1] & 0x0f) << 2) + uint8_t((charArray3[2] & 0xc0) >> 6); charArray4[3] = charArray3[2] & 0x3f; for(size_t j = 0; j < i + 1; j++) @@ -123,9 +123,9 @@ std::vector base64Decode(const std::string_view& encodedString) charArray4[i++] = static_cast(std::distance(base64Chars.begin(), it)); if(i == 4) { - charArray3[0] = (charArray4[0] << 2) + ((charArray4[1] & 0x30) >> 4); - charArray3[1] = ((charArray4[1] & 0xf) << 4) + ((charArray4[2] & 0x3c) >> 2); - charArray3[2] = ((charArray4[2] & 0x3) << 6) + charArray4[3]; + charArray3[0] = uint8_t(charArray4[0] << 2) + uint8_t((charArray4[1] & 0x30) >> 4); + charArray3[1] = uint8_t((charArray4[1] & 0xf) << 4) + uint8_t((charArray4[2] & 0x3c) >> 2); + charArray3[2] = uint8_t((charArray4[2] & 0x3) << 6) + charArray4[3]; decodedData.insert(decodedData.end(), charArray3, charArray3 + 3); i = 0; @@ -139,9 +139,9 @@ std::vector base64Decode(const std::string_view& encodedString) charArray4[j] = 0; } - charArray3[0] = (charArray4[0] << 2) + ((charArray4[1] & 0x30) >> 4); - charArray3[1] = ((charArray4[1] & 0xf) << 4) + ((charArray4[2] & 0x3c) >> 2); - charArray3[2] = ((charArray4[2] & 0x3) << 6) + charArray4[3]; + charArray3[0] = uint8_t(charArray4[0] << 2) + uint8_t((charArray4[1] & 0x30) >> 4); + charArray3[1] = uint8_t((charArray4[1] & 0xf) << 4) + uint8_t((charArray4[2] & 0x3c) >> 2); + charArray3[2] = uint8_t((charArray4[2] & 0x3) << 6) + charArray4[3]; for(size_t j = 0; j < i - 1; j++) { diff --git a/projects/miopen/src/buffer_info.cpp b/projects/miopen/src/buffer_info.cpp index 2a510d428fbd..a65c6c5accff 100644 --- a/projects/miopen/src/buffer_info.cpp +++ b/projects/miopen/src/buffer_info.cpp @@ -107,11 +107,16 @@ MemLayout_t GetGroupConvLayout(MemLayout_t layout, bool IsDataBuffer) MIOPEN_THROW(std::string("Internal error in GetGroupConvLayout: Unknown MemLayout_t ")); } -BuffInfo::BuffInfo(MemLayout_t layout, int nk, int c, int h, int w, int g, int _element_size) +BuffInfo::BuffInfo(MemLayout_t layout, + unsigned nk, + unsigned c, + unsigned h, + unsigned w, + unsigned g, + size_t _element_size) { - element_size = _element_size; - const size_t count = static_cast(nk) * c * h * w * g; + const size_t count = static_cast(nk * c * h * w * g); total_byte_size = count * element_size; size.nk = nk; size.g = g; diff --git a/projects/miopen/src/cat/problem_description.cpp b/projects/miopen/src/cat/problem_description.cpp index 283d23fccf2a..3ec7ce218b60 100644 --- a/projects/miopen/src/cat/problem_description.cpp +++ b/projects/miopen/src/cat/problem_description.cpp @@ -46,13 +46,13 @@ NetworkConfig ProblemDescription::MakeNetworkConfig() const for(int i = 0; i < xCount; i++) { auto xlength = xDescs[i]->GetLengths(); - max_x_dim_size = std::max(max_x_dim_size, xlength[dim]); + max_x_dim_size = std::max(max_x_dim_size, xlength[size_t(dim)]); } auto ylength = yDesc.GetLengths(); auto outer_size = std::accumulate( ylength.begin(), ylength.begin() + dim, static_cast(1), std::multiplies()); - auto stride = yDesc.GetStrides()[dim]; + auto stride = yDesc.GetStrides()[size_t(dim)]; auto dtype = yDesc.GetType(); auto data_size = get_data_size(dtype); auto max_inner_size = max_x_dim_size * stride * data_size / sizeof(short4); diff --git a/projects/miopen/src/check_numerics.cpp b/projects/miopen/src/check_numerics.cpp index 0f3b0cb8f767..0466b5a6cd54 100644 --- a/projects/miopen/src/check_numerics.cpp +++ b/projects/miopen/src/check_numerics.cpp @@ -37,7 +37,7 @@ namespace miopen { bool CheckNumericsEnabled(const int bitMask) { - return (env::value(MIOPEN_CHECK_NUMERICS) & bitMask) != 0; + return (env::value(MIOPEN_CHECK_NUMERICS) & static_cast(bitMask)) != 0; } // Must keep this structure synchronized with one in MIOpenCheckNumerics @@ -73,7 +73,7 @@ std::string GetKernelName(miopenDataType_t data_type) bool checkNumericsImpl( const Handle& handle, int mode, const TensorDescriptor& dDesc, ConstData_t data, bool isInput) { - int numElements = dDesc.GetElementSize(); + size_t numElements = dDesc.GetElementSize(); CheckNumericsResult abnormal_h; auto abnormal_d = handle.Create(sizeof(CheckNumericsResult)); // TODO - someday avoid slow malloc/free here @@ -123,8 +123,8 @@ bool checkNumericsImpl( { assert(numElements != 0); MIOPEN_LOG((isAbnormal ? miopen::LoggingLevel::Warning : miopen::LoggingLevel::Info), - "Stats: mean=" << (abnormal_h.sum / numElements) - << " absmean=" << (abnormal_h.absSum / numElements) + "Stats: mean=" << (abnormal_h.sum / float(numElements)) + << " absmean=" << (abnormal_h.absSum / float(numElements)) << " min=" << abnormal_h.min << " max=" << abnormal_h.max); } } @@ -158,7 +158,7 @@ bool checkNumericsImpl( // Returns: 1 if abnormal value (inf or nan) detected in specified data, 0 otherwise bool checkNumericsInput(const Handle& handle, const TensorDescriptor& dDesc, ConstData_t data) { - return checkNumericsImpl(handle, env::value(MIOPEN_CHECK_NUMERICS), dDesc, data, true); + return checkNumericsImpl(handle, (int)env::value(MIOPEN_CHECK_NUMERICS), dDesc, data, true); } // Synchronizes to wait for kernel to finish, then checks data for output: @@ -166,7 +166,7 @@ bool checkNumericsInput(const Handle& handle, const TensorDescriptor& dDesc, Con bool checkNumericsOutput(const Handle& handle, const TensorDescriptor& dDesc, ConstData_t data) { handle.Finish(); - return checkNumericsImpl(handle, env::value(MIOPEN_CHECK_NUMERICS), dDesc, data, false); + return checkNumericsImpl(handle, (int)env::value(MIOPEN_CHECK_NUMERICS), dDesc, data, false); } } // namespace miopen diff --git a/projects/miopen/src/conv/invokers/gcn_asm_wino.cpp b/projects/miopen/src/conv/invokers/gcn_asm_wino.cpp index 68a42637246d..4f4a2123aa3b 100644 --- a/projects/miopen/src/conv/invokers/gcn_asm_wino.cpp +++ b/projects/miopen/src/conv/invokers/gcn_asm_wino.cpp @@ -113,13 +113,13 @@ InvokerFactory MakeGcnAsmWinoV2InvokerFactory(const WinoShaderArgsV2& args, uint64_t a_offset = 0; // activation parameters - float alpha = 0.0f; - float beta = 0.0f; + double alpha = 0.0; + double beta = 0.0; if(fused && (args.activation_mode != WinoShaderActivationModeV2_t::IDENTITY)) { const auto& invoke_ctx = primitive_params.CastTo(); - const int idx = do_bias ? 2 : 1; + const size_t idx = do_bias ? 2 : 1; const auto& activ_args = dynamic_cast(*invoke_ctx.op_args.params[idx]); if(args.activation_mode == WinoShaderActivationModeV2_t::SCALED_TANH) @@ -187,8 +187,8 @@ InvokerFactory MakeGcnAsmWinoV2InvokerFactory(const WinoShaderArgsV2& args, args.out_h, // uint32_t, output height args.out_w, // uint32_t, output width bias_addr, // uint64_t, address of bias buffer - alpha, // fp32, activation parameter alpha - beta, // fp32, activation parameter beta + float(alpha), // fp32, activation parameter alpha + float(beta), // fp32, activation parameter beta d_offset, // uint64_t, byte offset for buffer referenced by data_addr f_offset, // uint64_t, byte offset for buffer referenced by filter_addr o_offset, // uint64_t, byte offset for buffer referenced by output_addr diff --git a/projects/miopen/src/conv/invokers/impl_gemm.cpp b/projects/miopen/src/conv/invokers/impl_gemm.cpp index aeb444eac3fc..048c83039523 100644 --- a/projects/miopen/src/conv/invokers/impl_gemm.cpp +++ b/projects/miopen/src/conv/invokers/impl_gemm.cpp @@ -53,10 +53,10 @@ InvokerFactory MakeImplGemmDataInvokerFactory(const ProblemDescription& problem) bool need_atomic_add = false; bool every_pixel_is_written = true; - for(int i = 0; i < conv.GetSpatialDimension(); ++i) + for(size_t i = 0; i < conv.GetSpatialDimension(); ++i) { const auto conv_stride = conv.GetConvStrides()[i]; - const auto conv_dilation = conv.GetConvDilations()[i]; + const auto conv_dilation = size_t(conv.GetConvDilations()[i]); const auto filter_size = tensors.wDesc.GetLengths()[2 + i]; if(conv_stride < conv_dilation * (filter_size - 1) + 1) @@ -170,7 +170,7 @@ InvokerFactory MakeImplGemmDataInvokerFactory(const ProblemDescription& problem) { bool every_pixel_is_written = true; - for(int i = 0; i < conv.GetSpatialDimension(); ++i) + for(size_t i = 0; i < conv.GetSpatialDimension(); ++i) { const auto conv_stride = conv.GetConvStrides()[i]; const auto conv_dilation = conv.GetConvDilations()[i]; diff --git a/projects/miopen/src/conv/invokers/impl_gemm_dynamic.cpp b/projects/miopen/src/conv/invokers/impl_gemm_dynamic.cpp index 0c9090070254..353f5c7c57bf 100644 --- a/projects/miopen/src/conv/invokers/impl_gemm_dynamic.cpp +++ b/projects/miopen/src/conv/invokers/impl_gemm_dynamic.cpp @@ -49,13 +49,13 @@ static float CallImplGemmDynamicForward1x1(const miopen::Handle& handle, MIOPEN_LOG_I(kernel.GetName()); // clang-format off - int hi = problem.GetInHeight(); - int wi = problem.GetInWidth(); - int n = problem.GetInBatchSize(); - int k = problem.GetOutChannels(); - int c = problem.GetInChannels(); - int ho = problem.GetOutHeight(); - int wo = problem.GetOutWidth(); + int hi = int(problem.GetInHeight()); + int wi = int(problem.GetInWidth()); + int n = int(problem.GetInBatchSize()); + int k = int(problem.GetOutChannels()); + int c = int(problem.GetInChannels()); + int ho = int(problem.GetOutHeight()); + int wo = int(problem.GetOutWidth()); int stride_h = problem.GetKernelStrideH(); int stride_w = problem.GetKernelStrideW(); int dilation_h = problem.GetDilationH(); @@ -173,10 +173,10 @@ InvokerFactory MakeImplGemmDynamicBackwardDataInvokerFactory(const ProblemDescri std::vector y_dot_slice_gid; std::vector x_dot_slice_gid; std::vector is_gemm_not_empty; - for(int gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) + for(size_t gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) { - dtile_iy_gid.emplace_back(gemm_id / x_tilda); - dtile_ix_gid.emplace_back(gemm_id % x_tilda); + dtile_iy_gid.emplace_back(gemm_id / size_t(x_tilda)); + dtile_ix_gid.emplace_back(gemm_id % size_t(x_tilda)); y_dot_slice_gid.emplace_back((dtile_iy_gid[gemm_id] + 1) * y_dot <= y ? y_dot : y % y_dot); x_dot_slice_gid.emplace_back((dtile_ix_gid[gemm_id] + 1) * x_dot <= x ? x_dot : x % x_dot); const int gemm_k_gid = k * y_dot_slice_gid[gemm_id] * x_dot_slice_gid[gemm_id]; @@ -198,7 +198,7 @@ InvokerFactory MakeImplGemmDynamicBackwardDataInvokerFactory(const ProblemDescri if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } - for(int gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) + for(size_t gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) { if(is_gemm_not_empty[gemm_id]) { @@ -309,44 +309,45 @@ MakeImplGemmDynamicBackwardDataInvokerFactory(const ProblemDescription& problem, std::vector y_dot_slice_gid; std::vector x_dot_slice_gid; std::vector is_gemm_not_empty; - for(int gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) + for(size_t gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) { - dtile_iy_gid.emplace_back(gemm_id / x_tilda); - dtile_ix_gid.emplace_back(gemm_id % x_tilda); + dtile_iy_gid.emplace_back(gemm_id / size_t(x_tilda)); + dtile_ix_gid.emplace_back(gemm_id % size_t(x_tilda)); y_dot_slice_gid.emplace_back((dtile_iy_gid[gemm_id] + 1) * y_dot <= y ? y_dot : y % y_dot); x_dot_slice_gid.emplace_back((dtile_ix_gid[gemm_id] + 1) * x_dot <= x ? x_dot : x % x_dot); const int gemm_k_gid = k * y_dot_slice_gid[gemm_id] * x_dot_slice_gid[gemm_id]; is_gemm_not_empty.emplace_back(gemm_k_gid > 0); } - bool need_set_zero = true; - int nxb = cfg.nxb; - int b = h_tilda_slice * w_tilda_slice; - b = (cfg.nxe == 0) ? (b) : ((b + nxb - 1) / nxb) * nxb; // pad to nxb modulo when nxe != 0 + bool need_set_zero{true}; + int nxb = cfg.nxb; + int b = h_tilda_slice * w_tilda_slice; + b = (cfg.nxe == 0) ? (b) : ((b + nxb - 1) / nxb) * nxb; // pad to nxb modulo when nxe != 0 + + uint32_t nb_n0 = uint32_t(cfg.tensor_b_cluster_lengths[2] * cfg.tensor_b_thread_lengths[2]); + uint32_t nb_n1b = uint32_t(cfg.tensor_b_cluster_lengths[3] * cfg.tensor_b_thread_lengths[3]); - uint32_t nb_n0 = cfg.tensor_b_cluster_lengths[2] * cfg.tensor_b_thread_lengths[2]; - uint32_t nb_n1b = cfg.tensor_b_cluster_lengths[3] * cfg.tensor_b_thread_lengths[3]; - uint32_t unmerge_sub_n = cfg.gemm_n_per_block / cfg.nxb; + uint32_t unmerge_sub_n = uint32_t(cfg.gemm_n_per_block / cfg.nxb); uint32_t unmerge_sub_n1 = unmerge_sub_n / nb_n0; - magic_div_u32_t mdiv_2 = - magic_div_u32_gen(((c / group) * n * b) / (cfg.gemm_m_per_block * cfg.gemm_n_per_block)); - magic_div_u32_t mdiv_3 = magic_div_u32_gen((n * b) / cfg.gemm_n_per_block); - magic_div_u32_t mdiv_4 = magic_div_u32_gen(b * unmerge_sub_n1 / nb_n1b); - magic_div_u32_t mdiv_5 = magic_div_u32_gen(b); - magic_div_u32_t mdiv_6 = magic_div_u32_gen(w_tilda_slice); + magic_div_u32_t mdiv_2 = magic_div_u32_gen( + uint32_t(((c / group) * n * b) / (cfg.gemm_m_per_block * cfg.gemm_n_per_block))); + magic_div_u32_t mdiv_3 = magic_div_u32_gen(uint32_t((n * b) / cfg.gemm_n_per_block)); + magic_div_u32_t mdiv_4 = magic_div_u32_gen(uint32_t(b) * unmerge_sub_n1 / nb_n1b); + magic_div_u32_t mdiv_5 = magic_div_u32_gen(uint32_t(b)); + magic_div_u32_t mdiv_6 = magic_div_u32_gen(uint32_t(w_tilda_slice)); std::vector mdiv_0_vec; std::vector mdiv_1_vec; std::vector shift_pack_0_vec; uint32_t shift_pack_1; - for(int gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) + for(size_t gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) { if(is_gemm_not_empty[gemm_id]) { mdiv_0_vec.push_back( - magic_div_u32_gen(y_dot_slice_gid[gemm_id] * x_dot_slice_gid[gemm_id])); - mdiv_1_vec.push_back(magic_div_u32_gen(x_dot_slice_gid[gemm_id])); + magic_div_u32_gen(uint32_t(y_dot_slice_gid[gemm_id] * x_dot_slice_gid[gemm_id]))); + mdiv_1_vec.push_back(magic_div_u32_gen(uint32_t(x_dot_slice_gid[gemm_id]))); } else { @@ -374,7 +375,7 @@ MakeImplGemmDynamicBackwardDataInvokerFactory(const ProblemDescription& problem, if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } - for(int gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) + for(size_t gemm_id = 0; gemm_id < num_of_gemms; gemm_id++) { if(is_gemm_not_empty[gemm_id]) { @@ -460,34 +461,36 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory( int x_karg = x; int splits_4G = solver::igemm_split_batch_size( - hi, wi, ho, wo, n, k, c, miopen::GetTypeSize(problem.GetInDataType())); + hi, wi, ho, wo, n, k, c, int(miopen::GetTypeSize(problem.GetInDataType()))); splits_4G = splits_4G == 0 ? n : splits_4G; - uint32_t gemm_m = (n / splits_4G) * ho * wo; - uint32_t gemm_n = k / group; + auto gemm_m = (n / splits_4G) * ho * wo; + auto gemm_n = k / group; magic_div_u32_t mdiv_0, mdiv_1, mdiv_2, mdiv_3, mdiv_4, mdiv_5; uint32_t shift_pack_0, shift_pack_1; uint32_t pack0 = 0; - mdiv_0 = magic_div_u32_gen((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block); - mdiv_1 = magic_div_u32_gen(ho * wo); - mdiv_2 = magic_div_u32_gen(wo); - mdiv_3 = magic_div_u32_gen(((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block) * - ((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block)); + mdiv_0 = magic_div_u32_gen( + uint32_t((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block)); + mdiv_1 = magic_div_u32_gen(uint32_t(ho * wo)); + mdiv_2 = magic_div_u32_gen(uint32_t(wo)); + mdiv_3 = magic_div_u32_gen( + uint32_t(((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block) * + ((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block))); shift_pack_0 = magic_div_u32_pack_shift(mdiv_0.shift, mdiv_1.shift, mdiv_2.shift, mdiv_3.shift); if(config.merge_e != 0) { - mdiv_4 = magic_div_u32_gen(x * (c / group)); - mdiv_5 = magic_div_u32_gen(c / group); + mdiv_4 = magic_div_u32_gen(uint32_t(x * (c / group))); + mdiv_5 = magic_div_u32_gen(uint32_t(c / group)); shift_pack_1 = magic_div_u32_pack_shift(mdiv_4.shift, mdiv_5.shift, 0, 0); - uint32_t s_move_slice_k_y = (config.gemm_k_per_block / (x * (c / group))) % y; - uint32_t s_move_slice_k_x = (config.gemm_k_per_block / (c / group)) % x; - uint32_t s_move_slice_k_c = config.gemm_k_per_block % (c / group); - y_karg = static_cast((s_move_slice_k_y << 24) | y); - x_karg = static_cast((s_move_slice_k_x << 24) | x); - c_karg = static_cast((s_move_slice_k_c << 24) | (c / group)); + uint32_t s_move_slice_k_y = uint32_t((config.gemm_k_per_block / (x * (c / group))) % y); + uint32_t s_move_slice_k_x = uint32_t((config.gemm_k_per_block / (c / group)) % x); + uint32_t s_move_slice_k_c = uint32_t(config.gemm_k_per_block % (c / group)); + y_karg = static_cast((s_move_slice_k_y << 24) | uint32_t(y)); + x_karg = static_cast((s_move_slice_k_x << 24) | uint32_t(x)); + c_karg = static_cast((s_move_slice_k_c << 24) | uint32_t(c / group)); } else { @@ -565,14 +568,17 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory( if(is_nchw) { - TransposeSolutionDefault2Nhwc trans_input(ctx, problem.GetInDataType(), n, c, hi, wi); - TransposeSolutionDefault2Nhwc trans_weight(ctx, - problem.GetWeightsDataType(), - k, - c / group, - y, - x); // group * k_per_group as batch for weight - TransposeSolutionNhwc2Default trans_output(ctx, problem.GetOutDataType(), n, k, ho, wo); + TransposeSolutionDefault2Nhwc trans_input( + ctx, problem.GetInDataType(), uint32_t(n), uint32_t(c), uint32_t(hi), uint32_t(wi)); + TransposeSolutionDefault2Nhwc trans_weight( + ctx, + problem.GetWeightsDataType(), + uint32_t(k), + uint32_t(c / group), + uint32_t(y), + uint32_t(x)); // group * k_per_group as batch for weight + TransposeSolutionNhwc2Default trans_output( + ctx, problem.GetOutDataType(), uint32_t(n), uint32_t(k), uint32_t(ho), uint32_t(wo)); trans_input_skippable = trans_input.IsSkippable(); trans_weight_skippable = trans_weight.IsSkippable(); @@ -598,7 +604,8 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory( trans_output_idx = idx++; } - const size_t cast_size = need_cast ? miopen::GetTypeSize(miopenFloat) * n * k * ho * wo : 0; + const auto cast_size = + need_cast ? miopen::GetTypeSize(miopenFloat) * size_t(n * k * ho * wo) : 0; MultiBufferWorkspaceTraits wt( {trans_input_size, trans_weight_size, trans_output_size, cast_size}); @@ -609,7 +616,7 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory( const size_t cast_offset = wt.GetOffset(3); - const int kID_trans_start = isGfx90aFp16altSupport ? 2 : 1; + const size_t kID_trans_start = isGfx90aFp16altSupport ? 2 : 1; const TensorDescriptor cast_desc( miopenFloat, problem.GetOut().GetLengths(), problem.GetOut().GetStrides()); @@ -661,19 +668,19 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory( { if(!trans_input_skippable) { - auto& karg_input = opArgsTrans[trans_input_idx]; + auto& karg_input = opArgsTrans[size_t(trans_input_idx)]; karg_input[0] = OpKernelArg(trans_input_buf.get()); karg_input[1] = OpKernelArg(tensors.in); - handle.Run(kernels[kID_trans_start + trans_input_idx])(karg_input); + handle.Run(kernels[kID_trans_start + size_t(trans_input_idx)])(karg_input); if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } if(!trans_weight_skippable) { - auto& karg_weight = opArgsTrans[trans_weight_idx]; + auto& karg_weight = opArgsTrans[size_t(trans_weight_idx)]; karg_weight[0] = OpKernelArg(trans_weight_buf.get()); karg_weight[1] = OpKernelArg(tensors.w); - handle.Run(kernels[kID_trans_start + trans_weight_idx])(karg_weight); + handle.Run(kernels[kID_trans_start + size_t(trans_weight_idx)])(karg_weight); if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } @@ -710,10 +717,10 @@ InvokerFactory MakeImplGemmDynamicForwardXdlopsNHWCInvokerFactory( if(is_nchw && !trans_output_skippable) { - auto& karg_output = opArgsTrans[trans_output_idx]; + auto& karg_output = opArgsTrans[size_t(trans_output_idx)]; karg_output[0] = OpKernelArg(tensors.out); karg_output[1] = OpKernelArg(trans_output_buf.get()); - handle.Run(kernels[kID_trans_start + trans_output_idx])(karg_output); + handle.Run(kernels[kID_trans_start + size_t(trans_output_idx)])(karg_output); if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } @@ -769,25 +776,25 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory( int num_of_gemms = x_tilda * y_tilda; int splits_4G = solver::igemm_split_batch_size( - hi, wi, ho, wo, n, k, c, miopen::GetTypeSize(problem.GetInDataType())); + hi, wi, ho, wo, n, k, c, int(miopen::GetTypeSize(problem.GetInDataType()))); int n_in_1_block = splits_4G == 0 ? 1 : (n / splits_4G); - uint32_t gemm_m = n_in_1_block * h_tilda_slice * w_tilda_slice; - uint32_t gemm_n = c / group; + auto gemm_m = n_in_1_block * h_tilda_slice * w_tilda_slice; + auto gemm_n = c / group; - magic_div_u32_t mdiv_x_tilda = magic_div_u32_gen(x_tilda); - magic_div_u32_t mdiv_y_tilda = magic_div_u32_gen(y_tilda); + magic_div_u32_t mdiv_x_tilda = magic_div_u32_gen(uint32_t(x_tilda)); + magic_div_u32_t mdiv_y_tilda = magic_div_u32_gen(uint32_t(y_tilda)); magic_div_u32_t mdiv_group_mn = magic_div_u32_gen( - group * ((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block) * - ((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block)); - - magic_div_u32_t mdiv_0 = - magic_div_u32_gen((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block); - magic_div_u32_t mdiv_1 = - magic_div_u32_gen(((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block) * - ((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block)); - magic_div_u32_t mdiv_2 = magic_div_u32_gen(config.nxe != 0 ? w_tilda_slice : wi); - magic_div_u32_t mdiv_3 = magic_div_u32_gen(h_tilda_slice * w_tilda_slice); + uint32_t(group * ((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block) * + ((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block))); + + magic_div_u32_t mdiv_0 = magic_div_u32_gen( + uint32_t((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block)); + magic_div_u32_t mdiv_1 = magic_div_u32_gen( + uint32_t(((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block) * + ((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block))); + magic_div_u32_t mdiv_2 = magic_div_u32_gen(uint32_t(config.nxe != 0 ? w_tilda_slice : wi)); + magic_div_u32_t mdiv_3 = magic_div_u32_gen(uint32_t(h_tilda_slice * w_tilda_slice)); uint32_t shift_pack_0 = magic_div_u32_pack_shift(mdiv_0.shift, mdiv_1.shift, mdiv_2.shift, mdiv_3.shift); @@ -880,14 +887,17 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory( if(is_nchw) { - TransposeSolutionNhwc2Default trans_input(ctx, problem.GetOutDataType(), n, c, hi, wi); - TransposeSolutionDefault2Nhwc trans_weight(ctx, - problem.GetWeightsDataType(), - k, - c / group, - y, - x); // group * k_per_group as batch for weight - TransposeSolutionDefault2Nhwc trans_output(ctx, problem.GetInDataType(), n, k, ho, wo); + TransposeSolutionNhwc2Default trans_input( + ctx, problem.GetOutDataType(), uint32_t(n), uint32_t(c), uint32_t(hi), uint32_t(wi)); + TransposeSolutionDefault2Nhwc trans_weight( + ctx, + problem.GetWeightsDataType(), + uint32_t(k), + uint32_t(c / group), + uint32_t(y), + uint32_t(x)); // group * k_per_group as batch for weight + TransposeSolutionDefault2Nhwc trans_output( + ctx, problem.GetInDataType(), uint32_t(n), uint32_t(k), uint32_t(ho), uint32_t(wo)); trans_input_skippable = trans_input.IsSkippable(); trans_weight_skippable = trans_weight.IsSkippable(); @@ -913,7 +923,8 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory( trans_output_idx = idx++; } - const size_t cast_size = need_cast ? miopen::GetTypeSize(miopenFloat) * n * c * hi * wi : 0; + const auto cast_size = + need_cast ? miopen::GetTypeSize(miopenFloat) * size_t(n * c * hi * wi) : 0; MultiBufferWorkspaceTraits wt( {trans_input_size, trans_weight_size, trans_output_size, cast_size}); @@ -924,7 +935,7 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory( const size_t cast_offset = wt.GetOffset(3); - const int kID_trans_start = isGfx90aFp16altSupport ? 2 : 1; + const size_t kID_trans_start = isGfx90aFp16altSupport ? 2 : 1; const TensorDescriptor cast_desc( miopenFloat, problem.GetOut().GetLengths(), problem.GetOut().GetStrides()); @@ -976,19 +987,19 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory( { if(!trans_output_skippable) { - auto& karg_output = opArgsTrans[trans_output_idx]; + auto& karg_output = opArgsTrans[size_t(trans_output_idx)]; karg_output[0] = OpKernelArg(trans_output_buf.get()); karg_output[1] = OpKernelArg(tensors.in); - handle.Run(kernels[kID_trans_start + trans_output_idx])(karg_output); + handle.Run(kernels[kID_trans_start + size_t(trans_output_idx)])(karg_output); if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } if(!trans_weight_skippable) { - auto& karg_weight = opArgsTrans[trans_weight_idx]; + auto& karg_weight = opArgsTrans[size_t(trans_weight_idx)]; karg_weight[0] = OpKernelArg(trans_weight_buf.get()); karg_weight[1] = OpKernelArg(tensors.w); - handle.Run(kernels[kID_trans_start + trans_weight_idx])(karg_weight); + handle.Run(kernels[kID_trans_start + size_t(trans_weight_idx)])(karg_weight); if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } @@ -1024,10 +1035,10 @@ InvokerFactory MakeImplGemmDynamicBackwardDataXdlopsNHWCInvokerFactory( } if((is_nchw && !trans_input_skippable)) { - auto& karg_input = opArgsTrans[trans_input_idx]; + auto& karg_input = opArgsTrans[size_t(trans_input_idx)]; karg_input[0] = OpKernelArg(tensors.out); karg_input[1] = OpKernelArg(trans_input_buf.get()); - handle.Run(kernels[kID_trans_start + trans_input_idx])(karg_input); + handle.Run(kernels[kID_trans_start + size_t(trans_input_idx)])(karg_input); if(handle.IsProfilingEnabled()) elapsed += handle.GetKernelTime(); } @@ -1045,23 +1056,24 @@ InvokerFactory MakeImplGemmDynamicForwardDlopsNCHWCInvokerFactory( const ProblemDescription& problem, const solver::conv::PerformanceConfigAsmImplicitGemmGTCFwdDlopsNCHWC& config) { - int hi = problem.GetInHeight(); - int wi = problem.GetInWidth(); - int n = problem.GetInBatchSize(); - int k = problem.GetOutChannels() * config.vector_c; - int c = problem.GetInChannels(); - int ks = 1; - int ho = problem.GetOutHeight(); - int wo = problem.GetOutWidth(); - int stride_h = problem.GetKernelStrideH(); - int stride_w = problem.GetKernelStrideW(); - int dilation_h = problem.GetDilationH(); - int dilation_w = problem.GetDilationW(); - int pad_h = problem.GetPadH(); - int pad_w = problem.GetPadW(); - int y = problem.GetWeightsHeight(); - int x = problem.GetWeightsWidth(); - int group = problem.GetGroupCount(); + int hi = int(problem.GetInHeight()); + int wi = int(problem.GetInWidth()); + int n = int(problem.GetInBatchSize()); + int k = int(problem.GetOutChannels()) * config.vector_c; + int c = int(problem.GetInChannels()); + int ks = 1; + int group = problem.GetGroupCount(); + + uint32_t ho = uint32_t(problem.GetOutHeight()); + uint32_t wo = uint32_t(problem.GetOutWidth()); + uint32_t stride_h = uint32_t(problem.GetKernelStrideH()); + uint32_t stride_w = uint32_t(problem.GetKernelStrideW()); + uint32_t dilation_h = uint32_t(problem.GetDilationH()); + uint32_t dilation_w = uint32_t(problem.GetDilationW()); + uint32_t pad_h = uint32_t(problem.GetPadH()); + uint32_t pad_w = uint32_t(problem.GetPadW()); + uint32_t y = uint32_t(problem.GetWeightsHeight()); + uint32_t x = uint32_t(problem.GetWeightsWidth()); // Currentlly we do not tile in H/W dimension, using tile H/W as Ho/Wo, Thus Number of Tile // equal to one @@ -1081,42 +1093,45 @@ InvokerFactory MakeImplGemmDynamicForwardDlopsNCHWCInvokerFactory( else MIOPEN_THROW("tile_hw should not be zero"); - int tile_hw = (tile_h << 16) | tile_w; - int ntile_hw = (ntile_h << 16) | ntile_w; + uint32_t tile_hw = (tile_h << 16) | tile_w; + uint32_t ntile_hw = (ntile_h << 16) | ntile_w; // Split K make no sense in vector format - int stride_hw = (stride_h << 16) | stride_w; - int dilation_hw = (dilation_h << 16) | dilation_w; - int pad_hw = (pad_h << 16) | pad_w; - int wei_hw = (y << 16) | x; + uint32_t stride_hw = (stride_h << 16) | stride_w; + uint32_t dilation_hw = (dilation_h << 16) | dilation_w; + uint32_t pad_hw = (pad_h << 16) | pad_w; + uint32_t wei_hw = (y << 16) | x; // Initialize here for better readibility - uint32_t s_move_slice_k_y = (config.gemm_k_per_block / config.vector_c / x) % y; - uint32_t s_move_slice_k_x = config.gemm_k_per_block / config.vector_c % x; - uint32_t s_move_slice_k_c = (config.gemm_k_per_block / config.vector_c / (x * y)) % (c / group); - int move_slice_k = (s_move_slice_k_y << 16) | (s_move_slice_k_x << 8) | s_move_slice_k_c; + uint32_t s_move_slice_k_y = (uint32_t(config.gemm_k_per_block / config.vector_c) / x) % y; + uint32_t s_move_slice_k_x = uint32_t(config.gemm_k_per_block / config.vector_c) % x; + uint32_t s_move_slice_k_c = + (uint32_t(config.gemm_k_per_block / config.vector_c) / (x * y)) % uint32_t(c / group); + uint32_t move_slice_k = (s_move_slice_k_y << 16) | (s_move_slice_k_x << 8) | s_move_slice_k_c; int splits_4G = solver::igemm_split_batch_size( - hi, wi, ho, wo, n, k, c, miopen::GetTypeSize(problem.GetInDataType())); + hi, wi, int(ho), int(wo), n, k, c, int(miopen::GetTypeSize(problem.GetInDataType()))); splits_4G = (splits_4G == 0 ? n : splits_4G); uint32_t gemm_n = 1; uint32_t gemm_m = 1; if(splits_4G != 0) { - gemm_n = (n / splits_4G) * tile_h * tile_w; - gemm_m = k / group; + gemm_n = uint32_t(n / splits_4G) * tile_h * tile_w; + gemm_m = uint32_t(k / group); } else MIOPEN_THROW("splits_4G should not be zero"); magic_div_u32_t mdiv_0, mdiv_1, mdiv_2, mdiv_3, mdiv_4, mdiv_5, mdiv_6, mdiv_7; uint32_t shift_pack_0, shift_pack_1; - mdiv_0 = magic_div_u32_gen((gemm_n + config.gemm_n_per_block - 1) / config.gemm_n_per_block); - mdiv_1 = magic_div_u32_gen((gemm_m + config.gemm_m_per_block - 1) / config.gemm_m_per_block); - mdiv_2 = magic_div_u32_gen(tile_h); - mdiv_3 = magic_div_u32_gen(tile_w); - mdiv_4 = magic_div_u32_gen(y); - mdiv_5 = magic_div_u32_gen(x); - mdiv_6 = magic_div_u32_gen(ntile_h); - mdiv_7 = magic_div_u32_gen(ntile_w); + auto n_per_block = uint32_t(config.gemm_n_per_block); + auto m_per_block = uint32_t(config.gemm_m_per_block); + mdiv_0 = magic_div_u32_gen((gemm_n + n_per_block - 1) / n_per_block); + mdiv_1 = magic_div_u32_gen((gemm_m + m_per_block - 1) / m_per_block); + mdiv_2 = magic_div_u32_gen(tile_h); + mdiv_3 = magic_div_u32_gen(tile_w); + mdiv_4 = magic_div_u32_gen(y); + mdiv_5 = magic_div_u32_gen(x); + mdiv_6 = magic_div_u32_gen(ntile_h); + mdiv_7 = magic_div_u32_gen(ntile_w); shift_pack_0 = magic_div_u32_pack_shift(mdiv_0.shift, mdiv_1.shift, mdiv_2.shift, mdiv_3.shift); shift_pack_1 = magic_div_u32_pack_shift(mdiv_4.shift, mdiv_5.shift, mdiv_6.shift, mdiv_7.shift); diff --git a/projects/miopen/src/conv/kernel_interface/winograd_kernel_interface.cpp b/projects/miopen/src/conv/kernel_interface/winograd_kernel_interface.cpp index e030f47ecce1..3c660768e75c 100644 --- a/projects/miopen/src/conv/kernel_interface/winograd_kernel_interface.cpp +++ b/projects/miopen/src/conv/kernel_interface/winograd_kernel_interface.cpp @@ -39,7 +39,7 @@ bool AssignAndCheck(Tdst& dst_v, Tsrc src_v) noexcept static_assert(std::is_integral_v); static_assert(std::is_integral_v); - dst_v = src_v; + dst_v = static_cast(src_v); if(dst_v != src_v) return false; diff --git a/projects/miopen/src/conv/problem_description.cpp b/projects/miopen/src/conv/problem_description.cpp index 94293fd764e3..7cf3afc2d584 100644 --- a/projects/miopen/src/conv/problem_description.cpp +++ b/projects/miopen/src/conv/problem_description.cpp @@ -48,7 +48,7 @@ namespace conv { namespace { std::function -PrintDHW(char sep, unsigned spatial_dims, int64_t depth, int64_t height, int64_t width) +PrintDHW(char sep, unsigned spatial_dims, size_t depth, size_t height, size_t width) { return [=](std::ostream& stream) { if(spatial_dims > 2) @@ -241,11 +241,20 @@ void ProblemDescription::MakeNetworkConfig(std::string& conf_key) const const auto sep = 'x'; SerializeStrides(optional, in, out, weights, sep); - ss << 'x' << PrintDHW('x', GetSpatialDims(), GetPadD(), GetPadH(), GetPadW()); ss << 'x' - << PrintDHW( - 'x', GetSpatialDims(), GetKernelStrideD(), GetKernelStrideH(), GetKernelStrideW()); - ss << 'x' << PrintDHW('x', GetSpatialDims(), GetDilationD(), GetDilationH(), GetDilationW()); + << PrintDHW('x', GetSpatialDims(), size_t(GetPadD()), size_t(GetPadH()), size_t(GetPadW())); + ss << 'x' + << PrintDHW('x', + GetSpatialDims(), + size_t(GetKernelStrideD()), + size_t(GetKernelStrideH()), + size_t(GetKernelStrideW())); + ss << 'x' + << PrintDHW('x', + GetSpatialDims(), + size_t(GetDilationD()), + size_t(GetDilationH()), + size_t(GetDilationW())); ss << 'x' << GetGroupCount(); ss << 'x' << GetDirectionStr(); ss << 'x' << GetAlphaBetaCaseStr(); @@ -267,9 +276,9 @@ void ProblemDescription::Serialize(std::ostream& stream) const stream << sep << GetOutChannels(); stream << sep << PrintDHW(sep, GetSpatialDims(), GetOutDepth(), GetOutHeight(), GetOutWidth()); stream << sep << GetInBatchSize(); - stream << sep << PrintDHW('x', GetSpatialDims(), GetPadD(), GetPadH(), GetPadW()); - stream << sep << PrintDHW('x', GetSpatialDims(), GetKernelStrideD(), GetKernelStrideH(), GetKernelStrideW()); - stream << sep << PrintDHW('x', GetSpatialDims(), GetDilationD(), GetDilationH(), GetDilationW()); + stream << sep << PrintDHW('x', GetSpatialDims(), size_t(GetPadD()), size_t(GetPadH()), size_t(GetPadW())); + stream << sep << PrintDHW('x', GetSpatialDims(), size_t(GetKernelStrideD()), size_t(GetKernelStrideH()), size_t(GetKernelStrideW())); + stream << sep << PrintDHW('x', GetSpatialDims(), size_t(GetDilationD()), size_t(GetDilationH()), size_t(GetDilationW())); stream << sep << GetBias(); if ((GetInLayout() == "NCHW" && GetWeightsLayout() == "NCHW" && GetOutLayout() == "NCHW") || (GetInLayout() == "NCDHW" && GetWeightsLayout() == "NCDHW" && GetOutLayout() == "NCDHW")) diff --git a/projects/miopen/src/conv/solver_finders.cpp b/projects/miopen/src/conv/solver_finders.cpp index bf0ec9c45600..27f4b7a2cbf3 100644 --- a/projects/miopen/src/conv/solver_finders.cpp +++ b/projects/miopen/src/conv/solver_finders.cpp @@ -275,7 +275,7 @@ std::vector EvaluateInvokers(const Handle& handle, << sol.solver_id); continue; } - skip_time *= env::value(MIOPEN_FIND_SKIP_PCT) / 100.0f; + skip_time *= float(env::value(MIOPEN_FIND_SKIP_PCT)) / 100.0f; } MIOPEN_LOG_I("Evaluating Solver: " << algorithm_name.ToString() << ":" << sol.solver_id); diff --git a/projects/miopen/src/convolution.cpp b/projects/miopen/src/convolution.cpp index 53b8b5fad639..b3fecfb22f3e 100644 --- a/projects/miopen/src/convolution.cpp +++ b/projects/miopen/src/convolution.cpp @@ -251,7 +251,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& } else if(mode == miopenTranspose) { - if(in_c != wei_k || (group_count > 1 && (wei_k % group_count != 0))) + if(in_c != wei_k || (group_count > 1 && (wei_k % size_t(group_count) != 0))) { MIOPEN_THROW(miopenStatusBadParm, "Channels do not match for the filter"); } @@ -284,7 +284,8 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& for(int i = 0; i < spatial_dim; ++i) { - out_spatial[i] = miopen::integer_division_ceil(in_spatial[i], GetConvStrides()[i]); + out_spatial[i] = + size_t(miopen::integer_division_ceil(in_spatial[i], GetConvStrides()[unsigned(i)])); } } else if(paddingMode == miopenPaddingValid && mode == miopenConvolution && @@ -294,8 +295,8 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& for(int i = 0; i < spatial_dim; ++i) { - out_spatial[i] = miopen::integer_division_ceil( - std::ptrdiff_t(in_spatial[i]) - wei_spatial[i] + 1, GetConvStrides()[i]); + out_spatial[i] = size_t(miopen::integer_division_ceil( + std::ptrdiff_t(in_spatial[i] - wei_spatial[i] + 1), GetConvStrides()[unsigned(i)])); } } else if(paddingMode == miopenPaddingDefault || paddingMode == miopenPaddingSame || @@ -303,31 +304,31 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& { if(mode == miopenTranspose) { - out_c = wei_c * group_count; + out_c = wei_c * size_t(group_count); - for(int i = 0; i < spatial_dim; ++i) + for(unsigned i = 0; i < spatial_dim; ++i) { - out_spatial[i] = std::max( + out_spatial[i] = size_t(std::max( 1, GetConvStrides()[i] * (std::ptrdiff_t(in_spatial[i]) - 1) + 1 + GetConvDilations()[i] * (std::ptrdiff_t(wei_spatial[i]) - 1) - 2 * static_cast(GetConvPads()[i]) + - GetTransposeConvPads()[i]); + GetTransposeConvPads()[i])); } } else { out_c = wei_k / wDesc.GetVectorLength(); - for(int i = 0; i < spatial_dim; ++i) + for(unsigned i = 0; i < spatial_dim; ++i) { - out_spatial[i] = std::max( + out_spatial[i] = size_t(std::max( 1, (ptrdiff_t(in_spatial[i]) - (1 + GetConvDilations()[i] * (std::ptrdiff_t(wei_spatial[i]) - 1)) + 2 * static_cast(GetConvPads()[i])) / GetConvStrides()[i] + - 1); + 1)); } } } @@ -346,7 +347,7 @@ ConvolutionDescriptor::GetForwardOutputTensorWithLayout(const TensorDescriptor& : xDesc.GetType()), // TODO: This function overrides the output type with // essentially the input which is incorrect. TensorDescriptor::StringToLayoutType( - yLayout, xDesc.IsVectorized(), xDesc.GetVectorLength()), + yLayout, xDesc.IsVectorized(), int(xDesc.GetVectorLength())), out_lens, out_strides}; } diff --git a/projects/miopen/src/convolution_api.cpp b/projects/miopen/src/convolution_api.cpp index 18d5c6496e95..1cc47973b53b 100644 --- a/projects/miopen/src/convolution_api.cpp +++ b/projects/miopen/src/convolution_api.cpp @@ -161,15 +161,16 @@ extern "C" miopenStatus_t miopenInitConvolutionNdDescriptor(miopenConvolutionDes const auto dilations = std::vector(dilationA, dilationA + spatialDim); MIOPEN_LOG_FUNCTION(convDesc, spatialDim, pads, strides, dilations, c_mode); return miopen::try_([&] { - miopen::deref(convDesc) = miopen::ConvolutionDescriptor(spatialDim, - c_mode, - miopenPaddingDefault, - pads, - strides, - dilations, - std::vector(spatialDim, 0), - 1, - 1.0); + miopen::deref(convDesc) = + miopen::ConvolutionDescriptor(size_t(spatialDim), + c_mode, + miopenPaddingDefault, + pads, + strides, + dilations, + std::vector(unsigned(spatialDim), 0), + 1, + 1.0); }); } @@ -233,9 +234,9 @@ miopenConvolutionABBackwardWeightsGetWorkSpaceSize(const miopenAlphaBetaCase_t a return miopen::try_([&] { miopenDataType_t data_type = miopen::deref(outputTensorDesc).GetType(); - size_t spatial_dims = miopen::deref(convDesc).GetSpatialDimension(); + unsigned spatial_dims = unsigned(miopen::deref(convDesc).GetSpatialDimension()); - int G = miopen::deref(convDesc).GetGroupCount(); + size_t G = size_t(miopen::deref(convDesc).GetGroupCount()); size_t K = std::get<1>( miopen::GetNCDHW(spatial_dims, miopen::deref(inputTensorDesc).GetLengths())); size_t C = std::get<1>( @@ -246,7 +247,7 @@ miopenConvolutionABBackwardWeightsGetWorkSpaceSize(const miopenAlphaBetaCase_t a size_t K_, miopenDataType_t data_type_, miopenAlphaBetaCase_t alpha_beta_case_) { - auto is_odd = [](int num) { return num % 2 != 0; }; + auto is_odd = [](size_t num) { return num % 2 != 0; }; size_t C_per_group = C_ / G_; size_t K_per_group = K_ / G_; @@ -383,7 +384,7 @@ extern "C" miopenStatus_t miopenGetConvolutionNdDescriptor(miopenConvolutionDesc { MIOPEN_LOG_FUNCTION(convDesc, requestedSpatialDim); return miopen::try_([&] { - int spatial_dim = miopen::deref(convDesc).GetSpatialDimension(); + int spatial_dim = int(miopen::deref(convDesc).GetSpatialDimension()); if(spatial_dim < requestedSpatialDim) { MIOPEN_THROW("requestedSpatialDim is larger than actual spatial dimension"); @@ -409,7 +410,7 @@ extern "C" miopenStatus_t miopenGetConvolutionSpatialDim(miopenConvolutionDescri { MIOPEN_LOG_FUNCTION(convDesc); return miopen::try_( - [&] { miopen::deref(spatialDim) = miopen::deref(convDesc).GetSpatialDimension(); }); + [&] { miopen::deref(spatialDim) = int(miopen::deref(convDesc).GetSpatialDimension()); }); } MIOPEN_EXPORT extern "C" miopenStatus_t @@ -447,11 +448,11 @@ miopenGetConvolutionNdForwardOutputDim(miopenConvolutionDescriptor_t convDesc, auto out_desc = miopen::deref(convDesc).GetForwardOutputTensor( miopen::deref(inputTensorDesc), miopen::deref(filterDesc)); - miopen::deref(nDim) = out_desc.GetNumDims(); + miopen::deref(nDim) = int(out_desc.GetNumDims()); for(unsigned i = 0; i < out_desc.GetNumDims(); ++i) { - outputTensorDimA[i] = out_desc.GetLengths()[i]; + outputTensorDimA[i] = int(out_desc.GetLengths()[i]); } }); } @@ -757,7 +758,7 @@ static inline void ReturnSolutions(const std::vector& solu *solution_count_ret = solutions.size(); if(solutions_ret != nullptr) { - for(auto i = 0; i < solutions.size(); ++i) + for(size_t i = 0; i < solutions.size(); ++i) solutions_ret[i] = solutions[i]; } } diff --git a/projects/miopen/src/ctc.cpp b/projects/miopen/src/ctc.cpp index 1b997d2775b2..99777e1a7585 100644 --- a/projects/miopen/src/ctc.cpp +++ b/projects/miopen/src/ctc.cpp @@ -38,15 +38,15 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, MIOPEN_THROW("probs tensor's dimension does not match gradients tensor's dimension"); } - int class_sz = probsDesc.GetLengths()[2]; - int batch_size = probsDesc.GetLengths()[1]; - int max_time_step = probsDesc.GetLengths()[0]; + size_t class_sz = probsDesc.GetLengths()[2]; + size_t batch_size = probsDesc.GetLengths()[1]; + size_t max_time_step = probsDesc.GetLengths()[0]; std::vector repeat(batch_size, 0); std::vector labels_offset(batch_size, 0); int max_label_len = 0; int total_label_len = 0; - for(int i = 0; i < batch_size; i++) + for(size_t i = 0; i < batch_size; i++) { if(inputLengths[i] > max_time_step) { @@ -77,18 +77,18 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, } } - int max_S_len = 2 * max_label_len + 1; - int lb_prime_offset = 4 * batch_size + total_label_len; - int problog_offset = lb_prime_offset + batch_size * max_S_len; + size_t max_S_len = 2 * size_t(max_label_len) + 1; + size_t lb_prime_offset = 4 * batch_size + size_t(total_label_len); + size_t problog_offset = lb_prime_offset + batch_size * max_S_len; if(probsDesc.GetType() == miopenHalf) { problog_offset *= 2; } - int alpha_offset = problog_offset + class_sz * batch_size * max_time_step; - int beta_offset = alpha_offset + max_time_step * batch_size * max_S_len; - int batch_bytes = 4 * batch_size; // batch size multiples sizeof(int) + size_t alpha_offset = problog_offset + class_sz * batch_size * max_time_step; + size_t beta_offset = alpha_offset + max_time_step * batch_size * max_S_len; + size_t batch_bytes = sizeof(int) * batch_size; (void)hipMemcpyWithStream(static_cast(workSpace), inputLengths, @@ -112,7 +112,7 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, handle.GetStream()); (void)hipMemcpyWithStream(static_cast(workSpace) + 4 * static_cast(batch_size), labels, - total_label_len * sizeof(int), + size_t(total_label_len) * sizeof(int), hipMemcpyHostToDevice, handle.GetStream()); @@ -131,8 +131,8 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, if(apply_softmax_layer) { std::vector sfm_size(4, 1); - sfm_size[0] = max_time_step * batch_size; - sfm_size[1] = class_sz; + sfm_size[0] = int(max_time_step * batch_size); + sfm_size[1] = int(class_sz); auto sfm_desc = miopen::TensorDescriptor(probsDesc.GetType(), sfm_size); float alpha = 1; @@ -147,7 +147,7 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, MIOPEN_SOFTMAX_LOG, MIOPEN_SOFTMAX_MODE_CHANNEL, 0, - problog_offset); + int(problog_offset)); if(handle.IsProfilingEnabled()) { time += handle.GetKernelTime(); @@ -180,7 +180,7 @@ void CTCLossDescriptor::CTCLoss(const Handle& handle, } else if(blank_label_id >= class_sz) { - blank_label = class_sz - 1; + blank_label = int(class_sz) - 1; } else { @@ -273,9 +273,9 @@ size_t CTCLossDescriptor::GetCTCLossWorkspaceSize(const Handle& handle, "The probability tensor's dimensions do not match the gradient tensor's dimensions"); } - int class_sz = probsDesc.GetLengths()[2]; - int batch_size = probsDesc.GetLengths()[1]; - int max_time_step = probsDesc.GetLengths()[0]; + int class_sz = int(probsDesc.GetLengths()[2]); + size_t batch_size = probsDesc.GetLengths()[1]; + int max_time_step = int(probsDesc.GetLengths()[0]); int max_label_len = 0; int total_label_len = 0; std::vector repeat(batch_size, 0); @@ -283,7 +283,7 @@ size_t CTCLossDescriptor::GetCTCLossWorkspaceSize(const Handle& handle, size_t wksp_sz_lb = 0; size_t wksp_sz_dat = 0; - for(int i = 0; i < batch_size; i++) + for(size_t i = 0; i < batch_size; i++) { if(inputLengths[i] > max_time_step) { @@ -325,21 +325,19 @@ size_t CTCLossDescriptor::GetCTCLossWorkspaceSize(const Handle& handle, wksp_sz_lb += batch_size; // labels - wksp_sz_lb += total_label_len; + wksp_sz_lb += size_t(total_label_len); // labels with blanks - wksp_sz_lb += static_cast(batch_size) * (2 * static_cast(max_label_len) + 1); + wksp_sz_lb += batch_size * (2 * size_t(max_label_len) + 1); // logsoftmax of probs - wksp_sz_dat += static_cast(max_time_step) * batch_size * class_sz; + wksp_sz_dat += size_t(max_time_step) * batch_size * size_t(class_sz); // alphas - wksp_sz_dat += static_cast(max_time_step) * batch_size * - (2 * static_cast(max_label_len) + 1); + wksp_sz_dat += size_t(max_time_step) * batch_size * (2 * size_t(max_label_len) + 1); // beta buffer - wksp_sz_dat += - 2 * static_cast(batch_size) * (2 * static_cast(max_label_len) + 1); + wksp_sz_dat += 2 * batch_size * (2 * size_t(max_label_len) + 1); size_t total_size = wksp_sz_dat * sizeof(float) + wksp_sz_lb * sizeof(int); if(total_size > handle.GetMaxMemoryAllocSize()) diff --git a/projects/miopen/src/db.cpp b/projects/miopen/src/db.cpp index 9ed3d5533a9d..99b67523060a 100644 --- a/projects/miopen/src/db.cpp +++ b/projects/miopen/src/db.cpp @@ -191,10 +191,10 @@ std::optional PlainTextDb::FindRecordUnsafe(const std::string& key, Re static void Copy(std::istream& from, std::ostream& to, std::streamoff count) { - constexpr auto buffer_size_limit = 4 * 1024 * 1024; - const auto buffer_size = std::min(buffer_size_limit, count); - auto buffer = std::vector(buffer_size, 0); - auto left = count; + constexpr auto buffer_size_limit{4 * 1024 * 1024}; + const auto buffer_size = std::min(buffer_size_limit, count); + auto buffer = std::vector(size_t(buffer_size), 0); + auto left = count; while(left > 0 && !from.eof()) { diff --git a/projects/miopen/src/execution_context.cpp b/projects/miopen/src/execution_context.cpp index b82f81395c29..c87f5dfd3d81 100644 --- a/projects/miopen/src/execution_context.cpp +++ b/projects/miopen/src/execution_context.cpp @@ -136,7 +136,7 @@ static bool CalculateIsAmdRocmOpencl(const miopen::ExecutionContext& context) static rocm_meta_version AmdRocmMetadataVersionGetEnv() { - rocm_meta_version val{env::value(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE)}; + rocm_meta_version val = int(env::value(MIOPEN_DEBUG_AMD_ROCM_METADATA_ENFORCE)); if(!val.IsValid()) { MIOPEN_LOG_W("Incorrect MIOPEN_DEBUG_AMD_ROCM_ENFORCE_MDVERSION = " << val.getValue() diff --git a/projects/miopen/src/find_controls.cpp b/projects/miopen/src/find_controls.cpp index 962ea41556cd..e90b668ffd0f 100644 --- a/projects/miopen/src/find_controls.cpp +++ b/projects/miopen/src/find_controls.cpp @@ -84,7 +84,7 @@ FindEnforceAction GetFindEnforceActionImpl() if(str.empty()) return FindEnforceAction::Default_; for(auto& c : str) - c = toupper(static_cast(c)); + c = static_cast(toupper(c)); if(str == "NONE") { return FindEnforceAction::None; @@ -125,7 +125,7 @@ std::optional> GetEnvFindOnlySolverImpl() const auto solver_list = miopen::SplitDelim(slv_str, ';'); for(const auto& kinder : solver_list) { - auto numeric_id = std::strtoul(kinder.c_str(), nullptr, 10); + auto numeric_id = std::strtoull(kinder.c_str(), nullptr, 10); if(errno == ERANGE || numeric_id == 0) { // Assume string in the environment. Try to convert it to numeric id. errno = 0; @@ -209,7 +209,7 @@ std::optional GetFindModeValueImpl2(Variable variable) if(str.empty()) return std::nullopt; for(auto& c : str) - c = toupper(static_cast(c)); + c = static_cast(toupper(c)); if(str == "NORMAL") { return FindMode::Values::Normal; diff --git a/projects/miopen/src/find_db.cpp b/projects/miopen/src/find_db.cpp index 0d260cf1083f..ce3656c30e59 100644 --- a/projects/miopen/src/find_db.cpp +++ b/projects/miopen/src/find_db.cpp @@ -161,7 +161,7 @@ fs::path FindDbRecord_t::GetInstalledPathFile(const Handle& handle, } const auto db_id = handle.GetTargetProperties().DbId(); - const int real_cu_count = handle.GetMaxComputeUnits(); + const int real_cu_count = int(handle.GetMaxComputeUnits()); int closest_cu = std::numeric_limits::max(); for(const auto& entry : all_files) @@ -271,7 +271,7 @@ void FindDbRecord_t::CopyTo(std::vector& to) const { const auto range = content->As(); std::transform(range.begin(), range.end(), std::back_inserter(to), [](const auto& pair) { - return Solution{solver::Id{pair.first}, pair.second.time, pair.second.workspace}; + return Solution{solver::Id{pair.first}, pair.second.time, size_t(pair.second.workspace)}; }); } else diff --git a/projects/miopen/src/fusion.cpp b/projects/miopen/src/fusion.cpp index a7ce5f6980f8..a2d7aeba111a 100644 --- a/projects/miopen/src/fusion.cpp +++ b/projects/miopen/src/fusion.cpp @@ -116,9 +116,9 @@ miopenStatus_t ConvBiasActivFusion(const Handle& handle, MIOPEN_CHECK(fusePlanDesc.Compile(handle)); float alpha = 1.0f; float beta = 0.0f; - float activ_alpha = activationDesc.GetAlpha(); - float activ_beta = activationDesc.GetBeta(); - float activ_gamma = activationDesc.GetGamma(); + float activ_alpha = float(activationDesc.GetAlpha()); + float activ_beta = float(activationDesc.GetBeta()); + float activ_gamma = float(activationDesc.GetGamma()); // Set the Args MIOPEN_CHECK(convOp->SetArgs(fusionArgs, &falpha1, &beta, w)); @@ -164,8 +164,8 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, if(conv_id != -1) { - const auto conv_problem = - problem.GetConvProblem(conv_id, conv::Direction::Forward, bias_id != -1 ? 1 : 0); + const auto conv_problem = problem.GetConvProblem( + size_t(conv_id), conv::Direction::Forward, bias_id != -1 ? 1 : 0); gfx90aaltimpl = conv_problem.GetConv().attribute.gfx90aFp16alt.GetFwd(); in_desc = conv_problem.GetIn(); @@ -176,11 +176,13 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, bias_ptr = allocate_buffer(conv_problem.GetBiasSize()); MIOPEN_LOG_I("bias addr: " << bias_ptr << ", size: " << conv_problem.GetBiasSize()); - params.SetArg(bias_id, std::make_unique(bias_ptr)); + params.SetArg(size_t(bias_id), + std::make_unique(bias_ptr)); } auto wei_ptr = allocate_buffer(conv_problem.GetWeightsSize()); - params.SetArg(conv_id, std::make_unique(wei_ptr)); + params.SetArg(size_t(conv_id), + std::make_unique(wei_ptr)); MIOPEN_LOG_I("weight addr: " << wei_ptr << ", size: " << conv_problem.GetWeightsSize()); } @@ -194,22 +196,22 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, if(activ_fwd_id != -1) { const auto& activ_op = - dynamic_cast(*plan.op_map[activ_fwd_id]); + dynamic_cast(*plan.op_map[size_t(activ_fwd_id)]); - params.SetArg(activ_fwd_id, + params.SetArg(size_t(activ_fwd_id), std::make_unique( alpha, beta, gamma, activ_op.activMode)); } else if(activ_bwd_id != -1) { const auto& activ_op = - dynamic_cast(*plan.op_map[activ_bwd_id]); + dynamic_cast(*plan.op_map[size_t(activ_bwd_id)]); const auto space = activ_op.input_desc.GetNumBytes(); auto x = allocate_buffer(space); auto y = allocate_buffer(space); - params.SetArg(activ_bwd_id, + params.SetArg(size_t(activ_bwd_id), std::make_unique( y, x, alpha, beta, gamma)); } @@ -218,14 +220,14 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, if(tensor_add_op_id != -1) { const auto& tensor_add_op = - dynamic_cast(*plan.op_map[tensor_add_op_id]); + dynamic_cast(*plan.op_map[size_t(tensor_add_op_id)]); assert(&tensor_add_op); float alpha = 1.0f; const auto space = tensor_add_op.tensor_desc.GetNumBytes(); auto ptr = allocate_buffer(space); - params.SetArg(tensor_add_op_id, + params.SetArg(size_t(tensor_add_op_id), std::make_unique(alpha, ptr)); } @@ -238,8 +240,8 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, if(bn_inf_id != -1) { - const auto& bn_op = - dynamic_cast(*plan.op_map[bn_inf_id]); + const auto& bn_op = dynamic_cast( + *plan.op_map[size_t(bn_inf_id)]); out_desc = in_desc = bn_op.input_desc; @@ -259,7 +261,7 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, else if(bn_fwd_id != -1) { const auto& bn_op = - dynamic_cast(*plan.op_map[bn_fwd_id]); + dynamic_cast(*plan.op_map[size_t(bn_fwd_id)]); out_desc = in_desc = bn_op.input_desc; @@ -291,7 +293,7 @@ AllocateBuffersAndMakeFusionInvokeParams(const Handle& handle, else if(bn_bwd_id != -1) { const auto& bn_op = - dynamic_cast(*plan.op_map[bn_bwd_id]); + dynamic_cast(*plan.op_map[size_t(bn_bwd_id)]); out_desc = in_desc = bn_op.input_desc; @@ -482,7 +484,7 @@ miopenStatus_t FusionPlanDescriptor::GetOp(int op_idx, std::shared_ptr algos = {miopenConvolutionFwdAlgoDirect, miopenConvolutionFwdAlgoWinograd}; retAlgoCount = std::min(reqAlgoCount, static_cast(algos.size())); - for(auto idx = 0; idx < retAlgoCount; idx++) + for(size_t idx = 0; idx < retAlgoCount; idx++) { ptrAlgos[idx] = algos[idx]; } @@ -561,7 +563,7 @@ miopenStatus_t ConvForwardOpDescriptor::SetArgs(OperatorArgs& args, float falpha = alpha != nullptr ? *reinterpret_cast(alpha) : 1.0f; float fbeta = beta != nullptr ? *reinterpret_cast(beta) : 0.0f; auto op_args = std::make_unique(falpha, fbeta, w); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -576,7 +578,7 @@ miopenStatus_t ActivFwdFusionOpDescriptor::SetArgs(OperatorArgs& args, { auto op_args = std::make_unique( activAlpha, activBeta, activGamma, activMode); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -598,7 +600,7 @@ miopenStatus_t ActivBwdFusionOpDescriptor::SetArgs(OperatorArgs& args, { auto op_args = std::make_unique( y, x, activAlpha, activBeta, activGamma); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -621,7 +623,7 @@ miopenStatus_t BatchNormInferenceFusionOpDescriptor::SetArgs(OperatorArgs& args, { auto op_args = std::make_unique( bnScale, bnBias, estimatedMean, estimatedVariance, epsilon); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -659,7 +661,7 @@ miopenStatus_t BatchNormFwdTrainFusionOpDescriptor::SetArgs(OperatorArgs& args, bnBias, expAvgFactor, epsilon); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -686,7 +688,7 @@ miopenStatus_t BatchNormBwdTrainFusionOpDescriptor::SetArgs(OperatorArgs& args, { auto op_args = std::make_unique( x, bnScale, bnBias, resBnScaleDiff, resBnBiasDiff, savedMean, savedInvVariance); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } miopenStatus_t @@ -711,7 +713,7 @@ miopenStatus_t BiasFusionOpDescriptor::SetArgs(OperatorArgs& args, ConstData_t bdata) { auto op_args = std::make_unique(bdata); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -725,7 +727,7 @@ miopenStatus_t TensorScaleAddOpDescriptor::SetArgs(OperatorArgs& args, float alpha, ConstData_t tensor_ptr) { auto op_args = std::make_unique(alpha, tensor_ptr); - args.SetArg(GetIdx(), std::move(op_args)); + args.SetArg(size_t(GetIdx()), std::move(op_args)); return miopenStatusSuccess; } @@ -989,7 +991,7 @@ std::vector GetSolutions(const FusionContext& ctx, // algorithm doesn't matter for our purpose here, so we stub it out interim.emplace_back(miopenConvSolution_t{pair.second.time, - pair.second.workspace, + size_t(pair.second.workspace), solver_id.Value(), miopenConvolutionAlgoDirect}); } diff --git a/projects/miopen/src/fusion/problem_description.cpp b/projects/miopen/src/fusion/problem_description.cpp index 2935f1adf4fa..bd2f3b816e0c 100644 --- a/projects/miopen/src/fusion/problem_description.cpp +++ b/projects/miopen/src/fusion/problem_description.cpp @@ -41,7 +41,7 @@ conv::ProblemDescription FusionDescription::GetConvProblem(conv::Direction dir, case conv::Direction::BackwardWeights: MIOPEN_THROW(miopenStatusNotImplemented); } }(); - return GetConvProblem(idx, dir, bias); + return GetConvProblem(size_t(idx), dir, bias); } } // namespace miopen diff --git a/projects/miopen/src/getitem_api.cpp b/projects/miopen/src/getitem_api.cpp index 094f44620f8f..81442ae76748 100644 --- a/projects/miopen/src/getitem_api.cpp +++ b/projects/miopen/src/getitem_api.cpp @@ -59,7 +59,7 @@ static void LogCmdGetitem(const miopenTensorDescriptor_t dyDesc, std::string dy_s; auto dy_dims = miopen::deref(dyDesc).GetLengths(); - for(int i = 0; i < dy_dims.size(); i++) + for(size_t i = 0; i < dy_dims.size(); i++) { dy_s += std::to_string(dy_dims[i]); if(i != dy_dims.size() - 2) @@ -71,7 +71,7 @@ static void LogCmdGetitem(const miopenTensorDescriptor_t dyDesc, { std::string index_s; auto index_dims = miopen::deref(indexDescs[i]).GetLengths(); - for(int j = 0; j < index_dims.size(); j++) + for(size_t j = 0; j < index_dims.size(); j++) { index_s += std::to_string(index_dims[j]); if(j != index_dims.size() - 2) @@ -83,7 +83,7 @@ static void LogCmdGetitem(const miopenTensorDescriptor_t dyDesc, std::string dx_s; auto dx_dims = miopen::deref(dxDesc).GetLengths(); - for(int i = 0; i < dx_dims.size(); i++) + for(size_t i = 0; i < dx_dims.size(); i++) { dx_s += std::to_string(dx_dims[i]); if(i != dx_dims.size() - 2) @@ -93,7 +93,7 @@ static void LogCmdGetitem(const miopenTensorDescriptor_t dyDesc, ss << " -dx " << dx_s; std::string dims_s; - for(int i = 0; i < dimCount; i++) + for(uint32_t i = 0; i < dimCount; i++) { dims_s += std::to_string(dims[i]); if(i != dimCount - 2) @@ -102,7 +102,7 @@ static void LogCmdGetitem(const miopenTensorDescriptor_t dyDesc, ss << " -dims" << dims_s; std::string slices_s; - for(int i = 0; i < sliceCount; i++) + for(uint32_t i = 0; i < sliceCount; i++) { slices_s += std::to_string(slices[i]); if(i != sliceCount - 2) diff --git a/projects/miopen/src/include/miopen/batchnorm/problem_description.hpp b/projects/miopen/src/include/miopen/batchnorm/problem_description.hpp index 7230d93709e6..a4497822b868 100644 --- a/projects/miopen/src/include/miopen/batchnorm/problem_description.hpp +++ b/projects/miopen/src/include/miopen/batchnorm/problem_description.hpp @@ -357,7 +357,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, NetworkConfig MakeNetworkConfig() const override; template - static void Visit(Self&& self, std::function f) + static void Visit(Self&& self, std::function f) { // The column names match the driver command line argument names f(self.spatial_dim, "spatial_dim"); @@ -425,8 +425,8 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, std::string in_layout = "NCHW"; std::string out_layout = "NCHW"; std::string din_layout = "NCHW"; - std::size_t spatial_dim = 2; - std::size_t min_workgroups = 1; + unsigned spatial_dim = 2; + size_t min_workgroups = 1; ActivationDescriptor activDesc; @@ -435,7 +435,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, std::string ComputeOutLayout() const { return ComputeLayout(yOrDyDesc); } std::string ComputeDinLayout() const { return ComputeLayout(dxDesc); } - size_t GetSpatialDims() const { return spatial_dim; } + unsigned GetSpatialDims() const { return spatial_dim; } std::size_t GetBatchSize() const { return GetN5(GetSpatialDims(), xDesc.GetLengths()); } std::size_t GetChannel() const { return GetC5(GetSpatialDims(), xDesc.GetLengths()); } diff --git a/projects/miopen/src/include/miopen/buffer_info.hpp b/projects/miopen/src/include/miopen/buffer_info.hpp index 1757d586eaf7..6583137dc4f2 100644 --- a/projects/miopen/src/include/miopen/buffer_info.hpp +++ b/projects/miopen/src/include/miopen/buffer_info.hpp @@ -68,15 +68,22 @@ enum class LPart_t struct BuffInfo { size_t total_byte_size = 0; - int element_size = 4; + size_t element_size = 4; struct { unsigned int nk = 0, g = 0, c = 0, h = 0, w = 0; } stride{}, byte_stride{}, size{}; BuffInfo() {} - BuffInfo(MemLayout_t layout, int nk, int c, int h, int w, int g, int _element_size); - BuffInfo(MemLayout_t layout, int nk, int c, int h, int w, int _element_size) + BuffInfo(MemLayout_t layout, + unsigned nk, + unsigned c, + unsigned h, + unsigned w, + unsigned g, + size_t _element_size); + BuffInfo( + MemLayout_t layout, unsigned nk, unsigned c, unsigned h, unsigned w, size_t _element_size) : BuffInfo(layout, nk, c, h, w, 1, _element_size) { } @@ -95,7 +102,7 @@ template <> inline unsigned int FillStride(BuffInfo* b, unsigned int cum_stride) { b->stride.h = cum_stride; - b->byte_stride.h = cum_stride * b->element_size; + b->byte_stride.h = static_cast(cum_stride * b->element_size); return b->size.h * cum_stride; } @@ -103,7 +110,7 @@ template <> inline unsigned int FillStride(BuffInfo* b, unsigned int cum_stride) { b->stride.w = cum_stride; - b->byte_stride.w = cum_stride * b->element_size; + b->byte_stride.w = static_cast(cum_stride * b->element_size); return b->size.w * cum_stride; } @@ -111,7 +118,7 @@ template <> inline unsigned int FillStride(BuffInfo* b, unsigned int cum_stride) { b->stride.c = cum_stride; - b->byte_stride.c = cum_stride * b->element_size; + b->byte_stride.c = static_cast(cum_stride * b->element_size); return b->size.c * cum_stride; } @@ -119,7 +126,7 @@ template <> inline unsigned int FillStride(BuffInfo* b, unsigned int cum_stride) { b->stride.nk = cum_stride; - b->byte_stride.nk = cum_stride * b->element_size; + b->byte_stride.nk = static_cast(cum_stride * b->element_size); return b->size.nk * cum_stride; } @@ -127,7 +134,7 @@ template <> inline unsigned int FillStride(BuffInfo* b, unsigned int cum_stride) { b->stride.g = cum_stride; - b->byte_stride.g = cum_stride * b->element_size; + b->byte_stride.g = static_cast(cum_stride * b->element_size); return b->size.g * cum_stride; } diff --git a/projects/miopen/src/include/miopen/cat/problem_description.hpp b/projects/miopen/src/include/miopen/cat/problem_description.hpp index a445cbb50e35..d77a0b7c2eba 100644 --- a/projects/miopen/src/include/miopen/cat/problem_description.hpp +++ b/projects/miopen/src/include/miopen/cat/problem_description.hpp @@ -63,7 +63,7 @@ struct ProblemDescription : ProblemDescriptionBase } auto ydims = yDesc.GetLengths(); - ydims[dim] = 0; + ydims[size_t(dim)] = 0; for(int i = 0; i < xCount; i++) { auto& xdims = xDescs[i]->GetLengths(); @@ -74,7 +74,7 @@ struct ProblemDescription : ProblemDescriptionBase "CatForward: Tensor dimension lengths do not match."); } - for(int j = 0; j < ydims.size(); j++) + for(size_t j = 0; j < ydims.size(); j++) { if((j != dim) && (ydims[j] != xdims[j])) { @@ -82,10 +82,10 @@ struct ProblemDescription : ProblemDescriptionBase "CatForward: Tensor dimension lengths do not match."); } } - ydims[dim] += xdims[dim]; + ydims[size_t(dim)] += xdims[size_t(dim)]; } - if(ydims[dim] != yDesc.GetLengths()[dim]) + if(ydims[size_t(dim)] != yDesc.GetLengths()[size_t(dim)]) { MIOPEN_THROW(miopenStatusBadParm, "CatForward: Tensor dimension lengths do not match."); } diff --git a/projects/miopen/src/include/miopen/conv/asm_implicit_gemm.hpp b/projects/miopen/src/include/miopen/conv/asm_implicit_gemm.hpp index d57028086fcb..a350f108371b 100644 --- a/projects/miopen/src/include/miopen/conv/asm_implicit_gemm.hpp +++ b/projects/miopen/src/include/miopen/conv/asm_implicit_gemm.hpp @@ -168,13 +168,13 @@ HeuristicInitMacroTileNoPadGemmK(size_t gemm_m, size_t gemm_k, const std::vector>& tile_list) { - int m_per_block, n_per_block, k_per_block; + size_t m_per_block, n_per_block, k_per_block; bool found = false; // find exact divide for(const auto& tile : tile_list) { - int mpb, npb, kpb; + size_t mpb, npb, kpb; std::tie(mpb, npb, kpb) = tile; if(gemm_m % mpb == 0 && gemm_n % npb == 0 && gemm_k % kpb == 0) { @@ -189,12 +189,12 @@ HeuristicInitMacroTileNoPadGemmK(size_t gemm_m, if(!found) { size_t min_pad_pixel = std::numeric_limits::max(); - int mpb_pad = 0; - int npb_pad = 0; + size_t mpb_pad{0}; + size_t npb_pad{0}; // first try gemm_m, gemm_n padding for(const auto& tile : tile_list) { - int mpb, npb, kpb; + size_t mpb, npb, kpb; std::tie(mpb, npb, kpb) = tile; if(gemm_k % kpb != 0) continue; @@ -212,7 +212,7 @@ HeuristicInitMacroTileNoPadGemmK(size_t gemm_m, // second, we need find the max k_per_block among the same mpb/npb per block for(const auto& tile : tile_list) { - int mpb, npb, kpb; + size_t mpb, npb, kpb; std::tie(mpb, npb, kpb) = tile; if(mpb == mpb_pad && npb == npb_pad) { @@ -245,8 +245,8 @@ static inline int igemm_split_batch_size(const int hi, const int c, const int data_byte) { - size_t image_size_input = static_cast(c) * hi * wi * data_byte; - size_t image_size_output = static_cast(k) * ho * wo * data_byte; + size_t image_size_input = static_cast(c * hi * wi * data_byte); + size_t image_size_output = static_cast(k * ho * wo * data_byte); constexpr size_t max_tensor_size = 0xffffffffUL; size_t image_size = std::max(image_size_input, image_size_output); @@ -257,7 +257,7 @@ static inline int igemm_split_batch_size(const int hi, // Round up splits: we must find the largest multiple of n, max_n, s.t. // max_n * image_size <= max_tensor_size - size_t max_n = max_tensor_size / image_size; + int max_n = static_cast(max_tensor_size / image_size); if(max_n > n) { max_n = n % max_n; @@ -266,8 +266,8 @@ static inline int igemm_split_batch_size(const int hi, { // find the smallest multiple m of n such that (n / m) * image_size <= max_tensor_size. // once m is known, max_n := (n / m) - size_t m = std::ceil(n / max_n); // m >= n * (image_size / max_tensor_size) - size_t _sqrt_n = std::sqrt(n); + int m = int(std::ceil(n / max_n)); // m >= n * (image_size / max_tensor_size) + int _sqrt_n = int(std::sqrt(n)); while(n % max_n != 0) { if(n % m == 0) diff --git a/projects/miopen/src/include/miopen/conv/invokers/impl_gemm_dynamic.hpp b/projects/miopen/src/include/miopen/conv/invokers/impl_gemm_dynamic.hpp index 3248989c4bdc..0d02a3127293 100644 --- a/projects/miopen/src/include/miopen/conv/invokers/impl_gemm_dynamic.hpp +++ b/projects/miopen/src/include/miopen/conv/invokers/impl_gemm_dynamic.hpp @@ -49,21 +49,21 @@ ComputeDynamicIGemmForwardKernelArgs(const ProblemDescription& problem, con { std::vector opArgs; // clang-format off - int hi = problem.GetInHeight(); - int wi = problem.GetInWidth(); - int n = problem.GetInBatchSize(); - int k = problem.GetOutChannels(); - int c = problem.GetInChannels(); - int ho = problem.GetOutHeight(); - int wo = problem.GetOutWidth(); + int hi = int(problem.GetInHeight()); + int wi = int(problem.GetInWidth()); + int n = int(problem.GetInBatchSize()); + int k = int(problem.GetOutChannels()); + int c = int(problem.GetInChannels()); + int ho = int(problem.GetOutHeight()); + int wo = int(problem.GetOutWidth()); int stride_h = problem.GetKernelStrideH(); int stride_w = problem.GetKernelStrideW(); int dilation_h = problem.GetDilationH(); int dilation_w = problem.GetDilationW(); int pad_h = problem.GetPadH(); int pad_w = problem.GetPadW(); - int y = problem.GetWeightsHeight(); - int x = problem.GetWeightsWidth(); + int y = int(problem.GetWeightsHeight()); + int x = int(problem.GetWeightsWidth()); int pack0 = cfg; // clang-format on @@ -97,21 +97,21 @@ ComputeDynamicIGemmForwardKernelArgs( { std::vector opArgs; // clang-format off - int hi = problem.GetInHeight(); - int wi = problem.GetInWidth(); - int n = problem.GetInBatchSize(); - int k = problem.GetOutChannels(); - int c = problem.GetInChannels(); - int ho = problem.GetOutHeight(); - int wo = problem.GetOutWidth(); + int hi = int(problem.GetInHeight()); + int wi = int(problem.GetInWidth()); + int n = int(problem.GetInBatchSize()); + int k = int(problem.GetOutChannels()); + int c = int(problem.GetInChannels()); + int ho = int(problem.GetOutHeight()); + int wo = int(problem.GetOutWidth()); int stride_h = problem.GetKernelStrideH(); int stride_w = problem.GetKernelStrideW(); int dilation_h = problem.GetDilationH(); int dilation_w = problem.GetDilationW(); int pad_h = problem.GetPadH(); int pad_w = problem.GetPadW(); - int y = problem.GetWeightsHeight(); - int x = problem.GetWeightsWidth(); + int y = int(problem.GetWeightsHeight()); + int x = int(problem.GetWeightsWidth()); int group = problem.GetGroupCount(); int pack0 = 0; // clang-format on @@ -124,19 +124,19 @@ ComputeDynamicIGemmForwardKernelArgs( nxe == 0 ? (ho * wo) : ((ho * wo + nxb - 1) / nxb) * nxb; // pad to nxb modulo when nxe != 0 // init magic division parameters - uint32_t nb_n0 = cfg.tensor_b_cluster_lengths[2] * cfg.tensor_b_thread_lengths[2]; - uint32_t nb_n1b = cfg.tensor_b_cluster_lengths[3] * cfg.tensor_b_thread_lengths[3]; - uint32_t unmerge_sub_n = cfg.gemm_n_per_block / nxb; + uint32_t nb_n0 = uint32_t(cfg.tensor_b_cluster_lengths[2] * cfg.tensor_b_thread_lengths[2]); + uint32_t nb_n1b = uint32_t(cfg.tensor_b_cluster_lengths[3] * cfg.tensor_b_thread_lengths[3]); + uint32_t unmerge_sub_n = uint32_t(cfg.gemm_n_per_block / nxb); uint32_t unmerge_sub_n1 = unmerge_sub_n / nb_n0; - magic_div_u32_t mdiv_0 = magic_div_u32_gen(gemm_m / cfg.gemm_m_per_block); - magic_div_u32_t mdiv_1 = magic_div_u32_gen(b * unmerge_sub_n1 / nb_n1b); - magic_div_u32_t mdiv_2 = magic_div_u32_gen(y * x); - magic_div_u32_t mdiv_3 = magic_div_u32_gen(x); - magic_div_u32_t mdiv_4 = magic_div_u32_gen(b); - magic_div_u32_t mdiv_5 = magic_div_u32_gen(wo); - magic_div_u32_t mdiv_6 = - magic_div_u32_gen((n * b * (gemm_m)) / (cfg.gemm_m_per_block * cfg.gemm_n_per_block)); + magic_div_u32_t mdiv_0 = magic_div_u32_gen(uint32_t(gemm_m / cfg.gemm_m_per_block)); + magic_div_u32_t mdiv_1 = magic_div_u32_gen(uint32_t(b) * unmerge_sub_n1 / nb_n1b); + magic_div_u32_t mdiv_2 = magic_div_u32_gen(uint32_t(y * x)); + magic_div_u32_t mdiv_3 = magic_div_u32_gen(uint32_t(x)); + magic_div_u32_t mdiv_4 = magic_div_u32_gen(uint32_t(b)); + magic_div_u32_t mdiv_5 = magic_div_u32_gen(uint32_t(wo)); + magic_div_u32_t mdiv_6 = magic_div_u32_gen( + uint32_t((n * b * gemm_m) / (cfg.gemm_m_per_block * cfg.gemm_n_per_block))); uint32_t magic_0 = mdiv_0.magic; uint32_t magic_1 = mdiv_1.magic; diff --git a/projects/miopen/src/include/miopen/conv/problem_description.hpp b/projects/miopen/src/include/miopen/conv/problem_description.hpp index 2ba3fde25ac3..749d18be1c6d 100644 --- a/projects/miopen/src/include/miopen/conv/problem_description.hpp +++ b/projects/miopen/src/include/miopen/conv/problem_description.hpp @@ -72,7 +72,7 @@ constexpr TElement GetW3(unsigned spatial_dims, const std::vector& dat template constexpr auto GetCHWN(const std::vector& data) { - return miopen::tien<4>(data, 1); + return miopen::tien<4>(data, 1u); } template @@ -138,7 +138,7 @@ struct ProblemDescription : ProblemDescriptionBase } // Conv descriptor getters - unsigned GetSpatialDims() const { return conv.GetSpatialDimension(); } + unsigned GetSpatialDims() const { return static_cast(conv.GetSpatialDimension()); } int GetPadD() const { return GetD3(GetSpatialDims(), conv.GetConvPads()); } int GetPadH() const { return GetH3(GetSpatialDims(), conv.GetConvPads()); } int GetPadW() const { return GetW3(GetSpatialDims(), conv.GetConvPads()); } @@ -149,7 +149,7 @@ struct ProblemDescription : ProblemDescriptionBase int GetDilationH() const { return GetH3(GetSpatialDims(), conv.GetConvDilations()); } int GetDilationW() const { return GetW3(GetSpatialDims(), conv.GetConvDilations()); } int GetGroupCount() const { return conv.GetGroupCount(); } - int GetVectorLength() const { return in.GetVectorLength(); } + int GetVectorLength() const { return static_cast(in.GetVectorLength()); } // In getters miopenDataType_t GetInDataType() const { return in.GetType(); } @@ -363,7 +363,7 @@ struct ProblemDescription : ProblemDescriptionBase } template - static void Visit(Self&& self, std::function f) + static void Visit(Self&& self, std::function f) { // The column names match the driver command line argument names f(self.GetSpatialDims(), "spatial_dim"); @@ -376,17 +376,17 @@ struct ProblemDescription : ProblemDescriptionBase f(self.GetWeightsDepth(), "fil_d"); f(self.GetOutChannels(), "out_channels"); f(self.GetBatchSize(), "batchsize"); - f(self.GetPadH(), "pad_h"); - f(self.GetPadW(), "pad_w"); - f(self.GetPadD(), "pad_d"); - f(self.GetKernelStrideH(), "conv_stride_h"); - f(self.GetKernelStrideW(), "conv_stride_w"); - f(self.GetKernelStrideD(), "conv_stride_d"); - f(self.GetDilationH(), "dilation_h"); - f(self.GetDilationW(), "dilation_w"); - f(self.GetDilationD(), "dilation_d"); - f(self.GetBias(), "bias"); - f(self.GetGroupCount(), "group_count"); + f(size_t(self.GetPadH()), "pad_h"); + f(size_t(self.GetPadW()), "pad_w"); + f(size_t(self.GetPadD()), "pad_d"); + f(size_t(self.GetKernelStrideH()), "conv_stride_h"); + f(size_t(self.GetKernelStrideW()), "conv_stride_w"); + f(size_t(self.GetKernelStrideD()), "conv_stride_d"); + f(size_t(self.GetDilationH()), "dilation_h"); + f(size_t(self.GetDilationW()), "dilation_w"); + f(size_t(self.GetDilationD()), "dilation_d"); + f(size_t(self.GetBias()), "bias"); + f(size_t(self.GetGroupCount()), "group_count"); } template @@ -404,7 +404,7 @@ struct ProblemDescription : ProblemDescriptionBase template static void VisitAll(Self&& self, const Visitor& f) { - Visit(std::forward(self), [&](int64_t value, std::string name) { f(value, name); }); + Visit(std::forward(self), [&](size_t value, std::string name) { f(value, name); }); Visit(std::forward(self), [&](std::string value, std::string name) { f(value, name); }); } @@ -462,7 +462,7 @@ inline bool IsPointOutput3dStrideEqFilter(const ProblemDescription& problem, if(w_lens.size() != 5 || pads.size() != 3 || strides.size() != 3 || dilations.size() != 3) return false; - for(int i = 0; i < 3; ++i) + for(size_t i = 0; i < 3; ++i) { if(pads[i] != 0 || dilations[i] != 1) return false; diff --git a/projects/miopen/src/include/miopen/db.hpp b/projects/miopen/src/include/miopen/db.hpp index 2d9a02ccd378..34b378fe506f 100644 --- a/projects/miopen/src/include/miopen/db.hpp +++ b/projects/miopen/src/include/miopen/db.hpp @@ -352,7 +352,7 @@ class DbTimer if(logging) { const auto end = std::chrono::high_resolution_clock::now(); - MIOPEN_LOG_I2("Db::" << funcName << " time: " << (end - start).count() * .000001f + MIOPEN_LOG_I2("Db::" << funcName << " time: " << float((end - start).count()) * .000001f << " ms"); } return std::move(ret); // NOLINT(clang-analyzer-cplusplus.Move) diff --git a/projects/miopen/src/include/miopen/execution_context.hpp b/projects/miopen/src/include/miopen/execution_context.hpp index 79e4efccf698..3e74241d96db 100644 --- a/projects/miopen/src/include/miopen/execution_context.hpp +++ b/projects/miopen/src/include/miopen/execution_context.hpp @@ -137,9 +137,9 @@ struct ExecutionContext else { MIOPEN_LOG_I2("inexact embedded perf database search"); - const auto db_id = GetStream().GetTargetProperties().DbId(); - const int real_cu_count = GetStream().GetMaxComputeUnits(); - int closest_cu = std::numeric_limits::max(); + const auto db_id = GetStream().GetTargetProperties().DbId(); + const auto real_cu_count = GetStream().GetMaxComputeUnits(); + int closest_cu = std::numeric_limits::max(); fs::path best_path; for(auto const& entry : miopen_data()) { @@ -207,7 +207,7 @@ struct ExecutionContext { MIOPEN_LOG_I2("inexact perf database search"); const auto db_id = GetStream().GetTargetProperties().DbId(); - const int real_cu_count = GetStream().GetMaxComputeUnits(); + const int real_cu_count = static_cast(GetStream().GetMaxComputeUnits()); if(fs::is_directory(pdb_path)) { MIOPEN_LOG_I2("Iterating over perf db directory " << pdb_path); diff --git a/projects/miopen/src/include/miopen/find_db.hpp b/projects/miopen/src/include/miopen/find_db.hpp index ec1e533e8e71..dbbe409f41ce 100644 --- a/projects/miopen/src/include/miopen/find_db.hpp +++ b/projects/miopen/src/include/miopen/find_db.hpp @@ -184,7 +184,7 @@ class FindDbRecord_t const auto algo = solution.GetSolver().GetAlgo(problem.GetDirection()); record.content->SetValues( solution.GetSolver().ToString(), - FindDbData{solution.GetTime(), solution.GetWorkspaceSize(), algo}); + FindDbData{solution.GetTime(), int64_t(solution.GetWorkspaceSize()), algo}); } return result.solutions; diff --git a/projects/miopen/src/include/miopen/fusion/problem_description.hpp b/projects/miopen/src/include/miopen/fusion/problem_description.hpp index 014e337fb4fe..973bb2dcb577 100644 --- a/projects/miopen/src/include/miopen/fusion/problem_description.hpp +++ b/projects/miopen/src/include/miopen/fusion/problem_description.hpp @@ -221,25 +221,28 @@ struct FusionDescription : ProblemDescriptionBase { if(op->kind() == miopenFusionOpConvForward) { - const auto prob = GetConvProblem(op->GetIdx(), conv::Direction::Forward); + const auto prob = GetConvProblem(size_t(op->GetIdx()), + conv::Direction::Forward); net_config << prob.MakeNetworkConfig().ToString(); } else if(op->kind() == miopenFusionOpBatchNormInference) { - const auto prob = - GetBnProblem(op->GetIdx(), miopen::batchnorm::Direction::ForwardInference); + const auto prob = GetBnProblem(size_t(op->GetIdx()), + miopen::batchnorm::Direction::ForwardInference); net_config << prob.MakeNetworkConfig().ToString(); } else if(op->kind() == miopenFusionOpBatchNormFwdTrain) { const auto prob = - GetBnProblem(op->GetIdx(), miopen::batchnorm::Direction::ForwardTraining); + GetBnProblem(size_t(op->GetIdx()), + miopen::batchnorm::Direction::ForwardTraining); net_config << prob.MakeNetworkConfig().ToString(); } else if(op->kind() == miopenFusionOpBatchNormBwdTrain) { const auto prob = - GetBnProblem(op->GetIdx(), miopen::batchnorm::Direction::Backward); + GetBnProblem(size_t(op->GetIdx()), + miopen::batchnorm::Direction::Backward); net_config << prob.MakeNetworkConfig().ToString(); } else diff --git a/projects/miopen/src/include/miopen/fusion/utils.hpp b/projects/miopen/src/include/miopen/fusion/utils.hpp index 173b9ad1611f..ec2b87e861ec 100644 --- a/projects/miopen/src/include/miopen/fusion/utils.hpp +++ b/projects/miopen/src/include/miopen/fusion/utils.hpp @@ -36,7 +36,7 @@ inline int GetOpIdx(const std::vector>& op_m { auto it = std::find_if( op_map.cbegin(), op_map.cend(), [op](auto&& item) { return item->kind() == op; }); - return it == op_map.cend() ? -1 : std::distance(op_map.cbegin(), it); + return it == op_map.cend() ? -1 : int(std::distance(op_map.cbegin(), it)); } inline bool WinoCommonIsApplicable(const FusionContext& context, const FusionDescription& problem) @@ -71,7 +71,7 @@ inline bool WinoCommonIsApplicable(const FusionContext& context, const FusionDes }(); if(activ_idx != -1) { - const auto& activ_op = dynamic_cast(*desc.op_map[activ_idx]); + const auto& activ_op = dynamic_cast(*desc.op_map[size_t(activ_idx)]); const auto activ_mode = activ_op.activMode; if(!(activ_mode == miopenActivationRELU || activ_mode == miopenActivationLEAKYRELU)) return false; diff --git a/projects/miopen/src/include/miopen/generic_search.hpp b/projects/miopen/src/include/miopen/generic_search.hpp index 9c7b571ad2a8..fb11bead4f0a 100644 --- a/projects/miopen/src/include/miopen/generic_search.hpp +++ b/projects/miopen/src/include/miopen/generic_search.hpp @@ -489,11 +489,11 @@ auto GenericSearch(const Solver s, // terminate search when perf is less than cutoff float cutoff_time = context.generic_search_worst_time; if(cutoff_time < std::numeric_limits::max()) - cutoff_time *= env::value(MIOPEN_SEARCH_CUTOFF_MUL); + cutoff_time *= float(env::value(MIOPEN_SEARCH_CUTOFF_MUL)); // skip detailed measurement for configs slower than skip_time float skip_time = context.generic_search_best_time; if(skip_time < std::numeric_limits::max()) - skip_time *= env::value(MIOPEN_SEARCH_SKIP_PCT) / 100.0f; + skip_time *= float(env::value(MIOPEN_SEARCH_SKIP_PCT)) / 100.0f; bool rec_results = perf_solsp || using_search_cutoff; diff --git a/projects/miopen/src/include/miopen/getitem/problem_description.hpp b/projects/miopen/src/include/miopen/getitem/problem_description.hpp index fed4e78d2258..890c0194b2a2 100644 --- a/projects/miopen/src/include/miopen/getitem/problem_description.hpp +++ b/projects/miopen/src/include/miopen/getitem/problem_description.hpp @@ -66,7 +66,7 @@ struct ProblemDescription : ProblemDescriptionBase IsValidSlices(); } - ProblemDescription(const int32_t indexCount_, const TensorDescriptor* const* indexDescs_) + ProblemDescription(const uint32_t indexCount_, const TensorDescriptor* const* indexDescs_) : indexCount(indexCount_), indexDescs(indexDescs_) { IsValidIndexsLength(); @@ -74,7 +74,7 @@ struct ProblemDescription : ProblemDescriptionBase } const TensorDescriptor& GetDYDesc() const { return dyDesc; } - int32_t GetIndexCount() const { return indexCount; } + uint32_t GetIndexCount() const { return indexCount; } const TensorDescriptor& GetIndexDesc(int i) const { if(i >= indexCount) @@ -85,7 +85,7 @@ struct ProblemDescription : ProblemDescriptionBase } const TensorDescriptor& GetDXDesc() const { return dxDesc; } const TensorDescriptor& GetErrorDesc() const { return errorDesc; } - int32_t GetDimCount() const { return dimCount; } + uint32_t GetDimCount() const { return dimCount; } int32_t GetDim(int i) const { if(i >= indexCount) @@ -94,7 +94,7 @@ struct ProblemDescription : ProblemDescriptionBase } return dims[i]; } - int32_t GetSliceCount() const { return sliceCount; } + uint32_t GetSliceCount() const { return sliceCount; } int32_t GetSlice(int i) const { if(i >= sliceCount) @@ -103,7 +103,7 @@ struct ProblemDescription : ProblemDescriptionBase } return slices[i]; } - int32_t GetOffset() const { return offset; } + uint32_t GetOffset() const { return offset; } bool IsValidIndexsLength() const { diff --git a/projects/miopen/src/include/miopen/layernorm/problem_description.hpp b/projects/miopen/src/include/miopen/layernorm/problem_description.hpp index 61890a159fcc..dd1f9228f38e 100644 --- a/projects/miopen/src/include/miopen/layernorm/problem_description.hpp +++ b/projects/miopen/src/include/miopen/layernorm/problem_description.hpp @@ -199,7 +199,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, {}, {}, epsilon_, - xDesc_.GetLengths().size() - 1) + int(xDesc_.GetLengths().size()) - 1) { } @@ -224,7 +224,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, dwDesc_, {}, {}, - xDesc_.GetLengths().size() - 1) + int(xDesc_.GetLengths().size()) - 1) { } @@ -356,7 +356,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, NetworkConfig MakeNetworkConfig() const override; template - static void Visit(Self&& self, std::function f) + static void Visit(Self&& self, std::function f) { // The column names match the driver command line argument names f(static_cast(self.direction), "direction"); @@ -365,7 +365,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, f(self.GetDepth(), "in_d"); f(self.GetHeight(), "in_h"); f(self.GetWidth(), "in_w"); - f(self.normalized_dim, "normalized_dim"); + f(static_cast(self.normalized_dim), "normalized_dim"); f(static_cast(self.mode), "mode"); f(self.stride, "stride"); @@ -380,7 +380,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, template static void VisitAll(Self&& self, const Visitor& f) { - Visit(std::forward(self), [&](int64_t value, std::string name) { f(value, name); }); + Visit(std::forward(self), [&](uint64_t value, std::string name) { f(value, name); }); Visit(std::forward(self), [&](std::string value, std::string name) { f(value, name); }); } diff --git a/projects/miopen/src/include/miopen/mlo_internal.hpp b/projects/miopen/src/include/miopen/mlo_internal.hpp index 62ccab567922..d6a0208e04b9 100644 --- a/projects/miopen/src/include/miopen/mlo_internal.hpp +++ b/projects/miopen/src/include/miopen/mlo_internal.hpp @@ -136,25 +136,25 @@ struct AnyInvokeParams; MIOPEN_INTERNALS_EXPORT miopen::PerformanceDb GetDb(const miopen::ExecutionContext& ctx); template -size_t setTopDescFromMLDesc(int spatial_dims, TTo& to, const TensorDescriptor& tensor) +size_t setTopDescFromMLDesc(unsigned spatial_dims, TTo& to, const TensorDescriptor& tensor) { return SetDescFromMLDesc(spatial_dims, to, tensor, &TTo::setTopDescr); } template -size_t setBotDescFromMLDesc(int spatial_dims, TTo& to, const TensorDescriptor& tensor) +size_t setBotDescFromMLDesc(unsigned spatial_dims, TTo& to, const TensorDescriptor& tensor) { return SetDescFromMLDesc(spatial_dims, to, tensor, &TTo::setBotDescr); } template -size_t setTopDfDescFromMLDesc(int spatial_dims, TTo& to, const TensorDescriptor& tensor) +size_t setTopDfDescFromMLDesc(unsigned spatial_dims, TTo& to, const TensorDescriptor& tensor) { return SetDescFromMLDesc(spatial_dims, to, tensor, &TTo::setTopDfDescr); } template -size_t setBotDfDescFromMLDesc(int spatial_dims, TTo& to, const TensorDescriptor& tensor) +size_t setBotDfDescFromMLDesc(unsigned spatial_dims, TTo& to, const TensorDescriptor& tensor) { return SetDescFromMLDesc(spatial_dims, to, tensor, &TTo::setBotDfDescr); } @@ -408,8 +408,8 @@ struct mlo_construct_activ_lrn_pooling_common : mlo_construct_base stride, w_stride); - const int data_len = miopen::GetTypeSize(data_type); - const size_t size = + const int data_len = static_cast(miopen::GetTypeSize(data_type)); + const int size = (layout == "NCHW") ? batch * channels * depth * height * width * data_len : batch * batch_stride * channel_stride * stride * w_stride * data_len; @@ -419,7 +419,7 @@ struct mlo_construct_activ_lrn_pooling_common : mlo_construct_base _out_df_batch_stride = batch_stride; _out_df_channel_stride = channel_stride; _out_df_stride = stride; - _top_df_sz = size; + _top_df_sz = size_t(size); _out_df_layout = layout; _out_df_data_type = miopen::GetDataTypeName(data_type); } @@ -451,8 +451,8 @@ struct mlo_construct_activ_lrn_pooling_common : mlo_construct_base stride, w_stride); - const int data_len = miopen::GetTypeSize(data_type); - const size_t size = + const int data_len = static_cast(miopen::GetTypeSize(data_type)); + const int size = (layout == "NCHW") ? batch * channels * depth * height * width * data_len : batch * batch_stride * channel_stride * stride * w_stride * data_len; @@ -462,7 +462,7 @@ struct mlo_construct_activ_lrn_pooling_common : mlo_construct_base _in_df_batch_stride = batch_stride; _in_df_channel_stride = channel_stride; _in_df_stride = stride; - _bot_df_sz = size; + _bot_df_sz = size_t(size); _in_df_layout = layout; _in_df_data_type = miopen::GetDataTypeName(data_type); } diff --git a/projects/miopen/src/include/miopen/perf_field.hpp b/projects/miopen/src/include/miopen/perf_field.hpp index ce166287508e..14aacbf00525 100644 --- a/projects/miopen/src/include/miopen/perf_field.hpp +++ b/projects/miopen/src/include/miopen/perf_field.hpp @@ -48,12 +48,12 @@ struct PerfField struct FindDbData : solver::Serializable { float time; - std::size_t workspace; + int64_t workspace; std::string algorithm; FindDbData() : time(-1), workspace(-1), algorithm("") {} - FindDbData(float time_, std::size_t workspace_, const std::string& algorithm_) + FindDbData(float time_, int64_t workspace_, const std::string& algorithm_) : time(time_), workspace(workspace_), algorithm(algorithm_) { } diff --git a/projects/miopen/src/include/miopen/performance_config.hpp b/projects/miopen/src/include/miopen/performance_config.hpp index b84eead86c53..e685746f942d 100644 --- a/projects/miopen/src/include/miopen/performance_config.hpp +++ b/projects/miopen/src/include/miopen/performance_config.hpp @@ -75,7 +75,7 @@ struct PerfConfigBaseCK : PerfConfig void Serialize(std::ostream& stream) const final { const Derived& self = static_cast(*this); - stream.write(self.kernel_id.c_str(), self.kernel_id.length()); + stream.write(self.kernel_id.c_str(), std::streamsize(self.kernel_id.length())); } bool Deserialize(const std::string& s) final diff --git a/projects/miopen/src/include/miopen/problem_description.hpp b/projects/miopen/src/include/miopen/problem_description.hpp index 82ef07e8a09c..a216514de1c6 100644 --- a/projects/miopen/src/include/miopen/problem_description.hpp +++ b/projects/miopen/src/include/miopen/problem_description.hpp @@ -38,17 +38,17 @@ namespace miopen { // Tensor Helper APIs template size_t -SetDescFromMLDesc(int spatial_dims, TTo& to, const TensorDescriptor& tensor, const TFunc method) +SetDescFromMLDesc(unsigned spatial_dims, TTo& to, const TensorDescriptor& tensor, const TFunc method) { int n, c, d = 1, h, w; int ns, cs, hs, ws; if(spatial_dims == 3) - std::tie(n, c, d, h, w) = miopen::tien<5>(tensor.GetLengths(), 1); + std::tie(n, c, d, h, w) = miopen::tien<5>(tensor.GetLengths(), 1u); else - std::tie(n, c, h, w) = miopen::tien<4>(tensor.GetLengths(), 1); + std::tie(n, c, h, w) = miopen::tien<4>(tensor.GetLengths(), 1u); - std::tie(ns, cs, hs, ws) = miopen::tien<4>(tensor.GetStrides(), 0); + std::tie(ns, cs, hs, ws) = miopen::tien<4>(tensor.GetStrides(), 0u); (to.*method)("NCHW", tensor.GetType(), n, c, d, h, w, ns, cs, hs, ws); @@ -59,11 +59,12 @@ SetDescFromMLDesc(int spatial_dims, TTo& to, const TensorDescriptor& tensor, con // TODO remove this struct ProblemDescriptionCompatTemporary { - int spatial_dims = 2; - int n_inputs = 0; - int in_height = 0; - int in_width = 0; - int in_depth = 0; + unsigned spatial_dims = 2; + + int n_inputs = 0; + int in_height = 0; + int in_width = 0; + int in_depth = 0; // TODO add check to solver that vectorLength = 1 // int vectorLength = 1; int n_outputs = 0; @@ -86,7 +87,7 @@ struct ProblemDescriptionCompatTemporary int out_channel_stride = 0; int out_batch_stride = 0; - int GetSpatialDims() const { return spatial_dims; } + unsigned GetSpatialDims() const { return spatial_dims; } int GetInChannels() const { return n_inputs; } int GetInHeight() const { return in_height; } int GetInWidth() const { return in_width; } @@ -132,8 +133,8 @@ struct ProblemDescriptionCompatTemporary int w_stride) { batch_sz = batch; - const int data_len = GetTypeSize(data_type); - const size_t size = + const int data_len = int(GetTypeSize(data_type)); + const int size = (layout == "NCHW") ? batch * channels * depth * height * width * data_len : batch * batch_stride * channel_stride * stride * w_stride * data_len; @@ -145,10 +146,10 @@ struct ProblemDescriptionCompatTemporary out_batch_stride = batch_stride; out_channel_stride = channel_stride; out_stride = stride; - top_sz = size; + top_sz = size_t(size); out_layout = layout; out_data_type = data_type; - bias_sz = (bias != 0) ? (n_outputs * data_len) : 0; + bias_sz = (bias != 0) ? size_t(n_outputs * data_len) : 0; } /* @@ -168,8 +169,8 @@ struct ProblemDescriptionCompatTemporary int w_stride) { batch_sz = batch; - const int data_len = GetTypeSize(data_type); - const size_t size = + const int data_len = int(GetTypeSize(data_type)); + const int size = (layout == "NCHW") ? batch * channels * depth * height * width * data_len : batch * batch_stride * channel_stride * stride * w_stride * data_len; @@ -181,7 +182,7 @@ struct ProblemDescriptionCompatTemporary in_batch_stride = batch_stride; in_channel_stride = channel_stride; in_stride = stride; - bot_sz = size; + bot_sz = size_t(size); in_layout = layout; in_data_type = data_type; // _tens_layout = layout; @@ -261,14 +262,15 @@ struct UnifiedDescriptionConv2d if(!problem.Is2d()) MIOPEN_THROW(miopenStatusInternalError, "UnifiedDescriptionConv2d supports only 2D"); - const auto n_inputs_per_group = problem.GetInChannels() / problem.GetGroupCount(); - const auto n_outputs_per_group = problem.GetOutChannels() / problem.GetGroupCount(); + const auto group_count = size_t(problem.GetGroupCount()); + const auto n_inputs_per_group = problem.GetInChannels() / group_count; + const auto n_outputs_per_group = problem.GetOutChannels() / group_count; if(!problem.IsDirectionBackwardWrW()) { R = problem.GetWeightsHeight(); S = problem.GetWeightsWidth(); - U = problem.IsDirectionForward() ? problem.GetKernelStrideH() : 1; - V = problem.IsDirectionForward() ? problem.GetKernelStrideW() : 1; + U = problem.IsDirectionForward() ? size_t(problem.GetKernelStrideH()) : 1; + V = problem.IsDirectionForward() ? size_t(problem.GetKernelStrideW()) : 1; C = n_inputs_per_group; // Bwd: C and K is reversed in ProblemDescription. K = n_outputs_per_group; // Ditto. out_h = problem.GetOutHeight(); // Bwd: height/width is reversed in ProblemDescription. @@ -276,17 +278,17 @@ struct UnifiedDescriptionConv2d N = problem.GetBatchSize(); pad_h = problem.IsDirectionForward() ? problem.GetPadH() : problem.GetBackwardPadH(); pad_w = problem.IsDirectionForward() ? problem.GetPadW() : problem.GetBackwardPadW(); - input_stride_h = problem.IsDirectionForward() ? 1 : problem.GetKernelStrideH(); - input_stride_w = problem.IsDirectionForward() ? 1 : problem.GetKernelStrideW(); - filter_stride_h = problem.GetDilationH(); - filter_stride_w = problem.GetDilationW(); + input_stride_h = problem.IsDirectionForward() ? 1 : size_t(problem.GetKernelStrideH()); + input_stride_w = problem.IsDirectionForward() ? 1 : size_t(problem.GetKernelStrideW()); + filter_stride_h = size_t(problem.GetDilationH()); + filter_stride_w = size_t(problem.GetDilationW()); } else { // WrW R = problem.GetInHeight(); S = problem.GetInWidth(); - U = problem.GetDilationH(); - V = problem.GetDilationW(); + U = size_t(problem.GetDilationH()); + V = size_t(problem.GetDilationW()); C = problem.GetBatchSize(); K = n_inputs_per_group; out_h = problem.GetWeightsHeight(); @@ -296,8 +298,8 @@ struct UnifiedDescriptionConv2d pad_w = problem.GetPadW(); input_stride_h = 1; input_stride_w = 1; - filter_stride_h = problem.GetKernelStrideH(); - filter_stride_w = problem.GetKernelStrideW(); + filter_stride_h = size_t(problem.GetKernelStrideH()); + filter_stride_w = size_t(problem.GetKernelStrideW()); } } }; diff --git a/projects/miopen/src/include/miopen/ramdb.hpp b/projects/miopen/src/include/miopen/ramdb.hpp index 9d3a222b36c1..f365ed0bdc35 100644 --- a/projects/miopen/src/include/miopen/ramdb.hpp +++ b/projects/miopen/src/include/miopen/ramdb.hpp @@ -164,7 +164,7 @@ class DbTimer if(logging) { const auto end = std::chrono::high_resolution_clock::now(); - MIOPEN_LOG_I2("Db::" << funcName << " time: " << (end - start).count() * .000001f + MIOPEN_LOG_I2("Db::" << funcName << " time: " << float((end - start).count()) * .000001f << " ms"); } return std::move(ret); // NOLINT(clang-analyzer-cplusplus.Move) diff --git a/projects/miopen/src/include/miopen/rnn_util.hpp b/projects/miopen/src/include/miopen/rnn_util.hpp index fadc1c837924..b51a5f5f1daf 100644 --- a/projects/miopen/src/include/miopen/rnn_util.hpp +++ b/projects/miopen/src/include/miopen/rnn_util.hpp @@ -341,7 +341,7 @@ inline size_t ReductionWorkspaceSize(const Handle& handle, miopenReduceTensorIndices_t::MIOPEN_REDUCE_TENSOR_NO_INDICES, miopenIndicesType_t::MIOPEN_32BIT_INDICES}; - int bidirect_mp = is_bidirect ? 2 : 1; + size_t bidirect_mp = is_bidirect ? 2 : 1; size_t hy_stride = hsize * bidirect_mp * workspaceScale; diff --git a/projects/miopen/src/include/miopen/seq_tensor.hpp b/projects/miopen/src/include/miopen/seq_tensor.hpp index 60d9f51ff1e1..42f5bbf84965 100644 --- a/projects/miopen/src/include/miopen/seq_tensor.hpp +++ b/projects/miopen/src/include/miopen/seq_tensor.hpp @@ -147,7 +147,7 @@ struct SeqTensorDescriptor : miopenSeqTensorDescriptor private: std::size_t GetTensorRealByteSpaceSeqPacked() const; - static std::vector GetDefaultLayoutVector(int dims) + static std::vector GetDefaultLayoutVector(uint32_t dims) { std::vector layout_default(dims); std::iota(layout_default.begin(), layout_default.end(), 0); diff --git a/projects/miopen/src/include/miopen/softmax/problem_description.hpp b/projects/miopen/src/include/miopen/softmax/problem_description.hpp index d3f42e7ccdc1..541aadaf541e 100644 --- a/projects/miopen/src/include/miopen/softmax/problem_description.hpp +++ b/projects/miopen/src/include/miopen/softmax/problem_description.hpp @@ -106,16 +106,16 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, NetworkConfig MakeNetworkConfig() const override; template - static void Visit(Self&& self, std::function f) + static void Visit(Self&& self, std::function f) { // The column names match the driver command line argument names - f(static_cast(self.isForward), "forw"); + f(self.isForward, "forw"); f(self.GetBatchSize(), "batchsize"); f(self.GetChannels(), "in_channels"); f(self.GetHeight(), "in_h"); f(self.GetWidth(), "in_w"); - f(static_cast(self.algorithm), "algorithm"); - f(static_cast(self.mode), "mode"); + f(self.algorithm, "algorithm"); + f(self.mode, "mode"); } template @@ -128,7 +128,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, template static void VisitAll(Self&& self, const Visitor& f) { - Visit(std::forward(self), [&](int64_t value, std::string name) { f(value, name); }); + Visit(std::forward(self), [&](uint64_t value, std::string name) { f(value, name); }); Visit(std::forward(self), [&](std::string value, std::string name) { f(value, name); }); } diff --git a/projects/miopen/src/include/miopen/solution.hpp b/projects/miopen/src/include/miopen/solution.hpp index 3ebf8d4d29a5..51779edf9e74 100644 --- a/projects/miopen/src/include/miopen/solution.hpp +++ b/projects/miopen/src/include/miopen/solution.hpp @@ -127,7 +127,7 @@ struct Solution : miopenSolution kernels.reserve(programs.size()); - for(int i = 0; i < programs.size(); ++i) + for(size_t i = 0; i < programs.size(); ++i) { auto kernel = KernelInfo{}; kernel.program = programs[i]; diff --git a/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp b/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp index 3f4e3ecde5d7..f2f847cba11b 100644 --- a/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp +++ b/projects/miopen/src/include/miopen/solver/implicitgemm_util.hpp @@ -117,7 +117,7 @@ inline static uint32_t GetEPackLength(const ExecutionContext& ctx, bool isXdlopsInvoked) { // Based on data type, Es are packed - int EPACK = 1; + uint32_t EPACK = 1; if(problem.IsFp16()) // for fp16, either 2 or 4 Es could be packed { if(IsXdlopsSupport(ctx) && isXdlopsInvoked) @@ -163,11 +163,11 @@ static inline size_t ComputeLDSRequiredSize(const miopen::conv::ProblemDescripti // Multiplied worst_case_alignment_adjustment by 2 as // Both A and B matrix LDS size is increased. - const std::size_t lds_size = (static_cast(BPerBlock) + KPerBlock) * EPerBlock * - EPACKSize * GetTypeSize(problem.GetInDataType()) * 2 + - 2 * static_cast(worst_case_alignment_adjustment); + const auto lds_size = static_cast((BPerBlock + KPerBlock) * EPerBlock) * + EPACKSize * GetTypeSize(problem.GetInDataType()) * 2 + + 2 * worst_case_alignment_adjustment; - return lds_size; + return static_cast(lds_size); } template diff --git a/projects/miopen/src/include/miopen/solver/problem_description_interpreter.hpp b/projects/miopen/src/include/miopen/solver/problem_description_interpreter.hpp index 21f65887a3bd..ebe0dfdbffa8 100644 --- a/projects/miopen/src/include/miopen/solver/problem_description_interpreter.hpp +++ b/projects/miopen/src/include/miopen/solver/problem_description_interpreter.hpp @@ -46,7 +46,7 @@ struct ProblemInterpreter static int GetBatchN(const miopen::conv::ProblemDescription& problem) { - return problem.GetBatchSize(); + return static_cast(problem.GetBatchSize()); } static auto GetOutputLayout(const miopen::conv::ProblemDescription& problem) @@ -60,9 +60,9 @@ struct ProblemInterpreter static int GetOutputChannelK(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetOutChannels(); + return static_cast(problem.GetOutChannels()); else - return problem.GetInChannels(); + return static_cast(problem.GetInChannels()); } static auto GetInputLayout(const miopen::conv::ProblemDescription& problem) @@ -76,33 +76,33 @@ struct ProblemInterpreter static int GetInputChannelC(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetInChannels(); + return static_cast(problem.GetInChannels()); else - return problem.GetOutChannels(); + return static_cast(problem.GetOutChannels()); } static int GetInputDepthDi(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetInDepth(); + return static_cast(problem.GetInDepth()); else - return problem.GetOutDepth(); + return static_cast(problem.GetOutDepth()); } static int GetInputHeightHi(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetInHeight(); + return static_cast(problem.GetInHeight()); else - return problem.GetOutHeight(); + return static_cast(problem.GetOutHeight()); } static int GetInputWidthWi(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetInWidth(); + return static_cast(problem.GetInWidth()); else - return problem.GetOutWidth(); + return static_cast(problem.GetOutWidth()); } static auto GetInputCastType(const miopen::conv::ProblemDescription& problem) @@ -116,25 +116,25 @@ struct ProblemInterpreter static int GetOutputDepthDo(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetOutDepth(); + return static_cast(problem.GetOutDepth()); else - return problem.GetInDepth(); + return static_cast(problem.GetInDepth()); } static int GetOutputHeightHo(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetOutHeight(); + return static_cast(problem.GetOutHeight()); else - return problem.GetInHeight(); + return static_cast(problem.GetInHeight()); } static int GetOutputWidthWo(const miopen::conv::ProblemDescription& problem) { if(problem.IsDirectionForward()) - return problem.GetOutWidth(); + return static_cast(problem.GetOutWidth()); else - return problem.GetInWidth(); + return static_cast(problem.GetInWidth()); } static auto GetOutputCastType(const miopen::conv::ProblemDescription& problem) @@ -162,7 +162,7 @@ struct ProblemInterpreter static int GetFilterDepthZ(const miopen::conv::ProblemDescription& problem) { - return problem.GetWeightsDepth(); + return static_cast(problem.GetWeightsDepth()); } static auto GetFilterLayout(const miopen::conv::ProblemDescription& problem) @@ -172,12 +172,12 @@ struct ProblemInterpreter static int GetFilterHeightY(const miopen::conv::ProblemDescription& problem) { - return problem.GetWeightsHeight(); + return static_cast(problem.GetWeightsHeight()); } static int GetFilterWidthX(const miopen::conv::ProblemDescription& problem) { - return problem.GetWeightsWidth(); + return static_cast(problem.GetWeightsWidth()); } // adjust conv_stride_d to 1 if Do is 1 diff --git a/projects/miopen/src/include/miopen/tensor.hpp b/projects/miopen/src/include/miopen/tensor.hpp index 518147960e15..03ec879e969f 100644 --- a/projects/miopen/src/include/miopen/tensor.hpp +++ b/projects/miopen/src/include/miopen/tensor.hpp @@ -335,7 +335,7 @@ template constexpr auto GetNCDHW(unsigned spatial_dims, const std::vector& data) { if(spatial_dims == 3) - return miopen::tien<5>(data, 1); + return miopen::tien<5>(data, 1u); else return std::make_tuple(data[0], data[1], static_cast(1), data[2], data[3]); } diff --git a/projects/miopen/src/include/miopen/tensor_layout.hpp b/projects/miopen/src/include/miopen/tensor_layout.hpp index f5659d7dd3ef..5f289912106c 100644 --- a/projects/miopen/src/include/miopen/tensor_layout.hpp +++ b/projects/miopen/src/include/miopen/tensor_layout.hpp @@ -55,7 +55,7 @@ void tensor_layout_to_strides(const std::vector& len, len_layout.end(), std::back_inserter(strides), [&layout, &dim_to_len](char cur_layout_char) { - auto pos = layout.find(cur_layout_char); + int64_t pos = int64_t(layout.find(cur_layout_char)); if(pos == std::string::npos) { MIOPEN_THROW(std::string("mismatched layout string - ").append(layout)); @@ -101,7 +101,7 @@ void tensor_layout_to_strides(const std::vector& len, MIOPEN_THROW(std::string("mismatched layout string - ").append(base_layout)); } return std::accumulate( - base_layout.begin() + pos + 1, + base_layout.begin() + int64_t(pos) + 1, base_layout.end(), vector_size, [&dim_to_len](T accumulator, char l) { return accumulator * dim_to_len[l]; }); diff --git a/projects/miopen/src/include/miopen/utility/modified_z.hpp b/projects/miopen/src/include/miopen/utility/modified_z.hpp index bdd27f786e71..6cc6d3d01502 100644 --- a/projects/miopen/src/include/miopen/utility/modified_z.hpp +++ b/projects/miopen/src/include/miopen/utility/modified_z.hpp @@ -40,8 +40,8 @@ T mean(const std::vector& data) static_assert(std::is_floating_point_v); MIOPEN_THROW_IF(data.size() == 0, "Cannot find Mean of 0 length data"); - T sumOfValues = std::accumulate(data.begin(), data.end(), 0.0); - return sumOfValues / data.size(); + T sumOfValues = std::accumulate(data.begin(), data.end(), T(0)); + return sumOfValues / T(data.size()); } template @@ -52,7 +52,7 @@ T medianOfSortedData(const std::vector& sortedData) size_t size = sortedData.size(); - T median = (size % 2 == 0) ? (sortedData[size / 2 - 1] + sortedData[size / 2]) / 2.0 + T median = (size % 2 == 0) ? (sortedData[size / 2 - 1] + sortedData[size / 2]) / T(2) : sortedData[size / 2]; return median; diff --git a/projects/miopen/src/include/miopen/utility/transposing_solver.hpp b/projects/miopen/src/include/miopen/utility/transposing_solver.hpp index 05ef402c56e9..fb958e9a54f5 100644 --- a/projects/miopen/src/include/miopen/utility/transposing_solver.hpp +++ b/projects/miopen/src/include/miopen/utility/transposing_solver.hpp @@ -557,20 +557,20 @@ struct BatchedTransposeSolverImpl : TransposePseudoSolver const auto& lens = desc.GetLengths(); constexpr int n_dims = BatchedTransposeTraits::ndims; - const uint32_t n = static_cast(lens[0]); - const uint32_t c = static_cast(lens[1]); + const auto n = static_cast(lens[0]); + const auto c = static_cast(lens[1]); if constexpr(n_dims == 4) { - const uint32_t h = static_cast(lens[2]); - const uint32_t w = static_cast(lens[3]); + const auto h = static_cast(lens[2]); + const auto w = static_cast(lens[3]); return TransposeSolution(ctx, desc.GetType(), n, c, h, w); } else // n_dims == 5 { - const uint32_t d = static_cast(lens[2]); - const uint32_t h = static_cast(lens[3]); - const uint32_t w = static_cast(lens[4]); + const auto d = static_cast(lens[2]); + const auto h = static_cast(lens[3]); + const auto w = static_cast(lens[4]); return TransposeSolution(ctx, desc.GetType(), n, c, d, h, w); } } @@ -984,7 +984,7 @@ struct TransposingSolver : TransposingSolverGetSolution> in_transpose_ifs, diff --git a/projects/miopen/src/include/miopen/write_file.hpp b/projects/miopen/src/include/miopen/write_file.hpp index 4ed51dfb830a..f954b4ab79eb 100644 --- a/projects/miopen/src/include/miopen/write_file.hpp +++ b/projects/miopen/src/include/miopen/write_file.hpp @@ -35,21 +35,24 @@ namespace miopen { inline void WriteFile(std::string_view content, const fs::path& name) { std::ofstream f{name}; - if(f.write(content.data(), content.size()).fail()) + auto content_size = std::streamsize(content.size()); + if(f.write(content.data(), content_size).fail()) MIOPEN_THROW("Failed to write to file"); } inline void WriteFile(const std::vector& content, const fs::path& name) { std::ofstream f{name, std::ios::binary}; - if(f.write(content.data(), content.size()).fail()) + auto content_size = std::streamsize(content.size()); + if(f.write(content.data(), content_size).fail()) MIOPEN_THROW("Failed to write to file"); } inline void WriteFile(const std::vector& content, const fs::path& name) { std::ofstream f{name, std::ios::binary}; - if(f.write(reinterpret_cast(content.data()), content.size()).fail()) + auto content_size = std::streamsize(content.size()); + if(f.write(reinterpret_cast(content.data()), content_size).fail()) MIOPEN_THROW("Failed to write to file"); } diff --git a/projects/miopen/src/kthvalue_api.cpp b/projects/miopen/src/kthvalue_api.cpp index 03405e845eae..b6e0372a7547 100644 --- a/projects/miopen/src/kthvalue_api.cpp +++ b/projects/miopen/src/kthvalue_api.cpp @@ -31,7 +31,7 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector& v) { os << '{'; - for(int i = 0; i < v.size(); ++i) + for(size_t i = 0; i < v.size(); ++i) { if(i != 0) os << ','; diff --git a/projects/miopen/src/layernorm/problem_description.cpp b/projects/miopen/src/layernorm/problem_description.cpp index c8498603847c..519a6502345e 100644 --- a/projects/miopen/src/layernorm/problem_description.cpp +++ b/projects/miopen/src/layernorm/problem_description.cpp @@ -58,7 +58,7 @@ size_t GetOuterSize(const TensorDescriptor& xDesc, int32_t normalized_dim, size_ return outer_size; } -size_t GetInnerSize(const TensorDescriptor& xDesc, int32_t normalized_dim) +size_t GetInnerSize(const TensorDescriptor& xDesc, uint32_t normalized_dim) { size_t inner_size = 1; for(size_t i = normalized_dim; i < xDesc.GetLengths().size(); ++i) diff --git a/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp b/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp index 0f0cd6572ec6..a95fbcbb2968 100644 --- a/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp +++ b/projects/miopen/src/legacy_composable_kernel/composable_kernel/include/utility/config.hpp @@ -146,7 +146,7 @@ enum InMemoryDataOperationEnum_t }; // index type -using index_t = int32_t; +using index_t = uint32_t; } // namespace ck #endif diff --git a/projects/miopen/src/load_file.cpp b/projects/miopen/src/load_file.cpp index 4338f9a96d7a..ec11db58efc6 100644 --- a/projects/miopen/src/load_file.cpp +++ b/projects/miopen/src/load_file.cpp @@ -22,7 +22,7 @@ std::vector LoadFile(const fs::path& path) if(!in.is_open()) MIOPEN_THROW(path.string() + ": file opening error"); std::vector v(size); - if(in.read(v.data(), v.size()).fail()) + if(in.read(v.data(), std::streamsize(v.size())).fail()) MIOPEN_THROW(path.string() + ": file reading error"); return v; } diff --git a/projects/miopen/src/logger.cpp b/projects/miopen/src/logger.cpp index ae0a550bf0ef..7f72af017311 100644 --- a/projects/miopen/src/logger.cpp +++ b/projects/miopen/src/logger.cpp @@ -279,7 +279,7 @@ bool IsLoggingToRoctx() bool IsLogging(const LoggingLevel level, const bool disableQuieting) { - auto enabled_level = env::value(MIOPEN_LOG_LEVEL); + auto enabled_level = static_cast(env::value(MIOPEN_LOG_LEVEL)); if(IsLoggingDebugQuiet() && !disableQuieting) { // Disable all levels higher than fatal. diff --git a/projects/miopen/src/prelu_api.cpp b/projects/miopen/src/prelu_api.cpp index 5e670a0a2bcc..763276221f7d 100644 --- a/projects/miopen/src/prelu_api.cpp +++ b/projects/miopen/src/prelu_api.cpp @@ -31,7 +31,7 @@ inline std::ostream& operator<<(std::ostream& os, const std::vector& v) { os << '{'; - for(int i = 0; i < v.size(); ++i) + for(size_t i = 0; i < v.size(); ++i) { if(i != 0) os << ','; diff --git a/projects/miopen/src/problem.cpp b/projects/miopen/src/problem.cpp index 3ca996a1c020..3f3b242b2d60 100644 --- a/projects/miopen/src/problem.cpp +++ b/projects/miopen/src/problem.cpp @@ -512,7 +512,7 @@ std::vector Problem::FindSolutionsImpl(const Handle& handle, MakeConvInvokeParams(x_desc, x, w_desc, w, y_desc, y, workspace, workspace_size); auto results = - FindConvolution(ctx, conv_problem, invoke_ctx, max_solutions, options.attach_binaries); + FindConvolution(ctx, conv_problem, invoke_ctx, int(max_solutions), options.attach_binaries); auto db = MakeConvDbGetter(ctx); for(auto& result : results) @@ -654,7 +654,7 @@ namespace { inline bool IsValidFilterChannelNumber(const TensorDescriptor& x, const TensorDescriptor& w, const std::optional& layout, - const int groups) + const size_t groups) { if(layout == miopenTensorNCHW // || layout == miopenTensorNCHWc4 // @@ -675,7 +675,7 @@ inline bool IsValidFilterChannelNumber(const TensorDescriptor& x, inline bool IsValidGroupCount(const TensorDescriptor& x, const TensorDescriptor& w, const std::optional& layout, - const int groups) + const size_t groups) { if(groups > 1) // Optimize for speed { @@ -699,9 +699,9 @@ void Problem::ValidateGroupCount(const TensorDescriptor& x, const TensorDescriptor& w, const ConvolutionDescriptor& conv) { + assert(conv.group_count > 0); const auto layout = w.GetLayoutEnum(); - const auto groups = conv.group_count; - assert(groups > 0); + const size_t groups = static_cast(conv.group_count); const auto ok_c = IsValidFilterChannelNumber(x, w, layout, groups); const auto ok_g = IsValidGroupCount(x, w, layout, groups); @@ -711,7 +711,7 @@ void Problem::ValidateGroupCount(const TensorDescriptor& x, MIOPEN_LOG_W(w.GetLayout_str() << "w {" << w.ToString() << "}, " // << "x {" << x.ToString() << "}, " // - << "groups = " << conv.group_count); + << "groups = " << groups); if(!ok_c) MIOPEN_THROW(miopenStatusBadParm, "Invalid filter channel number"); if(!ok_g) @@ -942,7 +942,7 @@ miopenTensorArgumentId_t Problem::GetOutputId() const void FusedProblem::PropagateDescriptors() { - for(auto i = 0; i < problems.size(); ++i) + for(size_t i = 0; i < problems.size(); ++i) { auto& cur = problems[i]; diff --git a/projects/miopen/src/ramdb.cpp b/projects/miopen/src/ramdb.cpp index 68306869bc57..bcb67770f498 100644 --- a/projects/miopen/src/ramdb.cpp +++ b/projects/miopen/src/ramdb.cpp @@ -283,7 +283,8 @@ static void Measure(const std::string& funcName, TFunc&& func) const auto start = std::chrono::high_resolution_clock::now(); func(); const auto end = std::chrono::high_resolution_clock::now(); - MIOPEN_LOG_I("RamDb::" << funcName << " time: " << (end - start).count() * .000001f << " ms"); + MIOPEN_LOG_I("RamDb::" << funcName << " time: " << float((end - start).count()) * .000001f + << " ms"); } bool RamDb::ValidateUnsafe() diff --git a/projects/miopen/src/readonlyramdb.cpp b/projects/miopen/src/readonlyramdb.cpp index 75ad19212a65..596d980bc701 100644 --- a/projects/miopen/src/readonlyramdb.cpp +++ b/projects/miopen/src/readonlyramdb.cpp @@ -79,8 +79,8 @@ static auto Measure(const std::string& funcName, TFunc&& func) const auto start = std::chrono::high_resolution_clock::now(); func(); const auto end = std::chrono::high_resolution_clock::now(); - MIOPEN_LOG_I("ReadonlyRamDb::" << funcName << " time: " << (end - start).count() * .000001f - << " ms"); + MIOPEN_LOG_I("ReadonlyRamDb::" << funcName << " time: " + << float((end - start).count()) * .000001f << " ms"); } void ReadonlyRamDb::ParseAndLoadDb(std::istream& input_stream, bool warn_if_unreadable) diff --git a/projects/miopen/src/reducecalculation_api.cpp b/projects/miopen/src/reducecalculation_api.cpp index 08d9eb2fe76c..54e716a9d049 100644 --- a/projects/miopen/src/reducecalculation_api.cpp +++ b/projects/miopen/src/reducecalculation_api.cpp @@ -55,7 +55,7 @@ static void LogCmdReduceCalculation(const miopenTensorDescriptor_t xDesc, std::string input_sz; auto input = miopen::deref(xDesc).GetLengths(); - for(int32_t i = 0; i < input.size(); ++i) + for(uint32_t i = 0; i < input.size(); ++i) { input_sz += std::to_string(input[i]); if(i != input.size() - 1) diff --git a/projects/miopen/src/reduceextreme_api.cpp b/projects/miopen/src/reduceextreme_api.cpp index a8f640ed7b41..a2346e95c349 100644 --- a/projects/miopen/src/reduceextreme_api.cpp +++ b/projects/miopen/src/reduceextreme_api.cpp @@ -54,7 +54,7 @@ static void LogCmdReduceExtreme(const miopenTensorDescriptor_t xDesc, std::string input_sz; auto input = miopen::deref(xDesc).GetLengths(); - for(int32_t i = 0; i < input.size(); ++i) + for(uint32_t i = 0; i < input.size(); ++i) { input_sz += std::to_string(input[i]); if(i != input.size() - 1) diff --git a/projects/miopen/src/reducetensor.cpp b/projects/miopen/src/reducetensor.cpp index 8becd7fa005a..dbc20f0f45be 100644 --- a/projects/miopen/src/reducetensor.cpp +++ b/projects/miopen/src/reducetensor.cpp @@ -57,19 +57,21 @@ struct ReductionKernelConfigurator ReductionKernelConfigurator() = default; ReductionKernelConfigurator(int blockSize, int warpSize) - : blockSize_(blockSize), warpSize_(warpSize) { - GredDirectThreadWiseUpperReductionLen = warpSize; - GredDirectWarpWiseUpperReductionLen = blockSize; - GredBlockWiseUpperReductionLen = static_cast(blockSize) * 4; + blockSize_ = static_cast(blockSize); + warpSize_ = static_cast(warpSize); + + GredDirectThreadWiseUpperReductionLen = warpSize_; + GredDirectWarpWiseUpperReductionLen = blockSize_; + GredBlockWiseUpperReductionLen = blockSize_ * 4; GredUpperNumBlocksPerReduction = 32; - numWarpsPerBlock = blockSize / warpSize; + numWarpsPerBlock = blockSize_ / warpSize_; }; - int blockSize_; - int warpSize_; - int numWarpsPerBlock; + std::size_t blockSize_; + std::size_t warpSize_; + std::size_t numWarpsPerBlock; std::size_t GredDirectThreadWiseUpperReductionLen; std::size_t GredDirectWarpWiseUpperReductionLen; @@ -366,33 +368,34 @@ static std::pair get_padding_need(ReductionMethod_t reduceImpl, { bool src_need_padding = false; bool dst_need_padding = false; - int copySliceLen; + size_t copySliceLen; int reduceSizePerBlock; switch(reduceImpl) { case Reduce_DirectThreadWise: - copySliceLen = tunable->GredThreadBufferLength; - src_need_padding = (invariantLen < static_cast(GridSize) * BlockSize || + copySliceLen = size_t(tunable->GredThreadBufferLength); + src_need_padding = (invariantLen < static_cast(GridSize * BlockSize) || toReduceLen % copySliceLen > 0); - dst_need_padding = (invariantLen < static_cast(GridSize) * BlockSize); + dst_need_padding = (invariantLen < static_cast(GridSize * BlockSize)); break; case Reduce_DirectWarpWise: - copySliceLen = warpSize * tunable->GredAccessesPerThreadInWarp; + copySliceLen = size_t(warpSize * tunable->GredAccessesPerThreadInWarp); src_need_padding = (invariantLen < GridSize * BlockSize / warpSize || toReduceLen % copySliceLen > 0); dst_need_padding = (invariantLen < GridSize * BlockSize / warpSize); break; case Reduce_BlockWise: - copySliceLen = BlockSize * tunable->GredAccessesPerThreadInBlock; + copySliceLen = size_t(BlockSize * tunable->GredAccessesPerThreadInBlock); src_need_padding = (toReduceLen % copySliceLen > 0); break; case Reduce_MultiBlock: - copySliceLen = BlockSize * tunable->GredAccessesPerThreadInBlock; - reduceSizePerBlock = - (((toReduceLen + BlkGroupSize - 1) / BlkGroupSize + copySliceLen - 1) / copySliceLen) * - copySliceLen; - src_need_padding = (toReduceLen < static_cast(reduceSizePerBlock) * BlkGroupSize); + copySliceLen = size_t(BlockSize * tunable->GredAccessesPerThreadInBlock); + reduceSizePerBlock = int( + (((toReduceLen + size_t(BlkGroupSize) - 1) / size_t(BlkGroupSize) + copySliceLen - 1) / + copySliceLen) * + copySliceLen); + src_need_padding = (toReduceLen < static_cast(reduceSizePerBlock * BlkGroupSize)); break; default: MIOPEN_THROW("Invalid reduction method ID!"); break; }; @@ -453,7 +456,7 @@ std::size_t ReduceTensorDescriptor::GetWorkspaceSize(const Handle& handle, if(inDescLengths.size() != outDescLengths.size()) MIOPEN_THROW("The number of dimensions of the input and output tensor should match."); - for(int i = 0; i < inDescLengths.size(); i++) + for(uint32_t i = 0; i < inDescLengths.size(); i++) { if(outDescLengths[i] != 1 && outDescLengths[i] != inDescLengths[i]) { @@ -468,7 +471,7 @@ std::size_t ReduceTensorDescriptor::GetWorkspaceSize(const Handle& handle, const tunable_generic_reduction* tunable = &default_tunable_generic_reduction; int blockSize = tunable->BlockSize; - detail::ReductionKernelConfigurator configurator(blockSize, handle.GetWavefrontWidth()); + detail::ReductionKernelConfigurator configurator(blockSize, int(handle.GetWavefrontWidth())); auto workspace_size = configurator.getWorkspaceSize(invariantLength, toReduceLength); @@ -479,10 +482,10 @@ std::size_t ReduceTensorDescriptor::GetWorkspaceSize(const Handle& handle, (reduceOp == MIOPEN_REDUCE_TENSOR_MIN || reduceOp == MIOPEN_REDUCE_TENSOR_MAX || reduceOp == MIOPEN_REDUCE_TENSOR_AMAX); - std::size_t wsSizeInBytes = - !need_indices ? workspace_size * detail::GetDataTypeSize(inDesc.GetType()) - : workspace_size * (detail::GetDataTypeSize(inDesc.GetType()) + sizeof(int)) + - 64 + sizeof(int) + workspaceAlignRequirementBytes; + size_t datatype_size = size_t(detail::GetDataTypeSize(inDesc.GetType())); + size_t wsSizeInBytes = !need_indices ? workspace_size * datatype_size + : workspace_size * (datatype_size + sizeof(int)) + 64 + + sizeof(int) + workspaceAlignRequirementBytes; // dynamic reduction use one additional page for storing tensor descriptors wsSizeInBytes += 4096; @@ -501,7 +504,7 @@ std::size_t ReduceTensorDescriptor::GetIndicesSize(const TensorDescriptor& inDes if(inDescLengths.size() != outDescLengths.size()) MIOPEN_THROW("The number of dimensions of the input and output tensor should match."); - for(int i = 0; i < inDescLengths.size(); i++) + for(uint32_t i = 0; i < inDescLengths.size(); i++) { if(outDescLengths[i] != 1 && outDescLengths[i] != inDescLengths[i]) { @@ -551,7 +554,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, const tunable_generic_reduction* tunable = &default_tunable_generic_reduction; const int blockSize = tunable->BlockSize; - detail::ReductionKernelConfigurator configurator(blockSize, handle.GetWavefrontWidth()); + detail::ReductionKernelConfigurator configurator(blockSize, int(handle.GetWavefrontWidth())); const bool need_indices = (reduceIndicesOpt == MIOPEN_REDUCE_TENSOR_FLATTENED_INDICES) && @@ -567,7 +570,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, if(inDescLengths.size() != outDescLengths.size()) MIOPEN_THROW("The number of dimensions of the input and output tensor should match."); - for(int i = 0; i < inDescLengths.size(); i++) + for(uint32_t i = 0; i < inDescLengths.size(); i++) { if(outDescLengths[i] != 1 && outDescLengths[i] != inDescLengths[i]) { @@ -593,15 +596,15 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, if(need_indices && workspace != nullptr) { - auto aTypeSize = detail::GetDataTypeSize(aDesc.GetType()); + size_t aTypeSize = size_t(detail::GetDataTypeSize(aDesc.GetType())); auto workspace_size = configurator.getWorkspaceSize(invariantLength, toReduceLength); ws_buf2_bytes_offset = ((workspace_size * aTypeSize + 63) / 64) * 64; - }; + } const ReductionMethod_t reduceImpl = configurator.getReductionMethod(invariantLength, toReduceLength); - const int gridSize = configurator.getGridSize(invariantLength, toReduceLength); + const size_t gridSize = configurator.getGridSize(invariantLength, toReduceLength); const int blkGroupSize = (reduceImpl == Reduce_MultiBlock) ? static_cast(gridSize / invariantLength) : 0; @@ -612,7 +615,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, for(int i = 0; i < inDescLengths.size(); i++) { - if(outDescLengths[i] == 1) + if(outDescLengths[size_t(i)] == 1) toReduceDims.push_back(i); else invariantDims.push_back(i); @@ -634,7 +637,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, : *reinterpret_cast(beta); { // use dynamic reduction - const int origReduceLen = toReduceLength; + const auto origReduceLen = toReduceLength; int p_inLengths[6] = {0}; int p_inStrides[6] = {0}; @@ -642,7 +645,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, int p_outStrides[6] = {0}; int pos = 0; - for(int i = 0; i < outDescLengths.size(); i++) + for(uint32_t i = 0; i < outDescLengths.size(); i++) { // invariant dimensions if(outDescLengths[i] > 1) @@ -655,7 +658,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, }; }; - for(int i = 0; i < outDescLengths.size(); i++) + for(uint32_t i = 0; i < outDescLengths.size(); i++) { // toReduce dimensions if(outDescLengths[i] == 1) @@ -674,7 +677,7 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, const std::vector vld = {static_cast(tunable->BlockSize), 1, 1}; const std::vector vgd1 = {static_cast(tunable->BlockSize), 1, 1}; - const std::vector vgd2 = {static_cast(gridSize) * tunable->BlockSize, 1, 1}; + const std::vector vgd2 = {gridSize * static_cast(tunable->BlockSize), 1, 1}; std::string algo_name = "dynamic_generic_reduction"; @@ -719,9 +722,9 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, auto use_padding = detailDynamic::get_padding_need(reduceImpl, invariantLength, toReduceLength, - gridSize, + int(gridSize), tunable->BlockSize, - handle.GetWavefrontWidth(), + int(handle.GetWavefrontWidth()), blkGroupSize, tunable); @@ -817,20 +820,21 @@ void ReduceTensorDescriptor::ReduceTensor(const Handle& handle, if(useTwoCalls) { - const auto toReduceLength_2 = blkGroupSize; + const size_t toReduceLength_2 = size_t(blkGroupSize); const int gridSize_2 = static_cast(configurator.getGridSize_2(invariantLength, toReduceLength_2)); const std::vector vgd2_2 = { - static_cast(gridSize_2) * tunable->BlockSize, size_t{1}, size_t{1}}; - const auto reduceImpl2 = configurator.GetReductionMethod_2(toReduceLength_2); - const auto use_padding2 = detailDynamic::get_padding_need(reduceImpl2, - invariantLength, - toReduceLength_2, - gridSize_2, - tunable->BlockSize, - handle.GetWavefrontWidth(), - 1, - tunable); + static_cast(gridSize_2 * tunable->BlockSize), size_t{1}, size_t{1}}; + const auto reduceImpl2 = configurator.GetReductionMethod_2(toReduceLength_2); + const auto use_padding2 = + detailDynamic::get_padding_need(reduceImpl2, + invariantLength, + toReduceLength_2, + gridSize_2, + tunable->BlockSize, + int(handle.GetWavefrontWidth()), + 1, + tunable); std::string param2 = param + " -DCK_PARAM_SRC2D_PADDING=" + std::to_string(static_cast(use_padding2.first)) + diff --git a/projects/miopen/src/rnn.cpp b/projects/miopen/src/rnn.cpp index 30764a0985d1..d5d178ab3001 100644 --- a/projects/miopen/src/rnn.cpp +++ b/projects/miopen/src/rnn.cpp @@ -115,22 +115,22 @@ size_t RNNDescriptor::biasOffsetCalculation(const TensorDescriptor& /*xDesc*/, if(dirMode != 0u) { - layerJump += (hsize * 2) * nHiddenTensorsPerLayer * (layer / 2) * 2; + layerJump += (hsize * 2) * nHiddenTensorsPerLayer * (uint32_t(layer) / 2) * 2; if(biasID >= nHiddenTensorsPerLayer) { layerJump += hsize * nHiddenTensorsPerLayer; } - layerJump += (layer % 2 == 1) ? nHiddenTensorsPerLayer * hsize : 0; + layerJump += (uint32_t(layer) % 2 == 1) ? nHiddenTensorsPerLayer * hsize : 0; - layerJump += hsize * biasID; + layerJump += hsize * uint32_t(biasID); } else { - layerJump += (hsize * 2) * nHiddenTensorsPerLayer * layer; + layerJump += (hsize * 2) * nHiddenTensorsPerLayer * uint32_t(layer); - layerJump += hsize * biasID; + layerJump += hsize * uint32_t(biasID); } return layerJump; @@ -153,18 +153,18 @@ size_t RNNDescriptor::paramsOffsetCalculation(const TensorDescriptor& xDesc, { layerJump += (inputVectorLen * hsize + hsize * hsize) * nHiddenTensorsPerLayer * 2; layerJump += - (hsize * hsize * 2 + hsize * hsize) * nHiddenTensorsPerLayer * (layer / 2 - 1) * 2; + (hsize * hsize * 2 + hsize * hsize) * nHiddenTensorsPerLayer * (uint32_t(layer) / 2 - 1) * 2; if(paramID >= nHiddenTensorsPerLayer) { layerJump += hsize * hsize * 2 * nHiddenTensorsPerLayer * 2; layerJump += (layer % 2 == 1) ? nHiddenTensorsPerLayer * (hsize * hsize) : 0; - layerJump += (hsize * hsize) * (paramID - nHiddenTensorsPerLayer); + layerJump += (hsize * hsize) * (uint32_t(paramID) - nHiddenTensorsPerLayer); } else { layerJump += (layer % 2 == 1) ? nHiddenTensorsPerLayer * (2 * hsize * hsize) : 0; - layerJump += (2 * hsize * hsize) * paramID; + layerJump += (2 * hsize * hsize) * uint32_t(paramID); } } else @@ -176,12 +176,12 @@ size_t RNNDescriptor::paramsOffsetCalculation(const TensorDescriptor& xDesc, layerJump += (inputVectorLen * hsize) * nHiddenTensorsPerLayer * 2; } layerJump += (layer == 1) ? nHiddenTensorsPerLayer * (hsize * hsize) : 0; - layerJump += (hsize * hsize) * (paramID - nHiddenTensorsPerLayer); + layerJump += (hsize * hsize) * (uint32_t(paramID) - nHiddenTensorsPerLayer); } else { layerJump += (layer == 1) ? nHiddenTensorsPerLayer * (inputVectorLen * hsize) : 0; - layerJump += (inputVectorLen * hsize) * paramID; + layerJump += (inputVectorLen * hsize) * uint32_t(paramID); } } } @@ -191,8 +191,8 @@ size_t RNNDescriptor::paramsOffsetCalculation(const TensorDescriptor& xDesc, if(layer > 0) { layerJump += (inputVectorLen * hsize + hsize * hsize) * nHiddenTensorsPerLayer; - layerJump += (hsize * hsize * 2) * nHiddenTensorsPerLayer * (layer - 1); - layerJump += (hsize * hsize) * paramID; + layerJump += (hsize * hsize * 2) * nHiddenTensorsPerLayer * (uint32_t(layer) - 1); + layerJump += (hsize * hsize) * uint32_t(paramID); } else { @@ -202,11 +202,11 @@ size_t RNNDescriptor::paramsOffsetCalculation(const TensorDescriptor& xDesc, { layerJump += (inputVectorLen * hsize) * nHiddenTensorsPerLayer; } - layerJump += (hsize * hsize) * (paramID - nHiddenTensorsPerLayer); + layerJump += (hsize * hsize) * (uint32_t(paramID) - nHiddenTensorsPerLayer); } else { - layerJump += (inputVectorLen * hsize) * paramID; + layerJump += (inputVectorLen * hsize) * uint32_t(paramID); } } } @@ -231,24 +231,24 @@ std::vector RNNDescriptor::pTensorLengthsCalculation(const TensorDescriptor { if(paramID >= nHiddenTensorsPerLayer) { - tdim[0] = tdim[1] = hsize; + tdim[0] = tdim[1] = int(hsize); } else { - tdim[0] = hsize; - tdim[1] = hsize * 2; + tdim[0] = int(hsize); + tdim[1] = int(hsize * 2); } } else // IS the input layer { if(paramID >= nHiddenTensorsPerLayer) { - tdim[0] = tdim[1] = hsize; + tdim[0] = tdim[1] = int(hsize); } else { - tdim[0] = hsize; - tdim[1] = inputVectorLen; + tdim[0] = int(hsize); + tdim[1] = int(inputVectorLen); } } } @@ -256,18 +256,18 @@ std::vector RNNDescriptor::pTensorLengthsCalculation(const TensorDescriptor { if(layer > 0) // NOT the input layer { - tdim[0] = tdim[1] = hsize; + tdim[0] = tdim[1] = int(hsize); } else { if(paramID >= nHiddenTensorsPerLayer) { - tdim[0] = tdim[1] = hsize; + tdim[0] = tdim[1] = int(hsize); } else { - tdim[0] = hsize; - tdim[1] = inputVectorLen; + tdim[0] = int(hsize); + tdim[1] = int(inputVectorLen); } } } @@ -337,8 +337,8 @@ RNNDescriptor::RNNDescriptor(int hsz, typeSize = dType == miopenHalf ? 2 : 4; } - hsize = hsz; - nLayers = layers; + hsize = size_t(hsz); + nLayers = size_t(layers); inputMode = inMode; dirMode = bidir; rnnMode = rmode; @@ -533,7 +533,7 @@ size_t RNNDescriptor::GetWorkspaceSize(const Handle& handle, } SeqTensorDescriptor xSeqTDesc = - makeSeqTensorDescriptor(xDesc, seqLength, miopenRNNDataSeqMajorNotPadded); + makeSeqTensorDescriptor(xDesc, size_t(seqLength), miopenRNNDataSeqMajorNotPadded); if(CheckDynamicAlgoSelection(handle, xSeqTDesc, miopenRNNTraining)) { @@ -573,7 +573,7 @@ size_t RNNDescriptor::GetReserveSize(size_t batchLenSum) const x /= 2; x += nLayers * batchLenSum * hsize * typeSize; } - if(!float_equal(miopen::deref(dropoutDesc).dropout, 0)) + if(!float_equal(miopen::deref(dropoutDesc).dropout, 0.f)) { x += (nLayers - 1) * batchLenSum * hsize * typeSize; x += (nLayers - 1) * batchLenSum * hsize * sizeof(bool); @@ -614,7 +614,7 @@ size_t RNNDescriptor::GetReserveSize(const Handle& handle, MIOPEN_THROW(miopenStatusBadParm, "Data type mismatch between descriptors"); } SeqTensorDescriptor xSeqTDesc = - makeSeqTensorDescriptor(xDesc, seqLength, miopenRNNDataSeqMajorNotPadded); + makeSeqTensorDescriptor(xDesc, size_t(seqLength), miopenRNNDataSeqMajorNotPadded); if(CheckDynamicAlgoSelection(handle, xSeqTDesc, miopenRNNTraining)) { @@ -644,7 +644,7 @@ size_t RNNDescriptor::GetParamsSize(size_t inputVector) const inputVector = 0; } - int bi = dirMode == miopenRNNbidirection ? 2 : 1; + size_t bi = (dirMode == miopenRNNbidirection) ? 2 : 1; auto sz = nHiddenTensorsPerLayer * hsize * bi * (inputVector + hsize + (nLayers - 1) * (bi + 1) * hsize); #if(MIO_RNN_DEBUG == 1) @@ -690,7 +690,7 @@ size_t RNNDescriptor::GetRNNInputSuperTensorSize(const Handle& /* handle */, else { auto maxBatchSize = xDesc[0].GetLengths()[0]; - inputBatchLenSum = seqLength * maxBatchSize; + inputBatchLenSum = size_t(seqLength) * maxBatchSize; } auto x = inputBatchLenSum * xDesc[0].GetLengths()[1] * typeSize; @@ -725,10 +725,10 @@ void RNNDescriptor::GetParamsDescriptor(const Handle& /* handle */, inputVectorLen = 0; // Create weight super tensor descriptor - int bi = (dirMode == miopenRNNbidirection) ? 2 : 1; + size_t bi = (dirMode == miopenRNNbidirection) ? 2 : 1; std::vector weight_lens(2, 0); - weight_lens[0] = inputVectorLen + ((nLayers - 1) * (bi + 1) + 1) * hsize; - weight_lens[1] = bi * hsize * nHiddenTensorsPerLayer; + weight_lens[0] = int(inputVectorLen + ((nLayers - 1) * (bi + 1) + 1) * hsize); + weight_lens[1] = int(bi * hsize * nHiddenTensorsPerLayer); if(biasMode == miopenRNNwithBias) { weight_lens[0] += (nLayers * 2); @@ -812,7 +812,7 @@ void RNNDescriptor::GetLayerParam(const Handle& handle, #endif // Copy over data to previously allocated param tensor - miopen::CopyTensor(handle, paramDesc, w, paramDesc, param, poffset, 0); + miopen::CopyTensor(handle, paramDesc, w, paramDesc, param, int(poffset), 0); } void RNNDescriptor::GetLayerBias(const Handle& handle, @@ -854,7 +854,7 @@ void RNNDescriptor::GetLayerBias(const Handle& handle, #endif // Copy over data to previously allocated param tensor - miopen::CopyTensor(handle, biasDesc, w, biasDesc, bias, boffset, 0); + miopen::CopyTensor(handle, biasDesc, w, biasDesc, bias, int(boffset), 0); } void RNNDescriptor::SetLayerParam(const Handle& handle, @@ -884,7 +884,7 @@ void RNNDescriptor::SetLayerParam(const Handle& handle, // 2. Calculate the strides for the matrix std::vector pstride(2, 1); - pstride[1] = paramDesc.GetLengths()[0]; + pstride[1] = int(paramDesc.GetLengths()[0]); std::vector intLens(paramDesc.GetLengths().begin(), paramDesc.GetLengths().end()); @@ -906,7 +906,7 @@ void RNNDescriptor::SetLayerParam(const Handle& handle, #endif // 4. Copy over data to previously allocated param tensor - miopen::CopyTensor(handle, paramDesc, param, paramSrc, w, 0, poffset); + miopen::CopyTensor(handle, paramDesc, param, paramSrc, w, 0, int(poffset)); } void RNNDescriptor::SetLayerBias(const Handle& handle, @@ -959,7 +959,7 @@ void RNNDescriptor::SetLayerBias(const Handle& handle, #endif // 4. Copy over data to previously allocated param tensor - miopen::CopyTensor(handle, biasSrc, bias, biasDesc, w, 0, boffset); + miopen::CopyTensor(handle, biasSrc, bias, biasDesc, w, 0, int(boffset)); } void RNNDescriptor::SetPaddingmode(miopenRNNPaddingMode_t padding) @@ -1140,7 +1140,7 @@ RNNDescriptor::makeSeqTensorDescriptor(c_array_view 0; --i) - lens_per_seq.push_back(seq); + lens_per_seq.push_back(size_t(seq)); }; std::vector batch_cache = @@ -1153,8 +1153,8 @@ RNNDescriptor::makeSeqTensorDescriptor(c_array_view yDescArray{output_descs.data(), seq_len}; return RNNBackwardDataPackedTensors(handle, - seq_len, + int(seq_len), yDescArray, dy, dhy, @@ -1355,7 +1355,7 @@ void RNNDescriptor::RNNVanillaBackwardWeights(const Handle& handle, miopen::c_array_view yDescArray{output_descs.data(), seq_len}; return RNNBackwardWeightsPackedTensors(handle, - seq_len, + int(seq_len), xDescArray, x, hDesc, diff --git a/projects/miopen/src/solver/pooling/backward2d.cpp b/projects/miopen/src/solver/pooling/backward2d.cpp index 6c2ddbcd709d..e040fabd6a89 100644 --- a/projects/miopen/src/solver/pooling/backward2d.cpp +++ b/projects/miopen/src/solver/pooling/backward2d.cpp @@ -45,7 +45,7 @@ struct kernel_params kernel_stride_h = pd.strides[0]; std::tie(batch_sz, n_inputs, in_height, in_width) = - miopen::tien<4>(problem.GetXDesc().GetLengths(), 1); + miopen::tien<4>(problem.GetXDesc().GetLengths(), 1u); out_pix_tile0 = config.out_pix_tile0; out_pix_tile1 = config.out_pix_tile1; diff --git a/projects/miopen/src/solver/pooling/backwardNd.cpp b/projects/miopen/src/solver/pooling/backwardNd.cpp index 7154126788df..88604a0563e9 100644 --- a/projects/miopen/src/solver/pooling/backwardNd.cpp +++ b/projects/miopen/src/solver/pooling/backwardNd.cpp @@ -69,7 +69,7 @@ ConvSolution PoolingBackwardNd::GetSolution(const ExecutionContext&, const auto& top = problem.GetYDesc(); std::size_t batch_sz, n_inputs, in_height, in_width; - std::tie(batch_sz, n_inputs, in_height, in_width) = miopen::tien<4>(bot.GetLengths(), 1); + std::tie(batch_sz, n_inputs, in_height, in_width) = miopen::tien<4>(bot.GetLengths(), 1u); const int pooling_method = (problem.GetPooling().GetMode() == miopenPoolingMax) ? MLO_POOLING_OP_MAX diff --git a/projects/miopen/src/solver/pooling/forward2d.cpp b/projects/miopen/src/solver/pooling/forward2d.cpp index bb1cd522e810..3044df7d5f35 100644 --- a/projects/miopen/src/solver/pooling/forward2d.cpp +++ b/projects/miopen/src/solver/pooling/forward2d.cpp @@ -154,7 +154,7 @@ ConvSolution PoolingForward2d::GetSolution(const ExecutionContext&, int batch_sz, n_outputs; std::tie(batch_sz, n_outputs, std::ignore, std::ignore) = - miopen::tien<4>(problem.GetYDesc().GetLengths(), 1); + miopen::tien<4>(problem.GetYDesc().GetLengths(), 1u); const auto& pool_d = problem.GetPooling(); const auto wsp_index = pool_d.GetWorkspaceIndexMode(); diff --git a/projects/miopen/src/solver/pooling/forwardNaive.cpp b/projects/miopen/src/solver/pooling/forwardNaive.cpp index 1fc736242a96..06651e798747 100644 --- a/projects/miopen/src/solver/pooling/forwardNaive.cpp +++ b/projects/miopen/src/solver/pooling/forwardNaive.cpp @@ -40,7 +40,7 @@ inline uint32_t RoundUpNearestPower2Positive(uint32_t v) v |= v >> 4; v |= v >> 8; v |= v >> 16; - return std::max(++v, 1U); // Shut clang-tidy. + return std::max(++v, 1u); // Shut clang-tidy. } #endif diff --git a/projects/miopen/src/solver/tensorOp/tensor_op_helpers.hpp b/projects/miopen/src/solver/tensorOp/tensor_op_helpers.hpp index 67f2b58cfba2..5513ced9dd9d 100644 --- a/projects/miopen/src/solver/tensorOp/tensor_op_helpers.hpp +++ b/projects/miopen/src/solver/tensorOp/tensor_op_helpers.hpp @@ -81,7 +81,7 @@ inline std::tuple GetRDBLCKandREADTYPEHIP(size_t len, miope { if(type == miopenHalf) { - return (len % 2 == 0) ? std::make_tuple(2U, "half2") : std::make_tuple(1U, "half"); + return (len % 2 == 0) ? std::make_tuple(2U, "half2") : std::make_tuple(1u, "half"); } const std::string data_type = GetDataType(type); size_t RD_BLCK = (len % 4 == 0) ? 4 : (len % 2 == 0) ? 2 : 1; diff --git a/projects/miopen/tools/sqlite2txt/main.cpp b/projects/miopen/tools/sqlite2txt/main.cpp index 7b501efbd5e0..8a91b4921bd5 100644 --- a/projects/miopen/tools/sqlite2txt/main.cpp +++ b/projects/miopen/tools/sqlite2txt/main.cpp @@ -26,7 +26,7 @@ std::unique_ptr PrepareStatement(sqlite3* { sqlite3_stmt* stmt; const char* tail; - if(sqlite3_prepare_v2(db, sql.c_str(), sql.length(), &stmt, &tail) != SQLITE_OK || + if(sqlite3_prepare_v2(db, sql.c_str(), (int)sql.length(), &stmt, &tail) != SQLITE_OK || stmt == nullptr) { std::cerr << "Error while preparing SQL statement: " << sqlite3_errmsg(db) << std::endl;