Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions backends/metax_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,8 @@ target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmccl.so)
# Link two prebuilt shared libraries located under ${MACA_PATH}/lib:
# libmcFlashAttn.so and libmcpti.so.
# NOTE(review): the keyword-less signature is kept on purpose; CMake does not
# allow mixing keyword (PRIVATE/PUBLIC) and plain signatures of
# target_link_libraries() on the same target.
target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmcFlashAttn.so)
target_link_libraries(${TARGET_NAME} ${MACA_PATH}/lib/libmcpti.so)

# Hand the supplementary linker script that lives next to this CMakeLists.txt
# to the linker via -T.
#  * The script file is named my_script.lds (not my_script.ld).
#  * CMAKE_CURRENT_SOURCE_DIR is used so the path stays valid even when this
#    directory is not the top-level source directory of the build.
set(metax_linker_script "${CMAKE_CURRENT_SOURCE_DIR}/my_script.lds")
if(NOT EXISTS "${metax_linker_script}")
  # Fail at configure time with a clear message instead of at link time.
  message(
    FATAL_ERROR "[MetaX] Linker script not found: ${metax_linker_script}")
endif()
target_link_options(${TARGET_NAME} PRIVATE "-T${metax_linker_script}")
# Relink the target whenever the linker script changes (honored by the
# Makefile and Ninja generators).
set_property(
  TARGET ${TARGET_NAME}
  APPEND
  PROPERTY LINK_DEPENDS "${metax_linker_script}")

if(WITH_CINN)
message(STATUS "[MetaX] Linking CINN object library")
target_link_libraries(${TARGET_NAME} $<TARGET_OBJECTS:metax_cinn_obj>)
Expand Down
8 changes: 8 additions & 0 deletions backends/metax_gpu/my_script.lds
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/*
 * Supplementary linker script: gathers every input .mc_fatbin section into a
 * single output .mc_fatbin section declared inside an OVERLAY block.
 * NOTE(review): presumably .mc_fatbin carries the MetaX device fat binary --
 * confirm against the toolchain documentation.
 */
SECTIONS
{
OVERLAY :
{
.mc_fatbin { *(.mc_fatbin) }
}
}
/* INSERT splices the SECTIONS statements above into the default linker
   script, right after its .comment output section, instead of replacing the
   default script when this file is passed with -T. */
INSERT AFTER .comment;
73 changes: 73 additions & 0 deletions backends/metax_gpu/patch/paddle.patch
Original file line number Diff line number Diff line change
Expand Up @@ -1117,3 +1117,76 @@ index 368cb21c21..f0f99fbd2f 100644
return (getLaneId() == 0) ? 0ULL : (1ULL << getLaneId()) - 1ULL;
#else
unsigned mask;
diff --git a/paddle/phi/kernels/gpu/multiclass_nms3_kernel.cu b/paddle/phi/kernels/gpu/multiclass_nms3_kernel.cu
index 7ced1fdc17..e49759ebb4 100644
--- a/paddle/phi/kernels/gpu/multiclass_nms3_kernel.cu
+++ b/paddle/phi/kernels/gpu/multiclass_nms3_kernel.cu
@@ -302,11 +302,11 @@ void SortScoresPerClassGPU(gpuStream_t stream,
begin_bit,
end_bit,
stream);
-#ifdef PADDLE_WITH_HIP
- PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
-#else
- PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
-#endif
+// #ifdef PADDLE_WITH_HIP
+// PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
+// #else
+// PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
+// #endif
}

/* ===========
@@ -611,11 +611,11 @@ void AllClassNMSGPU(gpuStream_t stream,
score_shift,
caffe_semantics);

-#ifdef PADDLE_WITH_HIP
- PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
-#else
- PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
-#endif
+// #ifdef PADDLE_WITH_HIP
+// PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
+// #else
+// PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
+// #endif
}

/* ==================
@@ -769,11 +769,11 @@ void GatherNMSOutputsGPU(gpuStream_t stream,
reinterpret_cast<int*>(nmsed_valid_mask),
clip_boxes,
T_SCORE(score_shift));
-#ifdef PADDLE_WITH_HIP
- PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
-#else
- PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
-#endif
+// #ifdef PADDLE_WITH_HIP
+// PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
+// #else
+// PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
+// #endif
}

template <typename T_SCORE>
@@ -818,11 +818,11 @@ void SortScoresPerImageGPU(gpuStream_t stream,
begin_bit,
end_bit,
stream);
-#ifdef PADDLE_WITH_HIP
- PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
-#else
- PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
-#endif
+// #ifdef PADDLE_WITH_HIP
+// PADDLE_ENFORCE_GPU_SUCCESS(hipGetLastError());
+// #else
+// PADDLE_ENFORCE_GPU_SUCCESS(cudaGetLastError());
+// #endif
}

template <typename T>

Loading