1919#include < thread>
2020#include < random>
2121
22+ #if USING_CUDA
23+ #include " c10/cuda/CUDACachingAllocator.h"
24+ #endif
25+
2226#ifdef __linux__
2327#include < malloc.h>
2428#endif
@@ -220,11 +224,13 @@ WarmUpResult NormalEngine::prefillWarmUp(const EngineInitParams& params) {
220224 rtp_llm::setTraceMemory (true );
221225 executor_.reset (new NormalExecutor (params, nullptr , true , false , 0 , exec_init_params_));
222226 THROW_IF_STATUSOR_ERROR (preRun (fake_input, preRunMode::prefill_warm_up));
223- const auto device_status = getGpuExecStatus ();
227+ const auto max_consumed = getGpuExecStatus (). device_memory_status . max_consumed_bytes ;
224228 rtp_llm::setTraceMemory (false );
225229 (void )executor_.reset (nullptr );
226- return WarmUpResult (
227- {device_status.device_memory_status .available_bytes , device_status.device_memory_status .max_consumed_bytes });
230+ cudaDeviceSynchronize ();
231+ c10::cuda::CUDACachingAllocator::emptyCache ();
232+ const auto device_status = getGpuExecStatus ();
233+ return WarmUpResult ({device_status.device_memory_status .available_bytes , max_consumed});
228234#endif
229235}
230236
@@ -250,11 +256,13 @@ WarmUpResult NormalEngine::decodeWarmUp(const EngineInitParams& params) {
250256 }
251257 executor_.reset (new NormalExecutor (params, cache_manager, true , false , 0 , exec_init_params_));
252258 THROW_IF_STATUSOR_ERROR (preRun (fake_input, preRunMode::decode_warm_up));
253- const auto device_status = getGpuExecStatus ();
259+ const auto max_consumed = getGpuExecStatus (). device_memory_status . max_consumed_bytes ;
254260 rtp_llm::setTraceMemory (false );
255261 (void )executor_.reset (nullptr );
256- return WarmUpResult (
257- {device_status.device_memory_status .available_bytes , device_status.device_memory_status .max_consumed_bytes });
262+ cudaDeviceSynchronize ();
263+ c10::cuda::CUDACachingAllocator::emptyCache ();
264+ const auto device_status = getGpuExecStatus ();
265+ return WarmUpResult ({device_status.device_memory_status .available_bytes , max_consumed});
258266#endif
259267}
260268
0 commit comments