@@ -165,18 +165,15 @@ void AfterCompression(GBDTGPUMemManager &manager, BagCsrManager &csrManager, Bag
165165 if (optimiseSetKey == false )
166166 SetKey<<<numSeg, blockSize, sizeof (uint) * 2 , (*(cudaStream_t*)pStream)>>>
167167 (csrManager.pEachCsrFeaStartPos , csrManager.pEachCsrFeaLen , csrManager.getMutableCsrKey ());
168- // (csrManager.pEachCsrFeaStartPos, csrManager.pEachCsrFeaLen, pCSRMultableKey);
169168 else {
170169 if (numSeg < 1000000 )
171170 SetKey<<<numSeg, blockSize, sizeof (uint) * 2 , (*(cudaStream_t*)pStream)>>>
172171 (csrManager.pEachCsrFeaStartPos , csrManager.pEachCsrFeaLen , csrManager.getMutableCsrKey ());
173- // (csrManager.pEachCsrFeaStartPos, csrManager.pEachCsrFeaLen, pCSRMultableKey);
174172 else {
175173 int numSegEachBlk = numSeg/10000 ;
176174 int numofBlkSetKey = (numSeg + numSegEachBlk - 1 ) / numSegEachBlk;
177175 SetKey<<<numofBlkSetKey, blockSize, 0 , (*(cudaStream_t*)pStream)>>> (csrManager.pEachCsrFeaStartPos , csrManager.pEachCsrFeaLen ,
178176 numSegEachBlk, numSeg, csrManager.getMutableCsrKey ());
179- // numSegEachBlk, numSeg, pCSRMultableKey);
180177 }
181178 }
182179 cudaStreamSynchronize ((*(cudaStream_t*)pStream));
@@ -304,15 +301,13 @@ void AllNode2CompGD(GBDTGPUMemManager &manager, BagCsrManager &csrManager, BagMa
304301 cudaDeviceSynchronize ();
305302 thrust::exclusive_scan (thrust::device, csrManager.getCsrLen (), csrManager.getCsrLen () + csrManager.curNumCsr , csrManager.getMutableCsrStart ());
306303 cudaDeviceSynchronize ();
307- uint *pCsrNewLen_d;// = (uint*)(indexComp.histogram_d.addr);
308- // uint *pCsrNewLen_d = (uint*)(indexComp.histogram_d.addr);
309- checkCudaErrors (cudaMallocHost ((void **)&pCsrNewLen_d, sizeof (uint) * csrManager.curNumCsr * 2 ));
304+ uint *pCsrNewLen_d = (uint*)(indexComp.histogram_d .addr );
310305 checkCudaErrors (cudaMemset (pCsrNewLen_d, 0 , sizeof (uint) * csrManager.curNumCsr * 2 ));
311306 checkCudaErrors (cudaMemset (csrManager.pEachCsrFeaLen , 0 , sizeof (uint) * bagManager.m_numFea * numofSNode));
312307 dim3 dimNumofBlockToCsrLen;
313308 uint blockSizeCsrLen = 128 ;
314309
315- cudaDeviceSynchronize ();
310+ cudaDeviceSynchronize ();
316311 dimNumofBlockToCsrLen.x = (numofDenseValue_previous + blockSizeCsrLen - 1 ) / blockSizeCsrLen;
317312 newCsrLenFvalue<<<dimNumofBlockToCsrLen, blockSizeCsrLen, blockSizeCsrLen * sizeof (uint)>>> (
318313 csrManager.preFvalueInsId , numofDenseValue_previous,
@@ -321,7 +316,6 @@ cudaDeviceSynchronize();
321316 csrManager.getCsrFvalue (), csrManager.curNumCsr ,
322317 csrManager.pEachCsrFeaStartPos , bagManager.m_pPreNumSN_h [0 ],
323318 bagManager.m_numFea , csrManager.getCsrKey (), pCsrNewLen_d, pCsrId2Pid);
324- // bagManager.m_numFea, pCSRKey, pCsrNewLen_d, pCsrId2Pid);
325319 cudaDeviceSynchronize ();
326320
327321 GETERROR (" after newCsrLenFvalue" );
@@ -356,9 +350,7 @@ cudaDeviceSynchronize();
356350 int blockSizeLoadCsrLen;
357351 dim3 dimNumofBlockToLoadCsrLen;
358352 conf.ConfKernel (csrManager.curNumCsr * 2 , blockSizeLoadCsrLen, dimNumofBlockToLoadCsrLen);
359- // uint *pCsrMarker = (uint*)indexComp.partitionMarker.addr;
360- uint *pCsrMarker;
361- checkCudaErrors (cudaMalloc ((void **)&pCsrMarker, sizeof (uint) * csrManager.curNumCsr * 2 ));
353+ uint *pCsrMarker = (uint*)indexComp.partitionMarker .addr ;
362354 checkCudaErrors (cudaMemset (pCsrMarker, 0 , sizeof (uint) * csrManager.curNumCsr * 2 ));
363355 map2One<<<dimNumofBlockToLoadCsrLen, blockSizeLoadCsrLen>>> (pCsrNewLen_d, csrManager.curNumCsr * 2 , pCsrMarker);
364356 GETERROR (" after map2One" );
@@ -369,14 +361,14 @@ checkCudaErrors(cudaMalloc((void**)&pCsrMarker, sizeof(uint) * csrManager.curNum
369361 checkCudaErrors (cudaMemcpy (&csrManager.curNumCsr , pCsrMarker + csrManager.curNumCsr * 2 - 1 , sizeof (uint), cudaMemcpyDefault));
370362
371363 checkCudaErrors (cudaMemset (csrManager.getMutableCsrLen (), 0 , sizeof (uint) * csrManager.curNumCsr ));
372- cudaDeviceSynchronize ();
364+ cudaDeviceSynchronize ();
373365 loadDenseCsr<<<dimNumofBlockToLoadCsrLen, blockSizeLoadCsrLen>>> (pCsrFvalueSpare, pCsrNewLen_d,
374366 previousNumCsr * 2 , csrManager.curNumCsr , pCsrMarker,
375367 csrManager.getMutableCsrFvalue (), csrManager.getMutableCsrLen ());
376368 GETERROR (" after loadDenseCsr" );
377369 printf (" done load dense csr: number of csr is %d\n " , csrManager.curNumCsr );
378370 thrust::exclusive_scan (thrust::device, csrManager.pEachCsrFeaLen , csrManager.pEachCsrFeaLen + numofSNode * bagManager.m_numFea , csrManager.pEachCsrFeaStartPos );
379- cudaDeviceSynchronize ();
371+ cudaDeviceSynchronize ();
380372
381373
382374 thrust::exclusive_scan (thrust::device, csrManager.pEachNodeSizeInCsr , csrManager.pEachNodeSizeInCsr + numofSNode, csrManager.pEachCsrNodeStartPos );
@@ -385,8 +377,6 @@ cudaDeviceSynchronize();
385377 thrust::exclusive_scan (thrust::device, csrManager.getCsrLen (), csrManager.getCsrLen () + csrManager.curNumCsr , pCsrStartCurRound);
386378 PROCESS_ERROR (csrManager.curNumCsr <= bagManager.m_numFeaValue );
387379 cudaDeviceSynchronize ();
388- checkCudaErrors (cudaFree (pCsrMarker));
389- printf (" exit if\n " );
390380 }
391381 else
392382 {
0 commit comments