2525#include < mutex> // for once_flag, call_once
2626#include < vector> // for vector
2727
28+ #include " common.h" // for HumanMemUnit
2829#include " compressed_iterator.h" // for CompressedByteT
2930#include " cuda_context.cuh" // for CUDAContext
3031#include " cuda_dr_utils.h" // for GetGlobalCuDriverApi
32+ #include " cuda_rt_utils.h" // for CurrentDevice
3133#include " device_compression.h"
3234#include " device_vector.cuh" // for DeviceUVector
3335#include " nvtx_utils.h" // for xgboost_NVTX_FN_RANGE
@@ -290,7 +292,6 @@ void DecompressSnappy(dh::CUDAStreamView stream, SnappyDecomprMgr const& mgr,
290292 dh::device_vector<void *> d_out_ptrs (n_chunks);
291293 dh::safe_cuda (cudaMemcpyAsync (d_out_ptrs.data ().get (), h_out_ptrs.data (),
292294 dh::ToSpan (d_out_ptrs).size_bytes (), cudaMemcpyDefault, stream));
293- CHECK (curt::SupportsPageableMem () || curt::SupportsAts ());
294295 // Run nvcomp
295296 SafeNvComp (nvcompBatchedSnappyDecompressAsync (
296297 mgr_impl->d_in_chunk_ptrs .data ().get (), mgr_impl->d_in_chunk_sizes .data ().get (),
@@ -383,8 +384,11 @@ void DecompressSnappy(dh::CUDAStreamView stream, SnappyDecomprMgr const& mgr,
383384 auto n_bytes = thrust::reduce (cuctx->CTP (), out_sizes.cbegin (), out_sizes.cend ());
384385 auto n_total_bytes = p_out->size ();
385386 auto ratio = static_cast <double >(n_total_bytes) / in.size_bytes ();
386- LOG (DEBUG ) << " [snappy] Input: " << in.size_bytes () << " , need:" << n_bytes
387- << " allocated:" << n_total_bytes << " ratio:" << ratio;
387+ auto ratio_act = static_cast <double >(n_bytes) / in.size_bytes ();
388+ LOG (DEBUG ) << " [snappy] Input: " << common::HumanMemUnit (in.size_bytes ())
389+ << " , need:" << common::HumanMemUnit (n_bytes)
390+ << " , allocated:" << common::HumanMemUnit (n_total_bytes) << " , ratio:" << ratio
391+ << " , actual ratio:" << ratio_act;
388392
389393 /* *
390394 * Meta
@@ -470,10 +474,7 @@ SnappyDecomprMgr::~SnappyDecomprMgr() = default;
// Stub accessor: there is no implementation object on this path, so the result is always null.
// NOTE(review): presumably this is the build without nvcomp support -- confirm against the
// enclosing preprocessor guard, which is not visible here.
SnappyDecomprMgrImpl* SnappyDecomprMgr::Impl() const { return nullptr; }
471475
// Stub: unconditionally reports the manager as empty.
[[nodiscard]] bool SnappyDecomprMgr::Empty() const { return true; }
// Stub: unconditionally reports zero decompressed bytes. It deliberately performs no
// nvcomp-support assertion, so callers can query the size without tripping a check.
[[nodiscard]] std::size_t SnappyDecomprMgr::DecompressedBytes() const { return 0; }
477478
478479// Round-trip compression
479480void DecompressSnappy (dh::CUDAStreamView, SnappyDecomprMgr const &,
@@ -482,15 +483,22 @@ void DecompressSnappy(dh::CUDAStreamView, SnappyDecomprMgr const&,
482483}
483484
/**
 * @brief Stub for snappy compression.
 *
 * An empty input is treated as a no-op and returns default-constructed parameters
 * without asserting. Any non-empty input reaches common::AssertNvCompSupport()
 * (presumably a check that reports missing nvcomp support -- confirm at its
 * definition) before the same default value is returned.
 *
 * @param in Input bytes; only its emptiness is inspected. All other parameters are unused.
 *
 * @return Default-constructed CuMemParams.
 */
[[nodiscard]] CuMemParams CompressSnappy(Context const*,
                                         common::Span<common::CompressedByteT const> in,
                                         dh::DeviceUVector<std::uint8_t>*, std::size_t) {
  if (in.empty()) {
    // Nothing to compress, so there is nothing that requires nvcomp.
    return {};
  }
  common::AssertNvCompSupport();
  return {};
}
490494
/**
 * @brief Stub for coalescing compressed buffers into host memory.
 *
 * When the input parameters describe zero source bytes, a default-constructed view is
 * returned without asserting. Otherwise common::AssertNvCompSupport() is invoked
 * (presumably a check that reports missing nvcomp support -- confirm at its
 * definition) before the same default value is returned.
 *
 * @param in_params Parameters whose TotalSrcBytes() decides whether there is any work.
 *                  All other parameters are unused.
 *
 * @return Default-constructed RefResourceView.
 */
[[nodiscard]] common::RefResourceView<std::uint8_t> CoalesceCompressedBuffersToHost(
    dh::CUDAStreamView, std::shared_ptr<HostPinnedMemPool>, CuMemParams const& in_params,
    dh::DeviceUVector<std::uint8_t> const&, CuMemParams*) {
  if (in_params.TotalSrcBytes() == 0) {
    // No source bytes to move, so nvcomp is not needed.
    return {};
  }
  common::AssertNvCompSupport();
  return {};
}
0 commit comments