|
8 | 8 | #include <cudf/ast/expressions.hpp> |
9 | 9 | #include <cudf/concatenate.hpp> |
10 | 10 | #include <cudf/detail/nvtx/ranges.hpp> |
| 11 | +#include <cudf/detail/utilities/integer_utils.hpp> |
11 | 12 | #include <cudf/io/parquet.hpp> |
12 | 13 | #include <cudf/io/text/byte_range_info.hpp> |
13 | 14 | #include <cudf/join/filtered_join.hpp> |
|
19 | 20 | #include <rmm/mr/owning_wrapper.hpp> |
20 | 21 | #include <rmm/mr/pool_memory_resource.hpp> |
21 | 22 |
|
| 23 | +#include <numeric> |
22 | 24 | #include <string> |
23 | 25 | #include <vector> |
24 | 26 |
|
|
27 | 29 | * @brief Definitions for utilities for `hybrid_scan_io` example |
28 | 30 | */ |
29 | 31 |
|
/**
 * @brief Interprets a string as a boolean flag
 *
 * The comparison is case-insensitive.
 *
 * @param input String to interpret
 * @return `true` if `input` is one of "ON", "TRUE", "YES", "Y" or "T"; `false` for anything
 * else, including the empty string
 */
bool get_boolean(std::string input)
{
  // `toupper` has undefined behavior for negative `char` values (e.g. non-ASCII bytes on
  // platforms where `char` is signed), so route every character through `unsigned char`
  std::transform(input.begin(), input.end(), input.begin(), [](unsigned char c) {
    return static_cast<char>(::toupper(c));
  });

  // Check if the upper-cased input matches any of the accepted spellings
  return input == "ON" or input == "TRUE" or input == "YES" or input == "Y" or input == "T";
}
| 39 | + |
30 | 40 | std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(bool is_pool_used) |
31 | 41 | { |
32 | 42 | if (is_pool_used) { |
@@ -91,68 +101,130 @@ void check_tables_equal(cudf::table_view const& lhs_table, |
91 | 101 | } |
92 | 102 | } |
93 | 103 |
|
94 | | -cudf::host_span<uint8_t const> fetch_footer_bytes(cudf::host_span<uint8_t const> buffer) |
| 104 | +std::unique_ptr<cudf::io::datasource::buffer> fetch_footer_bytes(cudf::io::datasource& datasource) |
95 | 105 | { |
96 | 106 | CUDF_FUNC_RANGE(); |
97 | 107 |
|
98 | 108 | using namespace cudf::io::parquet; |
99 | 109 |
|
100 | 110 | constexpr auto header_len = sizeof(file_header_s); |
101 | 111 | constexpr auto ender_len = sizeof(file_ender_s); |
102 | | - size_t const len = buffer.size(); |
| 112 | + size_t const len = datasource.size(); |
103 | 113 |
|
104 | | - auto const header_buffer = cudf::host_span<uint8_t const>(buffer.data(), header_len); |
105 | | - auto const header = reinterpret_cast<file_header_s const*>(header_buffer.data()); |
106 | | - auto const ender_buffer = |
107 | | - cudf::host_span<uint8_t const>(buffer.data() + len - ender_len, ender_len); |
108 | | - auto const ender = reinterpret_cast<file_ender_s const*>(ender_buffer.data()); |
| 114 | + auto header_buffer = datasource.host_read(0, header_len); |
| 115 | + auto const header = reinterpret_cast<file_header_s const*>(header_buffer->data()); |
| 116 | + auto ender_buffer = datasource.host_read(len - ender_len, ender_len); |
| 117 | + auto const ender = reinterpret_cast<file_ender_s const*>(ender_buffer->data()); |
109 | 118 | CUDF_EXPECTS(len > header_len + ender_len, "Incorrect data source"); |
110 | 119 | constexpr uint32_t parquet_magic = (('P' << 0) | ('A' << 8) | ('R' << 16) | ('1' << 24)); |
111 | 120 | CUDF_EXPECTS(header->magic == parquet_magic && ender->magic == parquet_magic, |
112 | 121 | "Corrupted header or footer"); |
113 | 122 | CUDF_EXPECTS(ender->footer_len != 0 && ender->footer_len <= (len - header_len - ender_len), |
114 | 123 | "Incorrect footer length"); |
115 | 124 |
|
116 | | - return cudf::host_span<uint8_t const>(buffer.data() + len - ender->footer_len - ender_len, |
117 | | - ender->footer_len); |
| 125 | + return datasource.host_read(len - ender->footer_len - ender_len, ender->footer_len); |
118 | 126 | } |
119 | 127 |
|
120 | | -cudf::host_span<uint8_t const> fetch_page_index_bytes( |
121 | | - cudf::host_span<uint8_t const> buffer, cudf::io::text::byte_range_info const page_index_bytes) |
| 128 | +std::unique_ptr<cudf::io::datasource::buffer> fetch_page_index_bytes( |
| 129 | + cudf::io::datasource& datasource, cudf::io::text::byte_range_info const page_index_bytes) |
122 | 130 | { |
123 | | - return cudf::host_span<uint8_t const>( |
124 | | - reinterpret_cast<uint8_t const*>(buffer.data()) + page_index_bytes.offset(), |
125 | | - page_index_bytes.size()); |
| 131 | + return datasource.host_read(page_index_bytes.offset(), page_index_bytes.size()); |
126 | 132 | } |
127 | 133 |
|
128 | | -std::vector<rmm::device_buffer> fetch_byte_ranges( |
129 | | - cudf::host_span<uint8_t const> host_buffer, |
130 | | - cudf::host_span<cudf::io::text::byte_range_info const> byte_ranges, |
131 | | - rmm::cuda_stream_view stream, |
132 | | - rmm::device_async_resource_ref mr) |
| 134 | +cudf::host_span<uint8_t const> make_host_span( |
| 135 | + std::reference_wrapper<cudf::io::datasource::buffer const> buffer) |
133 | 136 | { |
134 | | - CUDF_FUNC_RANGE(); |
| 137 | + return cudf::host_span<uint8_t const>{static_cast<uint8_t const*>(buffer.get().data()), |
| 138 | + buffer.get().size()}; |
| 139 | +} |
135 | 140 |
|
| 141 | +std::tuple<std::vector<rmm::device_buffer>, |
| 142 | + std::vector<cudf::device_span<uint8_t const>>, |
| 143 | + std::future<void>> |
| 144 | +fetch_byte_ranges(cudf::io::datasource& datasource, |
| 145 | + cudf::host_span<cudf::io::text::byte_range_info const> byte_ranges, |
| 146 | + rmm::cuda_stream_view stream, |
| 147 | + rmm::device_async_resource_ref mr) |
| 148 | +{ |
136 | 149 | static std::mutex mutex; |
137 | 150 |
|
138 | | - std::vector<rmm::device_buffer> buffers(byte_ranges.size()); |
| 151 | + // Allocate device spans for each column chunk |
| 152 | + std::vector<cudf::device_span<uint8_t const>> column_chunk_data{}; |
| 153 | + column_chunk_data.reserve(byte_ranges.size()); |
| 154 | + |
| 155 | + auto total_size = std::accumulate( |
| 156 | + byte_ranges.begin(), byte_ranges.end(), std::size_t{0}, [&](auto acc, auto const& range) { |
| 157 | + return acc + range.size(); |
| 158 | + }); |
| 159 | + |
| 160 | + // Allocate single device buffer for all column chunks |
| 161 | + std::vector<rmm::device_buffer> column_chunk_buffers{}; |
| 162 | + column_chunk_buffers.emplace_back(total_size, stream, mr); |
| 163 | + auto buffer_data = static_cast<uint8_t*>(column_chunk_buffers.back().data()); |
| 164 | + std::ignore = std::accumulate( |
| 165 | + byte_ranges.begin(), byte_ranges.end(), std::size_t{0}, [&](auto acc, auto const& range) { |
| 166 | + column_chunk_data.emplace_back(buffer_data + acc, static_cast<size_t>(range.size())); |
| 167 | + return acc + range.size(); |
| 168 | + }); |
| 169 | + |
| 170 | + std::vector<std::future<size_t>> device_read_tasks{}; |
| 171 | + std::vector<std::future<size_t>> host_read_tasks{}; |
| 172 | + device_read_tasks.reserve(byte_ranges.size()); |
| 173 | + host_read_tasks.reserve(byte_ranges.size()); |
139 | 174 | { |
140 | 175 | std::lock_guard<std::mutex> lock(mutex); |
141 | 176 |
|
142 | | - std::transform( |
143 | | - byte_ranges.begin(), byte_ranges.end(), buffers.begin(), [&](auto const& byte_range) { |
144 | | - auto const chunk_offset = host_buffer.data() + byte_range.offset(); |
145 | | - auto const chunk_size = static_cast<size_t>(byte_range.size()); |
146 | | - auto buffer = rmm::device_buffer(chunk_size, stream, mr); |
147 | | - cudf::detail::cuda_memcpy_async( |
148 | | - cudf::device_span<uint8_t>{static_cast<uint8_t*>(buffer.data()), chunk_size}, |
149 | | - cudf::host_span<uint8_t const>{chunk_offset, chunk_size}, |
150 | | - stream); |
151 | | - return buffer; |
152 | | - }); |
| 177 | + for (size_t chunk = 0; chunk < byte_ranges.size();) { |
| 178 | + auto const io_offset = static_cast<size_t>(byte_ranges[chunk].offset()); |
| 179 | + auto io_size = static_cast<size_t>(byte_ranges[chunk].size()); |
| 180 | + size_t next_chunk = chunk + 1; |
| 181 | + while (next_chunk < byte_ranges.size()) { |
| 182 | + size_t const next_offset = byte_ranges[next_chunk].offset(); |
| 183 | + if (next_offset != io_offset + io_size) { break; } |
| 184 | + io_size += byte_ranges[next_chunk].size(); |
| 185 | + next_chunk++; |
| 186 | + } |
| 187 | + |
| 188 | + if (io_size != 0) { |
| 189 | + auto dest = const_cast<uint8_t*>(column_chunk_data[chunk].data()); |
| 190 | + // Directly read the column chunk data to the device |
| 191 | + // buffer if supported |
| 192 | + if (datasource.supports_device_read() and datasource.is_device_read_preferred(io_size)) { |
| 193 | + device_read_tasks.emplace_back( |
| 194 | + datasource.device_read_async(io_offset, io_size, dest, stream)); |
| 195 | + } else { |
| 196 | + // Read the column chunk data to the host buffer and |
| 197 | + // copy it to the device buffer |
| 198 | + host_read_tasks.emplace_back( |
| 199 | + std::async(std::launch::deferred, [&datasource, io_offset, io_size, dest, stream]() { |
| 200 | + auto host_buffer = datasource.host_read(io_offset, io_size); |
| 201 | + cudf::detail::cuda_memcpy_async( |
| 202 | + cudf::device_span<uint8_t>{dest, io_size}, |
| 203 | + cudf::host_span<uint8_t const>{host_buffer->data(), io_size}, |
| 204 | + stream); |
| 205 | + return io_size; |
| 206 | + })); |
| 207 | + } |
| 208 | + } |
| 209 | + chunk = next_chunk; |
| 210 | + } |
153 | 211 | } |
154 | 212 |
|
155 | | - return buffers; |
| 213 | + auto sync_function = [](decltype(host_read_tasks) host_read_tasks, |
| 214 | + decltype(device_read_tasks) device_read_tasks) { |
| 215 | + for (auto& task : host_read_tasks) { |
| 216 | + task.get(); |
| 217 | + } |
| 218 | + for (auto& task : device_read_tasks) { |
| 219 | + task.get(); |
| 220 | + } |
| 221 | + }; |
| 222 | + return {std::move(column_chunk_buffers), |
| 223 | + std::move(column_chunk_data), |
| 224 | + std::async(std::launch::deferred, |
| 225 | + sync_function, |
| 226 | + std::move(host_read_tasks), |
| 227 | + std::move(device_read_tasks))}; |
156 | 228 | } |
157 | 229 |
|
158 | 230 | std::unique_ptr<cudf::table> concatenate_tables(std::vector<std::unique_ptr<cudf::table>> tables, |
|
0 commit comments