|
1 | | -// Copyright (c) 2020-2021, NVIDIA CORPORATION. |
| 1 | +// Copyright (c) 2021-2022, NVIDIA CORPORATION. |
2 | 2 | // |
3 | 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | 4 | // you may not use this file except in compliance with the License. |
|
12 | 12 | // See the License for the specific language governing permissions and |
13 | 13 | // limitations under the License. |
14 | 14 |
|
15 | | -#include "node_cudf/groupby.hpp" |
16 | | -#include "node_cudf/table.hpp" |
17 | | -#include "node_cudf/utilities/error.hpp" |
18 | | -#include "node_cudf/utilities/napi_to_cpp.hpp" |
| 15 | +#include <node_cudf/groupby.hpp> |
| 16 | +#include <node_cudf/table.hpp> |
| 17 | +#include <node_cudf/utilities/error.hpp> |
| 18 | +#include <node_cudf/utilities/napi_to_cpp.hpp> |
19 | 19 |
|
20 | 20 | #include <cudf/groupby.hpp> |
21 | 21 | #include <cudf/types.hpp> |
@@ -48,6 +48,8 @@ Napi::Function GroupBy::Init(Napi::Env const& env, Napi::Object exports) { |
48 | 48 | InstanceMethod<&GroupBy::sum>("_sum"), |
49 | 49 | InstanceMethod<&GroupBy::var>("_var"), |
50 | 50 | InstanceMethod<&GroupBy::quantile>("_quantile"), |
| 51 | + InstanceMethod<&GroupBy::collect_list>("_collect_list"), |
| 52 | + InstanceMethod<&GroupBy::collect_set>("_collect_set"), |
51 | 53 | }); |
52 | 54 | } |
53 | 55 |
|
@@ -111,168 +113,147 @@ Napi::Value GroupBy::get_groups(Napi::CallbackInfo const& info) { |
111 | 113 | } |
112 | 114 |
|
113 | 115 | Napi::Value GroupBy::argmax(Napi::CallbackInfo const& info) { /* Applies cudf's argmax groupby aggregation to the `values` table passed in `info`. */ |
114 | | - auto args = _get_basic_args(info); |
115 | | - auto values = args.first; |
116 | | - auto mr = args.second; |
| 116 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
117 | 117 | return _single_aggregation( |
118 | | - [&]() { return cudf::make_argmax_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 118 | + info, values, mr, []() { return cudf::make_argmax_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
119 | 119 | } |
120 | 120 |
|
121 | 121 | Napi::Value GroupBy::argmin(Napi::CallbackInfo const& info) { /* Applies cudf's argmin groupby aggregation to the `values` table passed in `info`. */ |
122 | | - auto args = _get_basic_args(info); |
123 | | - auto values = args.first; |
124 | | - auto mr = args.second; |
| 122 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
125 | 123 | return _single_aggregation( |
126 | | - [&]() { return cudf::make_argmin_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 124 | + info, values, mr, []() { return cudf::make_argmin_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
127 | 125 | } |
128 | 126 |
|
129 | 127 | Napi::Value GroupBy::count(Napi::CallbackInfo const& info) { /* Applies cudf's count groupby aggregation (with cudf's default arguments) to the `values` table passed in `info`. */ |
130 | | - auto args = _get_basic_args(info); |
131 | | - auto values = args.first; |
132 | | - auto mr = args.second; |
| 128 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
133 | 129 | return _single_aggregation( |
134 | | - [&]() { return cudf::make_count_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 130 | + info, values, mr, []() { return cudf::make_count_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
135 | 131 | } |
136 | 132 |
|
137 | 133 | Napi::Value GroupBy::max(Napi::CallbackInfo const& info) { /* Applies cudf's max groupby aggregation to the `values` table passed in `info`. */ |
138 | | - auto args = _get_basic_args(info); |
139 | | - auto values = args.first; |
140 | | - auto mr = args.second; |
| 134 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
141 | 135 | return _single_aggregation( |
142 | | - [&]() { return cudf::make_max_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 136 | + info, values, mr, []() { return cudf::make_max_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
143 | 137 | } |
144 | 138 |
|
145 | 139 | Napi::Value GroupBy::mean(Napi::CallbackInfo const& info) { /* Applies cudf's mean groupby aggregation to the `values` table passed in `info`. */ |
146 | | - auto args = _get_basic_args(info); |
147 | | - auto values = args.first; |
148 | | - auto mr = args.second; |
| 140 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
149 | 141 | return _single_aggregation( |
150 | | - [&]() { return cudf::make_mean_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 142 | + info, values, mr, []() { return cudf::make_mean_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
151 | 143 | } |
152 | 144 |
|
153 | 145 | Napi::Value GroupBy::median(Napi::CallbackInfo const& info) { /* Applies cudf's median groupby aggregation to the `values` table passed in `info`. */ |
154 | | - auto args = _get_basic_args(info); |
155 | | - auto values = args.first; |
156 | | - auto mr = args.second; |
| 146 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
157 | 147 | return _single_aggregation( |
158 | | - [&]() { return cudf::make_median_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 148 | + info, values, mr, []() { return cudf::make_median_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
159 | 149 | } |
160 | 150 |
|
161 | 151 | Napi::Value GroupBy::min(Napi::CallbackInfo const& info) { /* Applies cudf's min groupby aggregation to the `values` table passed in `info`. */ |
162 | | - auto args = _get_basic_args(info); |
163 | | - auto values = args.first; |
164 | | - auto mr = args.second; |
| 152 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
165 | 153 | return _single_aggregation( |
166 | | - [&]() { return cudf::make_min_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 154 | + info, values, mr, []() { return cudf::make_min_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
167 | 155 | } |
168 | 156 |
|
169 | 157 | Napi::Value GroupBy::nth(Napi::CallbackInfo const& info) { /* Applies cudf's nth_element groupby aggregation; reads (values, mr, n, include_nulls) from argument positions 0..3. */ |
170 | 158 | CallbackArgs args{info}; |
171 | | - |
172 | | - cudf::size_type n = args[0]; |
173 | | - |
174 | | - auto values = args[1]; |
175 | | - NODE_CUDA_EXPECT(Table::IsInstance(values), |
176 | | - "aggregation expects options to have a 'values' table"); |
177 | | - nv::Table* values_table = Table::Unwrap(values.ToObject()); |
178 | | - |
179 | | - auto mr = MemoryResource::IsInstance(info[2]) ? *MemoryResource::Unwrap(info[2].ToObject()) |
180 | | - : rmm::mr::get_current_device_resource(); |
181 | | - |
182 | | - return _single_aggregation( |
183 | | - [&]() { return cudf::make_nth_element_aggregation<cudf::groupby_aggregation>(n); }, |
184 | | - values_table, |
185 | | - mr, |
186 | | - info); |
| 159 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 160 | + cudf::size_type n = args[2]; /* element index forwarded to cudf unchanged */ |
| 161 | + auto include_nulls = |
| 162 | + info[3].ToBoolean() ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE; /* truthy 4th argument selects INCLUDE, anything else EXCLUDE */ |
| 163 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of n and include_nulls */ |
| 164 | + return cudf::make_nth_element_aggregation<cudf::groupby_aggregation>(n, include_nulls); |
| 165 | + }); |
187 | 166 | } |
188 | 167 |
|
189 | 168 | Napi::Value GroupBy::nunique(Napi::CallbackInfo const& info) { /* Applies cudf's nunique groupby aggregation to the `values` table passed in `info`. */ |
190 | | - auto args = _get_basic_args(info); |
191 | | - auto values = args.first; |
192 | | - auto mr = args.second; |
193 | | - return _single_aggregation( |
194 | | - [&]() { return cudf::make_nunique_aggregation<cudf::groupby_aggregation>(); }, |
195 | | - values, |
196 | | - mr, |
197 | | - info); |
| 169 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 170 | + auto include_nulls = |
| 171 | + info[3].ToBoolean() ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE; /* NOTE(review): index 2 is never read here, while collect_list takes its null flag from info[2]; confirm the JS caller really passes this flag at position 3 */ |
| 172 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of include_nulls */ |
| 173 | + return cudf::make_nunique_aggregation<cudf::groupby_aggregation>(include_nulls); |
| 174 | + }); |
198 | 175 | } |
199 | 176 |
|
200 | 177 | Napi::Value GroupBy::std(Napi::CallbackInfo const& info) { /* Applies cudf's std groupby aggregation with a caller-supplied ddof to the `values` table passed in `info`. */ |
201 | | - auto args = _get_basic_args(info); |
202 | | - auto values = args.first; |
203 | | - auto mr = args.second; |
204 | | - return _single_aggregation( |
205 | | - [&]() { return cudf::make_std_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 178 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 179 | + cudf::size_type ddof = info[3].IsNumber() ? info[3].ToNumber() : 1; /* falls back to ddof = 1 when the 4th argument is not a number; NOTE(review): index 2 is never read here, confirm the JS caller passes ddof at position 3 */ |
| 180 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of ddof */ |
| 181 | + return cudf::make_std_aggregation<cudf::groupby_aggregation>(ddof); |
| 182 | + }); |
206 | 183 | } |
207 | 184 |
|
208 | 185 | Napi::Value GroupBy::sum(Napi::CallbackInfo const& info) { /* Applies cudf's sum groupby aggregation to the `values` table passed in `info`. */ |
209 | | - auto args = _get_basic_args(info); |
210 | | - auto values = args.first; |
211 | | - auto mr = args.second; |
| 186 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
212 | 187 | return _single_aggregation( |
213 | | - [&]() { return cudf::make_sum_aggregation<cudf::groupby_aggregation>(); }, values, mr, info); |
| 188 | + info, values, mr, []() { return cudf::make_sum_aggregation<cudf::groupby_aggregation>(); }); /* captureless factory: no local state is needed to build the aggregation */ |
214 | 189 | } |
215 | 190 |
|
216 | 191 | Napi::Value GroupBy::var(Napi::CallbackInfo const& info) { /* Applies cudf's variance groupby aggregation with a caller-supplied ddof to the `values` table passed in `info`. */ |
217 | | - auto args = _get_basic_args(info); |
218 | | - auto values = args.first; |
219 | | - auto mr = args.second; |
220 | | - return _single_aggregation( |
221 | | - [&]() { return cudf::make_variance_aggregation<cudf::groupby_aggregation>(); }, |
222 | | - values, |
223 | | - mr, |
224 | | - info); |
| 192 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 193 | + cudf::size_type ddof = info[3].IsNumber() ? info[3].ToNumber() : 1; /* falls back to ddof = 1 when the 4th argument is not a number; NOTE(review): index 2 is never read here, confirm the JS caller passes ddof at position 3 */ |
| 194 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of ddof */ |
| 195 | + return cudf::make_variance_aggregation<cudf::groupby_aggregation>(ddof); |
| 196 | + }); |
225 | 197 | } |
226 | 198 |
|
227 | 199 | Napi::Value GroupBy::quantile(Napi::CallbackInfo const& info) { /* Applies cudf's quantile groupby aggregation; reads (values, mr, quantile, interpolation) from argument positions 0..3. */ |
228 | 200 | CallbackArgs args{info}; |
| 201 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 202 | + std::vector<double> quantiles{args[2]}; /* NOTE(review): presumably a one-element vector holding the single quantile in args[2], as the replaced code built from a double; confirm brace-init does not pick a different conversion */ |
| 203 | + cudf::interpolation interp = args[3]; /* interpolation mode, converted from the 4th argument */ |
| 204 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of quantiles and interp */ |
| 205 | + return cudf::make_quantile_aggregation<cudf::groupby_aggregation>(quantiles, interp); |
| 206 | + }); |
| 207 | +} |
229 | 208 |
|
230 | | - double q = args[0]; |
231 | | - std::vector<double> qs{q}; |
232 | | - |
233 | | - auto values = args[1]; |
234 | | - NODE_CUDA_EXPECT(Table::IsInstance(values), |
235 | | - "GroupBy quantile_agg expects options to have a 'values' table"); |
236 | | - nv::Table* values_table = Table::Unwrap(values.ToObject()); |
237 | | - |
238 | | - cudf::interpolation interpolation = args[2]; |
239 | | - |
240 | | - auto mr = MemoryResource::IsInstance(info[3]) ? *MemoryResource::Unwrap(info[3].ToObject()) |
241 | | - : rmm::mr::get_current_device_resource(); |
| 209 | +Napi::Value GroupBy::collect_list(Napi::CallbackInfo const& info) { /* Applies cudf's collect_list groupby aggregation; reads (values, mr, include_nulls) from argument positions 0..2. */ |
| 210 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 211 | + auto include_nulls = |
| 212 | + info[2].ToBoolean() ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE; /* truthy 3rd argument selects INCLUDE, anything else EXCLUDE */ |
| 213 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of include_nulls */ |
| 214 | + return cudf::make_collect_list_aggregation<cudf::groupby_aggregation>(include_nulls); |
| 215 | + }); |
| 216 | +} |
242 | 217 |
|
243 | | - return _single_aggregation( |
244 | | - [&]() { return cudf::make_quantile_aggregation<cudf::groupby_aggregation>(qs, interpolation); }, |
245 | | - values_table, |
246 | | - mr, |
247 | | - info); |
| 218 | +Napi::Value GroupBy::collect_set(Napi::CallbackInfo const& info) { /* Applies cudf's collect_set groupby aggregation; reads (values, mr, include_nulls, nulls_equal, nans_equal) from argument positions 0..4. */ |
| 219 | + auto [values, mr] = _get_basic_args(info); /* values table (args[0]) and memory resource (args[1]) */ |
| 220 | + auto include_nulls = |
| 221 | + info[2].ToBoolean() ? cudf::null_policy::INCLUDE : cudf::null_policy::EXCLUDE; /* truthy 3rd argument selects INCLUDE */ |
| 222 | + auto nulls_equal = |
| 223 | + info[3].ToBoolean() ? cudf::null_equality::EQUAL : cudf::null_equality::UNEQUAL; /* truthy 4th argument selects EQUAL */ |
| 224 | + auto nans_equal = |
| 225 | + info[4].ToBoolean() ? cudf::nan_equality::ALL_EQUAL : cudf::nan_equality::UNEQUAL; /* truthy 5th argument selects ALL_EQUAL, mirroring the nulls_equal mapping; the two branches were previously swapped */ |
| 226 | + return _single_aggregation(info, values, mr, [&]() { /* by-reference capture of the three policy values */ |
| 227 | + return cudf::make_collect_set_aggregation<cudf::groupby_aggregation>( |
| 228 | + include_nulls, nulls_equal, nans_equal); |
| 229 | + }); |
248 | 230 | } |
249 | 231 |
|
250 | | -std::pair<nv::Table*, rmm::mr::device_memory_resource*> GroupBy::_get_basic_args(
| 232 | +std::pair<Table::wrapper_t, rmm::mr::device_memory_resource*> GroupBy::_get_basic_args( /* Extracts the two arguments every aggregation shares: the values table (position 0) and the memory resource (position 1). */
251 | 233 | Napi::CallbackInfo const& info) { |
252 | 234 | CallbackArgs args{info}; |
253 | 235 |
|
254 | 236 | auto values = args[0]; |
255 | 237 | NODE_CUDA_EXPECT(Table::IsInstance(values), "aggregation expects to have a 'values' table"); /* rejects a first argument that is not a Table instance */ |
256 | 238 |
|
257 | | - rmm::mr::device_memory_resource* mr = args[1]; |
258 | | -
259 | | - return std::pair<Table*, rmm::mr::device_memory_resource*>(Table::Unwrap(values.ToObject()), mr); |
| 239 | + return std::make_pair(values.ToObject(), args[1]); /* NOTE(review): relies on implicit conversions from the make_pair element types to the declared pair members; confirm against CallbackArgs and Table::wrapper_t */ |
260 | 240 | } |
261 | 241 |
|
262 | 242 | template <typename MakeAggregation> |
263 | | -Napi::Value GroupBy::_single_aggregation(MakeAggregation const& make_aggregation, |
264 | | - const nv::Table* const values_table, |
| 243 | +Napi::Value GroupBy::_single_aggregation(Napi::CallbackInfo const& info, |
| 244 | + Table::wrapper_t const& values_table, |
265 | 245 | rmm::mr::device_memory_resource* const mr, |
266 | | - Napi::CallbackInfo const& info) { |
| 246 | + MakeAggregation const& make_aggregation) { |
267 | 247 | auto env = info.Env(); |
268 | 248 |
|
269 | 249 | std::vector<cudf::groupby::aggregation_request> requests; |
| 250 | + requests.reserve(values_table->num_columns()); |
270 | 251 |
|
271 | 252 | for (cudf::size_type i = 0; i < values_table->num_columns(); ++i) { |
272 | 253 | auto request = cudf::groupby::aggregation_request(); |
273 | 254 | request.values = values_table->get_column(i).view(); |
274 | 255 | request.aggregations.push_back(std::move(make_aggregation())); |
275 | | - requests.emplace_back(std::move(request)); |
| 256 | + requests.push_back(std::move(request)); |
276 | 257 | } |
277 | 258 |
|
278 | 259 | std::pair<std::unique_ptr<cudf::table>, std::vector<cudf::groupby::aggregation_result>> result; |
|
0 commit comments