Skip to content

Commit dd1459e

Browse files
authored
[FEA] Add Graph spectral clustering methods (#369)
1 parent db82604 commit dd1459e

33 files changed

Lines changed: 978 additions & 450 deletions

modules/core/include/nv_node/utilities/napi_to_cpp.hpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,17 +42,23 @@ struct NapiToCPP {
4242
inline bool Has(Napi::Value key) const { return val.Has(key); }
4343
inline bool Has(const char* key) const { return val.Has(key); }
4444
inline bool Has(const std::string& key) const { return val.Has(key); }
45-
inline NapiToCPP Get(napi_value key) const {
46-
return Has(key) ? val.Get(key) : Env().Undefined();
45+
// Property lookup by key. Every overload forwards to GetOrDefault with the
// environment's `undefined` value as the fallback, so a key that is not
// present yields `undefined` instead of being read from the object.
inline NapiToCPP Get(napi_value key) const { return GetOrDefault(key, Env().Undefined()); }
46+
inline NapiToCPP Get(Napi::Value key) const { return GetOrDefault(key, Env().Undefined()); }
47+
inline NapiToCPP Get(const char* key) const { return GetOrDefault(key, Env().Undefined()); }
48+
inline NapiToCPP Get(std::string const& key) const {
49+
return GetOrDefault(key, Env().Undefined());
4750
}
48-
inline NapiToCPP Get(Napi::Value key) const {
49-
return Has(key) ? val.Get(key) : Env().Undefined();
51+
// Property lookup with a caller-supplied fallback. Each overload returns
// `val.Get(key)` when `Has(key)` is true and `default_val` otherwise; the
// overloads differ only in the key type they accept.
inline NapiToCPP GetOrDefault(napi_value key, Napi::Value const& default_val) const {
52+
return Has(key) ? val.Get(key) : default_val;
5053
}
51-
inline NapiToCPP Get(const char* key) const {
52-
return Has(key) ? val.Get(key) : Env().Undefined();
54+
inline NapiToCPP GetOrDefault(Napi::Value key, Napi::Value const& default_val) const {
55+
return Has(key) ? val.Get(key) : default_val;
5356
}
54-
inline NapiToCPP Get(std::string const& key) const {
55-
return Has(key) ? val.Get(key) : Env().Undefined();
57+
inline NapiToCPP GetOrDefault(const char* key, Napi::Value const& default_val) const {
58+
return Has(key) ? val.Get(key) : default_val;
59+
}
60+
inline NapiToCPP GetOrDefault(std::string const& key, Napi::Value const& default_val) const {
61+
return Has(key) ? val.Get(key) : default_val;
5662
}
5763
};
5864

modules/cudf/src/column.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ Napi::Function Column::Init(Napi::Env const& env, Napi::Object exports) {
8585
// column/filling.cpp
8686
StaticMethod<&Column::sequence>("sequence"),
8787
// column/transform.cpp
88+
InstanceMethod<&Column::bools_to_mask>("boolsToMask"),
8889
InstanceMethod<&Column::nans_to_nulls>("nansToNulls"),
8990
// column/reduction.cpp
9091
InstanceMethod<&Column::min>("min"),
@@ -211,6 +212,8 @@ Column::Column(CallbackArgs const& args) : EnvLocalObjectWrap<Column>(args) {
211212
: props.Has("nullMask") ? props.Get("nullMask").As<Napi::Value>()
212213
: env.Null();
213214

215+
auto has_length = props.Has("length") && props.Get("length").IsNumber();
216+
214217
switch (type().id()) {
215218
case cudf::type_id::INT8:
216219
case cudf::type_id::INT16:
@@ -229,7 +232,10 @@ Column::Column(CallbackArgs const& args) : EnvLocalObjectWrap<Column>(args) {
229232
case cudf::type_id::TIMESTAMP_MICROSECONDS:
230233
case cudf::type_id::TIMESTAMP_NANOSECONDS: {
231234
data_ = Napi::Persistent(data_to_devicebuffer(env, props.Get("data"), type()));
232-
size_ = std::max(0, cudf::size_type(data_.Value()->size() / cudf::size_of(type())) - offset_);
235+
size_ =
236+
has_length
237+
? props.Get("length")
238+
: std::max(0, cudf::size_type(data_.Value()->size() / cudf::size_of(type())) - offset_);
233239
null_mask_ =
234240
Napi::Persistent(mask.IsNull() ? data_to_null_bitmask(env, props.Get("data"), size_)
235241
: mask_to_null_bitmask(env, mask, size_));
@@ -245,7 +251,8 @@ Column::Column(CallbackArgs const& args) : EnvLocalObjectWrap<Column>(args) {
245251
}
246252
}(props.Get("children").As<Napi::Array>());
247253
data_ = Napi::Persistent(DeviceBuffer::New(env));
248-
size_ = std::max(0, (num_children() > 0 ? child(0)->size() - 1 : 0) - offset_);
254+
size_ = has_length ? props.Get("length")
255+
: std::max(0, (num_children() > 0 ? child(0)->size() - 1 : 0) - offset_);
249256
null_mask_ = Napi::Persistent(mask_to_null_bitmask(env, mask, size_));
250257
break;
251258
}
@@ -258,10 +265,11 @@ Column::Column(CallbackArgs const& args) : EnvLocalObjectWrap<Column>(args) {
258265
}(props.Get("children").As<Napi::Array>());
259266
data_ = Napi::Persistent(DeviceBuffer::New(env));
260267
if (num_children() > 0) {
261-
size_ = std::max(0, child(0)->size() - offset_);
268+
size_ = has_length ? props.Get("length") : std::max(0, child(0)->size() - offset_);
262269
for (cudf::size_type i = 0; ++i < num_children();) {
263-
NODE_CUDF_EXPECT(
264-
child(i)->size() == size_, "Struct column children must be the same size", env);
270+
NODE_CUDF_EXPECT((child(i)->size() - offset_) == size_,
271+
"Struct column children must be the same size",
272+
env);
265273
}
266274
}
267275
null_mask_ = Napi::Persistent(mask_to_null_bitmask(env, mask, size_));
@@ -270,7 +278,7 @@ Column::Column(CallbackArgs const& args) : EnvLocalObjectWrap<Column>(args) {
270278
default: break;
271279
}
272280

273-
size_ = props.Has("length") ? props.Get("length") : size_;
281+
// size_ = props.Has("length") ? props.Get("length") : size_;
274282

275283
set_null_count([&]() -> cudf::size_type {
276284
if (!nullable()) { return 0; }

modules/cudf/src/column.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1249,13 +1249,20 @@ export interface Column<T extends DataType = any> {
12491249
*/
12501250
dropNans(memoryResource?: MemoryResource): Column<T>;
12511251

1252+
/**
1253+
* Pack the values of a Bool8 Column into a bitmask and return it in a DeviceBuffer.
1254+
*
1255+
* @param memoryResource The optional MemoryResource used to allocate the result buffer's device
1256+
* memory.
* @returns A `[DeviceBuffer, number]` pair: the bitmask buffer and the count returned alongside it
* by libcudf (presumably the mask's null count -- confirm against the libcudf documentation).
1257+
*/
1258+
boolsToMask(memoryResource?: MemoryResource): [DeviceBuffer, number];
1259+
12521260
/**
12531261
* Replace NaN values in the column with null values,
12541262
* while also updating the nullMask and nullCount values.
12551263
*
12561264
* @param memoryResource The optional MemoryResource used to allocate the result column's device
12571265
* memory.
* @returns A Column in which NaN values have been replaced by nulls.
1258-
* @returns undefined if inplace=True, else updated column with Null values
12591266
*/
12601267
nansToNulls(memoryResource?: MemoryResource): Column<T>;
12611268

modules/cudf/src/column/transform.cpp

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,38 @@
1313
// limitations under the License.
1414

1515
#include <node_cudf/column.hpp>
16+
1617
#include <node_rmm/device_buffer.hpp>
1718

18-
#include <napi.h>
1919
#include <cudf/column/column.hpp>
2020
#include <cudf/column/column_view.hpp>
2121
#include <cudf/null_mask.hpp>
2222
#include <cudf/table/table_view.hpp>
2323
#include <cudf/transform.hpp>
2424
#include <cudf/types.hpp>
25+
2526
#include <rmm/device_buffer.hpp>
2627

28+
#include <napi.h>
29+
2730
namespace nv {
2831

32+
// Converts this column's boolean values into a bitmask by delegating to
// cudf::bools_to_mask, allocating the result through `mr`.
// Returns libcudf's result unchanged: the device buffer holding the mask and a
// cudf::size_type (presumably the null count of that mask -- confirm against
// the libcudf documentation).
// Any std::exception raised by libcudf is re-raised as a Napi::Error carrying
// the original message.
std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> Column::bools_to_mask(
33+
rmm::mr::device_memory_resource* mr) const {
34+
try {
35+
return cudf::bools_to_mask(*this, mr);
36+
} catch (std::exception const& e) { NAPI_THROW(Napi::Error::New(Env(), e.what())); }
37+
}
38+
39+
// JavaScript-facing wrapper around the C++ bools_to_mask overload.
// Argument 0 is converted to the memory resource used for the allocation.
// Returns a two-element JS array: [DeviceBuffer wrapping the mask, count].
Napi::Value Column::bools_to_mask(Napi::CallbackInfo const& info) {
40+
rmm::mr::device_memory_resource* mr = CallbackArgs{info}[0];
41+
auto result = bools_to_mask(mr);
42+
auto ary = Napi::Array::New(Env(), 2);
43+
// Ownership of the device buffer is moved into the DeviceBuffer wrapper.
ary.Set(0u, DeviceBuffer::New(Env(), std::move(result.first)));
44+
ary.Set(1u, result.second);
45+
return ary;
46+
}
47+
2948
std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> Column::nans_to_nulls(
3049
rmm::mr::device_memory_resource* mr) const {
3150
try {

modules/cudf/src/column_accessor.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,12 @@ export class ColumnAccessor<T extends TypeMap = any> {
6161

6262
/**
 * Return a new ColumnAccessor holding this accessor's columns plus those in `data`.
 *
 * `data` is spread after `this._data`, so on a name collision the column from `data`
 * replaces the existing one. The mapped result type mirrors that: a key found in `R`
 * takes `R`'s type, otherwise it takes `T`'s.
 *
 * @param data A map of names to columns, or another ColumnAccessor whose columns are added.
 */
addColumns<R extends TypeMap>(data: ColumnsMap<R>|ColumnAccessor<R>) {
6363
data = (data instanceof ColumnAccessor) ? data._data : data;
64-
return new ColumnAccessor({...this._data, ...data} as ColumnsMap<T&R>);
64+
return new ColumnAccessor(
65+
{...this._data, ...data} as ColumnsMap<{
66+
[P in keyof(T & R)]: P extends keyof R ? R[P] //
67+
: P extends keyof T ? T[P] //
68+
: never
69+
}>);
6570
}
6671

6772
dropColumns<R extends keyof T>(names: readonly R[]) {
@@ -70,7 +75,7 @@ export class ColumnAccessor<T extends TypeMap = any> {
7075
for (const name of this.names) {
7176
if (!(name in namesMap)) { data[name] = this._data[name]; }
7277
}
73-
return new ColumnAccessor<Omit<T, R>>(data);
78+
return new ColumnAccessor(data as ColumnsMap<{[P in Exclude<keyof T, R>]: T[P]}>);
7479
}
7580

7681
selectByColumnName<R extends keyof T>(name: R) { return this.selectByColumnNames([name]); }

modules/cudf/src/data_frame.ts

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,8 @@ export class DataFrame<T extends TypeMap = any> {
312312
/**
313313
* Return a new DataFrame with new columns added.
314314
*
315-
* @param data mapping of names to new columns to add
315+
* @param {SeriesMap<R>|DataFrame<R>} data mapping of names to new columns to add, or a GPU
316+
* DataFrame object
316317
*
317318
* @example
318319
* ```typescript
@@ -323,13 +324,6 @@ export class DataFrame<T extends TypeMap = any> {
323324
* df.assign({b: Series.new(["foo", "bar", "bar"])})
324325
* // returns df {a: [1, 2, 3], b: ["foo", "bar", "bar"]}
325326
* ```
326-
*/
327-
assign<R extends TypeMap>(data: SeriesMap<R>): DataFrame<Omit<T, keyof R&string>&R>;
328-
329-
/**
330-
* Return a new DataFrame with new columns added.
331-
*
332-
* @param data a GPU DataFrame object
333327
*
334328
* @example
335329
* ```typescript
@@ -341,8 +335,6 @@ export class DataFrame<T extends TypeMap = any> {
341335
* df.assign(df1) // returns df {a: [1, 2, 3], b: ["foo", "bar", "bar"]}
342336
* ```
343337
*/
344-
assign<R extends TypeMap>(data: DataFrame<R>): DataFrame<Omit<T, keyof R&string>&R>;
345-
346338
assign<R extends TypeMap>(data: SeriesMap<R>|DataFrame<R>) {
347339
const columns = (data instanceof DataFrame) ? data._accessor : _seriesToColumns(data);
348340
return new DataFrame(this._accessor.addColumns(columns));

modules/cudf/src/node_cudf/column.hpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,10 +686,22 @@ struct Column : public EnvLocalObjectWrap<Column> {
686686
return sequence(env, size, init->operator cudf::scalar&(), step->operator cudf::scalar&(), mr);
687687
}
688688

689+
// Creates a column of `size` elements of type `type` by calling `sequence`
// with a zero scalar as both the initial value and the step, so every
// generated element is zero.
// `mr` is forwarded to `sequence` and defaults to the current device resource.
// NOTE(review): the zero scalar is built from a JS Number, so this presumably
// expects a numeric `type` -- confirm before using with other type ids.
inline static Column::wrapper_t zeros(
690+
Napi::Env const& env,
691+
cudf::type_id type,
692+
cudf::size_type size,
693+
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) {
694+
auto zero = Scalar::New(env, Napi::Number::New(env, 0), cudf::data_type{type});
695+
return sequence(env, size, zero->operator cudf::scalar&(), zero->operator cudf::scalar&(), mr);
696+
}
697+
689698
// column/transform.cpp
690699
std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> nans_to_nulls(
691700
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
692701

702+
// Converts this column's boolean values into a bitmask; returns the mask's
// device buffer together with a cudf::size_type count. `mr` defaults to the
// current device memory resource.
std::pair<std::unique_ptr<rmm::device_buffer>, cudf::size_type> bools_to_mask(
703+
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()) const;
704+
693705
// column/copying.cpp
694706
Column::wrapper_t gather(
695707
Column const& gather_map,
@@ -909,6 +921,7 @@ struct Column : public EnvLocalObjectWrap<Column> {
909921
static Napi::Value sequence(Napi::CallbackInfo const& info);
910922

911923
// column/transform.cpp
924+
Napi::Value bools_to_mask(Napi::CallbackInfo const& info);
912925
Napi::Value nans_to_nulls(Napi::CallbackInfo const& info);
913926

914927
// column/reductions.cpp

modules/cudf/src/series.ts

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ export class AbstractSeries<T extends DataType = any> {
948948
* For dictionary columns, the keys column component is copied and not trimmed if the gather
949949
* results in abandoned key elements.
950950
*
951-
* @param selection A Series of 8/16/32-bit signed or unsigned integer indices to gather.
951+
* @param indices A Series of 8/16/32-bit signed or unsigned integer indices to gather.
952952
* @param nullify_out_of_bounds If `true`, coerce rows that corresponds to out-of-bounds indices
953953
* in the selection to null. If `false`, skips all bounds checking for selection values. Pass
954954
* false if you are certain that the selection contains only valid indices for better
@@ -970,11 +970,11 @@ export class AbstractSeries<T extends DataType = any> {
970970
* c.gather(selection) // Bool8Series [true, true]
971971
* ```
972972
*/
973-
gather<R extends IndexType>(selection: Series<R>,
974-
nullify_out_of_bounds = false,
975-
memoryResource?: MemoryResource): Series<T> {
976-
return this.__construct(
977-
this._col.gather(selection._col, nullify_out_of_bounds, memoryResource));
973+
gather(indices: Series<IndexType>|number[],
974+
nullify_out_of_bounds = false,
975+
memoryResource?: MemoryResource): Series<T> {
976+
// A plain number[] is materialized as a Series and cast to Uint32; a Series is used as given.
// NOTE(review): the Uint32 cast presumably turns negative array entries into large positive
// indices -- confirm that negative indices are not expected for array input.
const map = Array.isArray(indices) ? Series.new(indices).cast(new Uint32) : indices;
977+
return this.__construct(this._col.gather(map._col, nullify_out_of_bounds, memoryResource));
978978
}
979979

980980
/**
@@ -1109,7 +1109,7 @@ export class AbstractSeries<T extends DataType = any> {
11091109
* ```
11101110
*/
11111111
scatter(value: T['scalarType'],
1112-
indices: Series<Int32>|number[],
1112+
indices: Series<IndexType>|number[],
11131113
check_bounds?: boolean,
11141114
memoryResource?: MemoryResource): Series<T>;
11151115
/**
@@ -1135,24 +1135,24 @@ export class AbstractSeries<T extends DataType = any> {
11351135
* ```
11361136
*/
11371137
scatter(values: Series<T>,
1138-
indices: Series<Int32>|number[],
1138+
indices: Series<IndexType>|number[],
11391139
check_bounds?: boolean,
11401140
memoryResource?: MemoryResource): Series<T>;
11411141

11421142
scatter(source: Series<T>|T['scalarType'],
1143-
indices: Series<Int32>|number[],
1143+
indices: Series<IndexType>|number[],
11441144
check_bounds = false,
11451145
memoryResource?: MemoryResource): Series<T> {
11461146
// Wrap this Series' column in a single-column Table to act as the scatter destination.
const dst = new Table({columns: [this._col]});
1147-
const idx = Series.new(indices).cast(new Int32)._col;
1147+
// A plain number[] is materialized as a Series and cast to Uint32; a Series is used as given.
// NOTE(review): the Uint32 cast presumably turns negative array entries into large positive
// indices -- confirm that negative indices are not expected for array input.
const map = Array.isArray(indices) ? Series.new(indices).cast(new Uint32) : indices;
11481148
if (source instanceof Series) {
11491149
// Series source: cast it to this Series' type and scatter it row by row.
const src = new Table({columns: [source.cast(this.type)._col]});
11501150
return this.__construct(
1151-
dst.scatterTable(src, idx, check_bounds, memoryResource).getColumnByIndex(0));
1151+
dst.scatterTable(src, map._col, check_bounds, memoryResource).getColumnByIndex(0));
11521152
}
11531153
// Scalar source: wrap the value in a Scalar of this Series' type and scatter it.
const src = [new Scalar({type: this.type, value: source})];
11541154
return this.__construct(
1155-
dst.scatterScalar(src, idx, check_bounds, memoryResource).getColumnByIndex(0));
1155+
dst.scatterScalar(src, map._col, check_bounds, memoryResource).getColumnByIndex(0));
11561156
}
11571157

11581158
/**
@@ -1668,7 +1668,6 @@ function asColumn<T extends DataType>(value: any) {
16681668
if (Array.isArray(data)) {
16691669
return fromArrow<T>(arrow.Vector.from({
16701670
highWaterMark: Infinity,
1671-
nullValues: [undefined, null, NaN],
16721671
type: value.type ?? inferType(data),
16731672
// Slice `offset` from the Array before converting so
16741673
// we don't write unnecessary values with the Arrow builders.
@@ -1677,12 +1676,14 @@ function asColumn<T extends DataType>(value: any) {
16771676
}
16781677

16791678
// If `value` is an ArrayBuffer view or `data.buffer` is an ArrayBuffer, copy the data to a DeviceBuffer
1680-
if (data.buffer instanceof ArrayBuffer) {
1679+
if (ArrayBuffer.isView(value) || (data.buffer instanceof ArrayBuffer)) {
1680+
if (typeof data.length === 'number') { value.length = data.length; }
16811681
data = new DeviceBuffer(typeof offset !== 'number' ? data : data.subarray(offset));
16821682
offset = 0;
16831683
}
16841684
// If `data.buffer` is a DeviceBuffer, propagate its `byteOffset` to ColumnProps
16851685
else if (data.buffer instanceof DeviceBuffer) {
1686+
if (typeof data.length === 'number') { value.length = data.length; }
16861687
offset =
16871688
(typeof offset !== 'number' ? 0 : offset) + (data.byteOffset / data.BYTES_PER_ELEMENT);
16881689
}

0 commit comments

Comments
 (0)