Skip to content

Commit 73f04d9

Browse files
author
Rafał Hibner
committed
Count as last struct element
1 parent 6207751 commit 73f04d9

2 files changed

Lines changed: 42 additions & 41 deletions

File tree

cpp/src/arrow/compute/kernels/aggregate_tdigest.cc

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,13 @@ struct TDigestBaseImpl : public ScalarAggregator {
3636
explicit TDigestBaseImpl(std::unique_ptr<TDigest::Scaler> scaler, uint32_t buffer_size)
3737
: tdigest{std::move(scaler), buffer_size}, count{0}, all_valid{true} {
3838
auto output_size = tdigest.delta();
39-
out_type = struct_({field("mean", fixed_size_list(float64(), output_size), false),
40-
field("weight", fixed_size_list(float64(), output_size), false),
41-
field("count", uint64(), false), field("min", float64(), true),
42-
field("max", float64(), true)});
39+
out_type = struct_({
40+
field("mean", fixed_size_list(float64(), output_size), false),
41+
field("weight", fixed_size_list(float64(), output_size), false),
42+
field("min", float64(), true),
43+
field("max", float64(), true),
44+
field("count", uint64(), false),
45+
});
4346
}
4447

4548
Status MergeFrom(KernelContext*, KernelState&& src) override {
@@ -160,7 +163,7 @@ struct TDigestCentroidFinalizer : public TDigestBaseImpl {
160163
min = max = MakeNullScalar(float64());
161164
}
162165
*out = std::make_shared<StructScalar>(
163-
std::vector<std::shared_ptr<Scalar>>{mean, weight, count, min, max}, out_type);
166+
std::vector<std::shared_ptr<Scalar>>{mean, weight, min, max, count}, out_type);
164167
}
165168

166169
return Status::OK();
@@ -241,9 +244,9 @@ struct TDigestCentroidConsumerImpl : public TDigestFinalizer_T {
241244
auto weight_array =
242245
checked_cast<const FixedSizeListScalar*>(input_struct_scalar->value[1].get())
243246
->value;
244-
auto count = checked_cast<const UInt64Scalar*>(input_struct_scalar->value[2].get());
245-
auto min = checked_cast<const DoubleScalar*>(input_struct_scalar->value[3].get());
246-
auto max = checked_cast<const DoubleScalar*>(input_struct_scalar->value[4].get());
247+
auto min = checked_cast<const DoubleScalar*>(input_struct_scalar->value[2].get());
248+
auto max = checked_cast<const DoubleScalar*>(input_struct_scalar->value[3].get());
249+
auto count = checked_cast<const UInt64Scalar*>(input_struct_scalar->value[4].get());
247250
auto mean_double_array = checked_cast<const DoubleArray*>(mean_array.get());
248251
auto weight_double_array = checked_cast<const DoubleArray*>(weight_array.get());
249252
DCHECK_EQ(mean_double_array->length(), this->tdigest.delta());
@@ -411,7 +414,7 @@ struct TDigestCentroidTypeMatcher : public TypeMatcher {
411414

412415
static std::string ToStringStatic() {
413416
return "struct{mean:fixed_size_list<item: double>[N], weight:fixed_size_list<item: "
414-
"double>[N], count:int64, min:float64, max:float64}";
417+
"double>[N], min:float64, max:float64, count:int64}";
415418
}
416419
std::string ToString() const override { return ToStringStatic(); }
417420

cpp/src/arrow/compute/kernels/aggregate_test.cc

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4559,8 +4559,8 @@ TEST(TestTDigestMapKernel, Options) {
45594559
auto output_type =
45604560
struct_({field("mean", fixed_size_list(float64(), 5), false),
45614561
field("weight", fixed_size_list(float64(), 5), false),
4562-
field("count", uint64(), false), field("min", float64(), true),
4563-
field("max", float64(), true)});
4562+
field("min", float64(), true), field("max", float64(), true),
4563+
field("count", uint64(), false)});
45644564
TDigestMapOptions keep_nulls(/*delta=*/5, /*buffer_size=*/500,
45654565
/*skip_nulls=*/false,
45664566
/*scaler=*/TDigestMapOptions::Scaler::K0);
@@ -4572,103 +4572,102 @@ TEST(TestTDigestMapKernel, Options) {
45724572
TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]"), keep_nulls),
45734573
ResultWith(ScalarFromJSON(output_type,
45744574
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, 2, "
4575-
"2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0}")));
4575+
"2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6}")));
45764576
EXPECT_THAT(
45774577
TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, 3.0, 4.0, 5.0]"), keep_nulls),
45784578
ResultWith(ScalarFromJSON(output_type,
45794579
"{\"mean\":[1.5, 3.5, 5.0, null, null],\"weight\":[2, 2, "
4580-
"1, null, null],\"count\":5,\"min\":1.0,\"max\":5.0}")));
4580+
"1, null, null],\"min\":1.0,\"max\":5.0,\"count\":5}")));
45814581
EXPECT_THAT(
45824582
TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, 3.0, 4.0]"), keep_nulls),
45834583
ResultWith(ScalarFromJSON(
45844584
output_type,
45854585
"{\"mean\":[1.0, 2.0, 3.0, 4.0, "
4586-
"null],\"weight\":[1,1,1,1,null],\"count\":4,\"min\":1.0,\"max\":4.0}")));
4586+
"null],\"weight\":[1,1,1,1,null],\"min\":1.0,\"max\":4.0,\"count\":4}")));
45874587

45884588
EXPECT_THAT(
45894589
TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, 3.0]"), keep_nulls),
45904590
ResultWith(ScalarFromJSON(output_type,
45914591
"{\"mean\":[1.0,2.0,3.0,null,null],\"weight\":[1,"
4592-
"1,1,null,null],\"count\":3,\"min\":1.0,\"max\":3.0}")));
4592+
"1,1,null,null],\"min\":1.0,\"max\":3.0,\"count\":3}")));
45934593
EXPECT_THAT(TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, 3.0, null]"), keep_nulls),
45944594
ResultWith(ScalarFromJSON(output_type, "null")));
45954595
EXPECT_THAT(TDigestMap(ScalarFromJSON(input_type, "1.0"), keep_nulls),
45964596
ResultWith(ScalarFromJSON(
45974597
output_type,
45984598
"{\"mean\":[1.0,null,null,null,null],\"weight\":["
4599-
"1,null,null,null,null],\"count\":1,\"min\":1.0,\"max\":1.0}")));
4599+
"1,null,null,null,null],\"min\":1.0,\"max\":1.0,\"count\":1}")));
46004600
EXPECT_THAT(TDigestMap(ScalarFromJSON(input_type, "null"), keep_nulls),
46014601
ResultWith(ScalarFromJSON(output_type, "null")));
46024602

46034603
EXPECT_THAT(
46044604
TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, 3.0, null]"), skip_nulls),
46054605
ResultWith(ScalarFromJSON(output_type,
46064606
"{\"mean\":[1.0,2.0,3.0,null,null],\"weight\":[1,"
4607-
"1,1,null,null],\"count\":3,\"min\":1.0,\"max\":3.0}")));
4607+
"1,1,null,null],\"min\":1.0,\"max\":3.0,\"count\":3}")));
46084608
EXPECT_THAT(TDigestMap(ArrayFromJSON(input_type, "[1.0, 2.0, null]"), skip_nulls),
46094609
ResultWith(ScalarFromJSON(
46104610
output_type,
46114611
"{\"mean\":[1.0,2.0,null,null,null],\"weight\":["
4612-
"1,1,null,null,null],\"count\":2,\"min\":1.0,\"max\":2.0}")));
4612+
"1,1,null,null,null],\"min\":1.0,\"max\":2.0,\"count\":2}")));
46134613
EXPECT_THAT(TDigestMap(ScalarFromJSON(input_type, "1.0"), skip_nulls),
46144614
ResultWith(ScalarFromJSON(
46154615
output_type,
46164616
"{\"mean\":[1.0,null,null,null,null],\"weight\":["
4617-
"1,null,null,null,null],\"count\":1,\"min\":1.0,\"max\":1.0}")));
4617+
"1,null,null,null,null],\"min\":1.0,\"max\":1.0,\"count\":1}")));
46184618
EXPECT_THAT(TDigestMap(ScalarFromJSON(input_type, "null"), skip_nulls),
46194619
ResultWith(ScalarFromJSON(
46204620
output_type,
46214621
"{\"mean\":[null,null,null,null,null],\"weight\":"
4622-
"[null,null,null,null,null],\"count\":0,\"min\":null,\"max\":null}")));
4622+
"[null,null,null,null,null],\"min\":null,\"max\":null,\"count\":0}")));
46234623
}
46244624

46254625
TEST(TestTDigestReduceKernel, Basic) {
46264626
auto type = struct_({field("mean", fixed_size_list(float64(), 5), false),
46274627
field("weight", fixed_size_list(float64(), 5), false),
4628-
field("count", uint64(), false), field("min", float64(), true),
4629-
field("max", float64(), true)});
4628+
field("min", float64(), true), field("max", float64(), true),
4629+
field("count", uint64(), false)});
46304630
TDigestReduceOptions options(/*scaler=*/TDigestMapOptions::Scaler::K0);
46314631
EXPECT_THAT(
46324632
TDigestReduce(
46334633
ArrayFromJSON(type,
46344634
"["
46354635
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4636-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0},"
4636+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6},"
46374637
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4638-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0}"
4638+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6}"
46394639
"]"),
46404640
options),
46414641
ResultWith(ScalarFromJSON(type,
46424642
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[4, 4, "
4643-
"4, null, null],\"count\":12,\"min\":1.0,\"max\":6.0}")));
4643+
"4, null, null],\"min\":1.0,\"max\":6.0,\"count\":12}")));
46444644

46454645
EXPECT_THAT(
46464646
TDigestReduce(
46474647
ScalarFromJSON(type,
46484648
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4649-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0}"),
4649+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6}"),
46504650
options),
46514651
ResultWith(ScalarFromJSON(type,
46524652
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, 2, "
4653-
"2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0}")));
4653+
"2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6}")));
46544654
}
46554655

46564656
TEST(TestTDigestQuantileKernel, Basic) {
4657-
auto input_type =
4658-
struct_({field("mean", fixed_size_list(float64(), 5), false),
4659-
field("weight", fixed_size_list(float64(), 5), false),
4660-
field("count", uint64(), false), field("min", float64(), true),
4661-
field("max", float64(), true)});
4657+
auto input_type = struct_({field("mean", fixed_size_list(float64(), 5), false),
4658+
field("weight", fixed_size_list(float64(), 5), false),
4659+
field("min", float64(), true), field("max", float64(), true),
4660+
field("count", uint64(), false)});
46624661

46634662
auto output_type = float64();
46644663

46654664
auto input_array =
46664665
ArrayFromJSON(input_type,
46674666
"["
46684667
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4669-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0},"
4668+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6},"
46704669
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4671-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0}"
4670+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6}"
46724671
"]");
46734672

46744673
TDigestQuantileOptions multiple(/*q=*/{0.1, 0.5, 0.9}, /*min_count=*/12);
@@ -4681,21 +4680,20 @@ TEST(TestTDigestQuantileKernel, Basic) {
46814680
}
46824681

46834682
TEST(TestTDigestMapReduceQuantileKernel, Basic) {
4684-
auto input_type =
4685-
struct_({field("mean", fixed_size_list(float64(), 5), false),
4686-
field("weight", fixed_size_list(float64(), 5), false),
4687-
field("count", uint64(), false), field("min", float64(), true),
4688-
field("max", float64(), true)});
4683+
auto input_type = struct_({field("mean", fixed_size_list(float64(), 5), false),
4684+
field("weight", fixed_size_list(float64(), 5), false),
4685+
field("min", float64(), true), field("max", float64(), true),
4686+
field("count", uint64(), false)});
46894687

46904688
auto output_type = float64();
46914689

46924690
auto input_array =
46934691
ArrayFromJSON(input_type,
46944692
"["
46954693
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4696-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0},"
4694+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6},"
46974695
"{\"mean\":[1.5, 3.5, 5.5, null, null],\"weight\":[2, "
4698-
"2, 2, null, null],\"count\":6,\"min\":1.0,\"max\":6.0}"
4696+
"2, 2, null, null],\"min\":1.0,\"max\":6.0,\"count\":6}"
46994697
"]");
47004698

47014699
TDigestQuantileOptions multiple(/*q=*/{0.1, 0.5, 0.9}, /*min_count=*/12);

0 commit comments

Comments
 (0)