Skip to content

Commit 7e6aa10

Browse files
committed
Add gmv aggregate for ctx2
Signed-off-by: Andrew Stein <steinlink@gmail.com>
1 parent 538ce71 commit 7e6aa10

5 files changed

Lines changed: 172 additions & 13 deletions

File tree

.github/workflows/build.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ on:
2828
- docs/
2929
- examples/
3030
- rust/perspective-python/README.md
31-
pull_request_target:
31+
pull_request:
3232
branches:
3333
- master
3434
workflow_dispatch:

rust/perspective-python/perspective/tests/table/test_view.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,39 @@ def test_view_aggregate_gmv(self):
536536
"MV": [10000, 5900, -2000, -3200, 700, 7100, 800, -2400, -3900],
537537
}
538538

539+
def test_view_aggregate_gmv_split_by(self):
540+
data = {
541+
"division": [
542+
"D1", "D2", "D1", "D2", "D1", "D2", "D1", "D2", "D1", "D2",
543+
"D1", "D2", "D1", "D2", "D1", "D2", "D1", "D2", "D1", "D2",
544+
],
545+
"symbol": [
546+
"AAPL", "GOOG", "MSFT", "AAPL", "GOOG", "MSFT", "AAPL",
547+
"GOOG", "MSFT", "AAPL", "GOOG", "MSFT", "AAPL", "GOOG",
548+
"MSFT", "AAPL", "GOOG", "MSFT", "AAPL", "GOOG",
549+
],
550+
"MV": [
551+
1500, 1200, 1300, 1400, 1600, 1100, 1700, 1800, 1900, 2000,
552+
-2100, -2200, -2300, -2400, -2500, -2600, -2700, -2800,
553+
-2900, -3000,
554+
],
555+
}
556+
557+
tbl = Table(data)
558+
view = tbl.view(
559+
aggregates={"MV": "gmv"},
560+
group_by=["division"],
561+
split_by=["symbol"],
562+
columns=["MV"],
563+
)
564+
565+
assert view.to_columns() == {
566+
"__ROW_PATH__": [[], ["D1"], ["D2"]],
567+
"AAPL|MV": [2800, -2000, 800],
568+
"GOOG|MV": [5600, -3200, -2400],
569+
"MSFT|MV": [4600, 700, -3900],
570+
}
571+
539572
def test_view_aggregate_mean_from_schema(self):
540573
data = [
541574
{"a": "a", "x": 1, "y": 200},

rust/perspective-server/cpp/perspective/src/cpp/context_two.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,19 @@ t_ctx2::init() {
7777
);
7878

7979
m_trees[treeidx]->init();
80+
81+
// Tell the tree how many of its `m_pivots` are row pivots and
82+
// what the next missing row pivot is (if any). AGGTYPE_GMV uses
83+
// this to roll up across row dimensions that aren't embedded in
84+
// this particular tree — e.g. m_trees[0] has no row pivots, so
85+
// a tree-leaf at e.g. [AAPL] needs to be partitioned by
86+
// `row_pivots[0]` from the gstate to compute the gmv value.
87+
const auto& row_pivots = m_config.get_row_pivots();
88+
std::string next_row_pivot;
89+
if (treeidx < row_pivots.size()) {
90+
next_row_pivot = row_pivots[treeidx].colname();
91+
}
92+
m_trees[treeidx]->set_gmv_row_pivot_meta(treeidx, next_row_pivot);
8093
}
8194

8295
m_rtraversal = std::make_shared<t_traversal>(rtree());
@@ -1171,6 +1184,15 @@ t_ctx2::reset(bool reset_expressions) {
11711184
);
11721185
m_trees[treeidx]->init();
11731186
m_trees[treeidx]->set_deltas_enabled(get_feature_state(CTX_FEAT_DELTA));
1187+
1188+
// See [t_ctx2::init] — gmv needs to know how many row pivots
1189+
// this particular tree carries vs. what's still in the gstate.
1190+
const auto& row_pivots = m_config.get_row_pivots();
1191+
std::string next_row_pivot;
1192+
if (treeidx < row_pivots.size()) {
1193+
next_row_pivot = row_pivots[treeidx].colname();
1194+
}
1195+
m_trees[treeidx]->set_gmv_row_pivot_meta(treeidx, next_row_pivot);
11741196
}
11751197

11761198
m_rtraversal = std::make_shared<t_traversal>(rtree());

rust/perspective-server/cpp/perspective/src/cpp/sparse_tree.cpp

Lines changed: 97 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include <algorithm>
1515
#include <cmath>
1616
#include <fstream>
17+
#include <unordered_map>
1718
#include <perspective/base.h>
1819
#include <perspective/compat.h>
1920
#include <perspective/extract_aggregate.h>
@@ -86,11 +87,31 @@ t_stree::t_stree(
8687
m_aggspecs(aggspecs),
8788
m_schema(std::move(schema)),
8889
m_cur_aggidx(1),
89-
m_has_delta(false) {
90+
m_has_delta(false),
91+
m_num_row_pivots_in_tree(pivots.size()) {
9092
const auto& g_agg_str = cfg.get_grand_agg_str();
9193
m_grand_agg_str = g_agg_str.empty() ? "Grand Aggregate" : g_agg_str;
9294
}
9395

96+
void
97+
t_stree::set_gmv_row_pivot_meta(
98+
t_uindex num_row_pivots_in_tree,
99+
const std::string& next_row_pivot_name
100+
) {
101+
m_num_row_pivots_in_tree = num_row_pivots_in_tree;
102+
m_next_row_pivot_name = next_row_pivot_name;
103+
}
104+
105+
t_uindex
106+
t_stree::get_num_row_pivots_in_tree() const {
107+
return m_num_row_pivots_in_tree;
108+
}
109+
110+
const std::string&
111+
t_stree::get_next_row_pivot_name() const {
112+
return m_next_row_pivot_name;
113+
}
114+
94115
t_stree::~t_stree() {
95116
for (auto& iter : m_smap) {
96117
free(const_cast<char*>(iter.first));
@@ -1816,17 +1837,24 @@ t_stree::update_agg_table(
18161837
dst->set_scalar(dst_ridx, new_value);
18171838
} break;
18181839
case AGGTYPE_GMV: {
1819-
// Leaf nodes: plain `sum` of the underlying rows.
1820-
// Non-leaf nodes: sum over immediate children of
1821-
// `abs(sum(child_subtree))`. The child sums are recomputed
1822-
// from the gstate rather than read from already-populated
1823-
// child rows, so the parent does not see the abs-rolled-up
1824-
// child value — it sees the child's raw signed sum.
1840+
// The user-facing rule is "leaf = sum, parent = sum over
1841+
// immediate row children of |raw sum of child subtree|".
1842+
// Under split_by this is complicated by t_ctx2 sharding
1843+
// the data across several t_stree instances — see
1844+
// [t_stree::set_gmv_row_pivot_meta] for the metadata that
1845+
// distinguishes "the next pivot in this tree is a row
1846+
// pivot" from "the next missing row pivot lives in the
1847+
// gstate".
18251848
old_value.set(dst->get_scalar(dst_ridx));
18261849
const auto& col_name = spec.get_dependencies()[0].name();
1827-
auto dst_dtype = dst->get_dtype();
1828-
auto sum_reducer = [dst_dtype](std::vector<t_tscalar>& values
1829-
) -> t_tscalar {
1850+
const auto dst_dtype = dst->get_dtype();
1851+
const auto k_tree = m_num_row_pivots_in_tree;
1852+
const auto& next_row_pivot = m_next_row_pivot_name;
1853+
const bool has_missing_row_pivot = !next_row_pivot.empty();
1854+
const auto depth = get_depth(nidx);
1855+
1856+
auto sum_reducer =
1857+
[dst_dtype](std::vector<t_tscalar>& values) -> t_tscalar {
18301858
if (values.empty()) {
18311859
return mknone();
18321860
}
@@ -1842,7 +1870,14 @@ t_stree::update_agg_table(
18421870
return v;
18431871
};
18441872

1845-
if (is_leaf(nidx)) {
1873+
// row_path_len = min(depth, k_tree). A node is a "row-leaf"
1874+
// iff there are no row pivots missing from this tree AND
1875+
// all row pivots are consumed at this node's depth — i.e.
1876+
// depth >= k_tree.
1877+
const bool is_row_leaf =
1878+
!has_missing_row_pivot && depth >= k_tree;
1879+
1880+
if (is_row_leaf) {
18461881
auto pkeys = get_pkeys(nidx);
18471882
new_value.set(
18481883
reduce_from_gstate<std::function<
@@ -1854,7 +1889,10 @@ t_stree::update_agg_table(
18541889
sum_reducer
18551890
)
18561891
);
1857-
} else {
1892+
} else if (depth < k_tree) {
1893+
// Row-parent and this tree's next pivot is the next
1894+
// row pivot. Tree children at depth+1 are the
1895+
// immediate row children — use them directly.
18581896
t_tscalar rval;
18591897
rval.set(std::uint64_t(0));
18601898
rval.m_type = dst_dtype;
@@ -1873,6 +1911,53 @@ t_stree::update_agg_table(
18731911
}
18741912
}
18751913
new_value.set(rval);
1914+
} else {
1915+
// Row-parent but this tree's pivots beyond `depth`
1916+
// (if any) are column pivots, so the immediate row
1917+
// children aren't tree children. Read the missing
1918+
// row pivot column for this node's pkeys, partition
1919+
// by it, sum each partition, sum |partition_sum|.
1920+
auto pkeys = get_pkeys(nidx);
1921+
std::vector<t_tscalar> pivot_vals;
1922+
std::vector<t_tscalar> data_vals;
1923+
read_column_from_gstate(
1924+
gstate,
1925+
expression_master_table,
1926+
next_row_pivot,
1927+
pkeys,
1928+
pivot_vals
1929+
);
1930+
read_column_from_gstate(
1931+
gstate,
1932+
expression_master_table,
1933+
col_name,
1934+
pkeys,
1935+
data_vals
1936+
);
1937+
1938+
std::unordered_map<t_tscalar, t_tscalar> partials;
1939+
for (std::size_t i = 0;
1940+
i < pivot_vals.size() && i < data_vals.size();
1941+
++i) {
1942+
const auto& x = data_vals[i];
1943+
if (!x.is_valid() || x.is_nan()) {
1944+
continue;
1945+
}
1946+
auto& acc = partials[pivot_vals[i]];
1947+
if (!acc.is_valid()) {
1948+
acc.set(std::uint64_t(0));
1949+
acc.m_type = dst_dtype;
1950+
}
1951+
acc = acc.add(x.coerce_numeric_dtype(dst_dtype));
1952+
}
1953+
1954+
t_tscalar rval;
1955+
rval.set(std::uint64_t(0));
1956+
rval.m_type = dst_dtype;
1957+
for (const auto& kv : partials) {
1958+
rval = rval.add(kv.second.abs());
1959+
}
1960+
new_value.set(rval);
18761961
}
18771962
dst->set_scalar(dst_ridx, new_value);
18781963
} break;

rust/perspective-server/cpp/perspective/src/include/perspective/sparse_tree.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -468,6 +468,25 @@ class PERSPECTIVE_EXPORT t_stree {
468468
t_symtable m_symtable;
469469
bool m_has_delta;
470470
std::string m_grand_agg_str;
471+
472+
// Used by AGGTYPE_GMV under split_by. For t_ctx1 (group_by only) the
473+
// tree's pivots are exactly the row pivots, so the default
474+
// `m_num_row_pivots_in_tree == m_pivots.size()` makes every tree-leaf
475+
// a "real" leaf and the next-row-pivot lookup unused. For t_ctx2 each
476+
// m_trees[k] is initialized with the row+col split via
477+
// `set_gmv_row_pivot_meta(k, next_row_pivot_name)`, so the gmv kernel
478+
// can detect a "tree-leaf but row-parent" node and roll up across the
479+
// missing row pivot dimension via the gstate.
480+
t_uindex m_num_row_pivots_in_tree;
481+
std::string m_next_row_pivot_name;
482+
483+
public:
484+
void set_gmv_row_pivot_meta(
485+
t_uindex num_row_pivots_in_tree,
486+
const std::string& next_row_pivot_name
487+
);
488+
t_uindex get_num_row_pivots_in_tree() const;
489+
const std::string& get_next_row_pivot_name() const;
471490
};
472491

473492
} // end namespace perspective

0 commit comments

Comments
 (0)