1414#include < algorithm>
1515#include < cmath>
1616#include < fstream>
17+ #include < unordered_map>
1718#include < perspective/base.h>
1819#include < perspective/compat.h>
1920#include < perspective/extract_aggregate.h>
@@ -86,11 +87,31 @@ t_stree::t_stree(
8687 m_aggspecs (aggspecs),
8788 m_schema (std::move(schema)),
8889 m_cur_aggidx (1 ),
89- m_has_delta (false ) {
90+ m_has_delta (false ),
91+ m_num_row_pivots_in_tree (pivots.size()) {
9092 const auto & g_agg_str = cfg.get_grand_agg_str ();
9193 m_grand_agg_str = g_agg_str.empty () ? " Grand Aggregate" : g_agg_str;
9294}
9395
96+ void
97+ t_stree::set_gmv_row_pivot_meta (
98+ t_uindex num_row_pivots_in_tree,
99+ const std::string& next_row_pivot_name
100+ ) {
101+ m_num_row_pivots_in_tree = num_row_pivots_in_tree;
102+ m_next_row_pivot_name = next_row_pivot_name;
103+ }
104+
105+ t_uindex
106+ t_stree::get_num_row_pivots_in_tree () const {
107+ return m_num_row_pivots_in_tree;
108+ }
109+
110+ const std::string&
111+ t_stree::get_next_row_pivot_name () const {
112+ return m_next_row_pivot_name;
113+ }
114+
94115t_stree::~t_stree () {
95116 for (auto & iter : m_smap) {
96117 free (const_cast <char *>(iter.first ));
@@ -1816,17 +1837,24 @@ t_stree::update_agg_table(
18161837 dst->set_scalar (dst_ridx, new_value);
18171838 } break ;
18181839 case AGGTYPE_GMV: {
1819- // Leaf nodes: plain `sum` of the underlying rows.
1820- // Non-leaf nodes: sum over immediate children of
1821- // `abs(sum(child_subtree))`. The child sums are recomputed
1822- // from the gstate rather than read from already-populated
1823- // child rows, so the parent does not see the abs-rolled-up
1824- // child value — it sees the child's raw signed sum.
1840+ // The user-facing rule is "leaf = sum, parent = sum over
1841+ // immediate row children of |raw sum of child subtree|".
1842+ // Under split_by this is complicated by t_ctx2 sharding
1843+ // the data across several t_stree instances — see
1844+ // [t_stree::set_gmv_row_pivot_meta] for the metadata that
1845+ // distinguishes "the next pivot in this tree is a row
1846+ // pivot" from "the next missing row pivot lives in the
1847+ // gstate".
18251848 old_value.set (dst->get_scalar (dst_ridx));
18261849 const auto & col_name = spec.get_dependencies ()[0 ].name ();
1827- auto dst_dtype = dst->get_dtype ();
1828- auto sum_reducer = [dst_dtype](std::vector<t_tscalar>& values
1829- ) -> t_tscalar {
1850+ const auto dst_dtype = dst->get_dtype ();
1851+ const auto k_tree = m_num_row_pivots_in_tree;
1852+ const auto & next_row_pivot = m_next_row_pivot_name;
1853+ const bool has_missing_row_pivot = !next_row_pivot.empty ();
1854+ const auto depth = get_depth (nidx);
1855+
1856+ auto sum_reducer =
1857+ [dst_dtype](std::vector<t_tscalar>& values) -> t_tscalar {
18301858 if (values.empty ()) {
18311859 return mknone ();
18321860 }
@@ -1842,7 +1870,14 @@ t_stree::update_agg_table(
18421870 return v;
18431871 };
18441872
1845- if (is_leaf (nidx)) {
1873+ // row_path_len = min(depth, k_tree). A node is a "row-leaf"
1874+ // iff there are no row pivots missing from this tree AND
1875+ // all row pivots are consumed at this node's depth — i.e.
1876+ // depth >= k_tree.
1877+ const bool is_row_leaf =
1878+ !has_missing_row_pivot && depth >= k_tree;
1879+
1880+ if (is_row_leaf) {
18461881 auto pkeys = get_pkeys (nidx);
18471882 new_value.set (
18481883 reduce_from_gstate<std::function<
@@ -1854,7 +1889,10 @@ t_stree::update_agg_table(
18541889 sum_reducer
18551890 )
18561891 );
1857- } else {
1892+ } else if (depth < k_tree) {
1893+ // Row-parent and this tree's next pivot is the next
1894+ // row pivot. Tree children at depth+1 are the
1895+ // immediate row children — use them directly.
18581896 t_tscalar rval;
18591897 rval.set (std::uint64_t (0 ));
18601898 rval.m_type = dst_dtype;
@@ -1873,6 +1911,53 @@ t_stree::update_agg_table(
18731911 }
18741912 }
18751913 new_value.set (rval);
1914+ } else {
1915+ // Row-parent but this tree's pivots beyond `depth`
1916+ // (if any) are column pivots, so the immediate row
1917+ // children aren't tree children. Read the missing
1918+ // row pivot column for this node's pkeys, partition
1919+ // by it, sum each partition, sum |partition_sum|.
1920+ auto pkeys = get_pkeys (nidx);
1921+ std::vector<t_tscalar> pivot_vals;
1922+ std::vector<t_tscalar> data_vals;
1923+ read_column_from_gstate (
1924+ gstate,
1925+ expression_master_table,
1926+ next_row_pivot,
1927+ pkeys,
1928+ pivot_vals
1929+ );
1930+ read_column_from_gstate (
1931+ gstate,
1932+ expression_master_table,
1933+ col_name,
1934+ pkeys,
1935+ data_vals
1936+ );
1937+
1938+ std::unordered_map<t_tscalar, t_tscalar> partials;
1939+ for (std::size_t i = 0 ;
1940+ i < pivot_vals.size () && i < data_vals.size ();
1941+ ++i) {
1942+ const auto & x = data_vals[i];
1943+ if (!x.is_valid () || x.is_nan ()) {
1944+ continue ;
1945+ }
1946+ auto & acc = partials[pivot_vals[i]];
1947+ if (!acc.is_valid ()) {
1948+ acc.set (std::uint64_t (0 ));
1949+ acc.m_type = dst_dtype;
1950+ }
1951+ acc = acc.add (x.coerce_numeric_dtype (dst_dtype));
1952+ }
1953+
1954+ t_tscalar rval;
1955+ rval.set (std::uint64_t (0 ));
1956+ rval.m_type = dst_dtype;
1957+ for (const auto & kv : partials) {
1958+ rval = rval.add (kv.second .abs ());
1959+ }
1960+ new_value.set (rval);
18761961 }
18771962 dst->set_scalar (dst_ridx, new_value);
18781963 } break ;
0 commit comments