Skip to content

Commit a199e5d

Browse files
committed
ctx0 performance improvements
Signed-off-by: Andrew Stein <steinlink@gmail.com>
1 parent febaf25 commit a199e5d

19 files changed

Lines changed: 457 additions & 74 deletions

rust/perspective-server/cpp/perspective/src/cpp/context_grouped_pkey.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,9 @@ t_ctx_grouped_pkey::pprint() const {
676676
}
677677

678678
void
679-
t_ctx_grouped_pkey::notify(const t_data_table& flattened) {
679+
t_ctx_grouped_pkey::notify(
680+
const t_data_table& flattened, bool /* is_registration */
681+
) {
680682
PSP_TRACE_SENTINEL();
681683
PSP_VERBOSE_ASSERT(m_init, "touching uninited object");
682684

rust/perspective-server/cpp/perspective/src/cpp/context_one.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ t_ctx1::get_data(const std::vector<t_uindex>& rows) const {
276276
}
277277

278278
void
279-
t_ctx1::notify(const t_data_table& flattened) {
279+
t_ctx1::notify(const t_data_table& flattened, bool /* is_registration */) {
280280
PSP_TRACE_SENTINEL();
281281
PSP_VERBOSE_ASSERT(m_init, "touching uninited object");
282282

rust/perspective-server/cpp/perspective/src/cpp/context_two.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ t_ctx2::reset_sortby() {
509509
}
510510

511511
void
512-
t_ctx2::notify(const t_data_table& flattened) {
512+
t_ctx2::notify(const t_data_table& flattened, bool /* is_registration */) {
513513
for (t_uindex tree_idx = 0, loop_end = m_trees.size(); tree_idx < loop_end;
514514
++tree_idx) {
515515
if (is_rtree_idx(tree_idx) != 0U) {

rust/perspective-server/cpp/perspective/src/cpp/context_unit.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,14 +111,21 @@ t_ctxunit::notify(
111111
* @param flattened
112112
*/
113113
void
114-
t_ctxunit::notify(const t_data_table& flattened) {
114+
t_ctxunit::notify(const t_data_table& flattened, bool is_registration) {
115115
t_uindex nrecs = flattened.size();
116116
std::shared_ptr<const t_column> pkey_sptr =
117117
flattened.get_const_column("psp_pkey");
118118
const t_column* pkey_col = pkey_sptr.get();
119119

120120
m_has_delta = true;
121121

122+
// During `_register_context`, no subscriber exists yet — skip the
123+
// hopscotch_set insertions no observer can see. A unit context has no
124+
// traversal/sort state to build either, so this is effectively O(1).
125+
if (is_registration) {
126+
return;
127+
}
128+
122129
// TODO: pkey and idx are equal, except idx is not a t_tscalar. I don't
123130
// think there is a difference between accessing the pkey column and
124131
// creating a brand new scalar, as get_scalar always returns a copy. We

rust/perspective-server/cpp/perspective/src/cpp/context_zero.cpp

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ t_ctx0::notify(
184184
* @param flattened
185185
*/
186186
void
187-
t_ctx0::notify(const t_data_table& flattened) {
187+
t_ctx0::notify(const t_data_table& flattened, bool is_registration) {
188188
t_uindex nrecs = flattened.size();
189189
std::shared_ptr<const t_column> pkey_sptr =
190190
flattened.get_const_column("psp_pkey");
@@ -195,6 +195,40 @@ t_ctx0::notify(const t_data_table& flattened) {
195195

196196
m_has_delta = true;
197197

198+
// During `_register_context`, no subscriber exists yet to observe the
199+
// delta set produced here — skip populating `m_delta_pkeys` so we don't
200+
// allocate one hopscotch_set entry per row. The first real update goes
201+
// through the 6-arg `notify` which tracks deltas normally.
202+
const bool track_deltas = !is_registration;
203+
204+
// Fast path: unsorted, unfiltered, empty traversal (i.e. initial
205+
// registration of a pass-through ctx0). Skip the `m_new_elems`
206+
// hopscotch_map round-trip and the subsequent `step_end` rebuild of
207+
// `m_index`; append pkeys directly into `m_index`, then finalize
208+
// (pkey-sort + `m_pkeyidx` population). This matches the row order
209+
// the existing `add_row`/`step_end` path produces for an empty sort.
210+
const bool can_bulk_load = !m_config.has_filters()
211+
&& m_traversal->empty_sort_by() && m_traversal->size() == 0;
212+
if (can_bulk_load) {
213+
m_traversal->bulk_load_reserve(nrecs);
214+
if (track_deltas) {
215+
m_delta_pkeys.reserve(nrecs);
216+
}
217+
for (t_uindex idx = 0; idx < nrecs; ++idx) {
218+
t_tscalar pkey =
219+
m_symtable.get_interned_tscalar(pkey_col->get_scalar(idx));
220+
std::uint8_t op_ = *(op_col->get_nth<std::uint8_t>(idx));
221+
if (static_cast<t_op>(op_) == OP_INSERT) {
222+
m_traversal->bulk_load_append(pkey);
223+
}
224+
if (track_deltas) {
225+
add_delta_pkey(pkey);
226+
}
227+
}
228+
m_traversal->bulk_load_finalize();
229+
return;
230+
}
231+
198232
if (m_config.has_filters()) {
199233
t_mask msk = filter_table_for_config(flattened, m_config);
200234

@@ -219,8 +253,9 @@ t_ctx0::notify(const t_data_table& flattened) {
219253
break;
220254
}
221255

222-
// Add primary key to track row delta
223-
add_delta_pkey(pkey);
256+
if (track_deltas) {
257+
add_delta_pkey(pkey);
258+
}
224259
}
225260

226261
return;
@@ -242,8 +277,9 @@ t_ctx0::notify(const t_data_table& flattened) {
242277
break;
243278
}
244279

245-
// Add primary key to track row delta
246-
add_delta_pkey(pkey);
280+
if (track_deltas) {
281+
add_delta_pkey(pkey);
282+
}
247283
}
248284
}
249285

rust/perspective-server/cpp/perspective/src/cpp/flat_traversal.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
#include <perspective/scalar.h>
1818
#include <perspective/schema.h>
1919

20+
#include <algorithm>
21+
2022
namespace perspective {
2123

2224
t_ftrav::t_ftrav() : m_step_deletes(0), m_step_inserts(0) {
@@ -279,6 +281,14 @@ t_ftrav::step_begin() {
279281

280282
void
281283
t_ftrav::step_end() {
284+
// Fast path: if no incremental work happened this step, `m_index` and
285+
// `m_pkeyidx` are already in their final shape (either unchanged, or
286+
// populated directly via `bulk_load_append`). Skip the O(N) rebuild.
287+
if (m_step_inserts == 0 && m_step_deletes == 0) {
288+
m_new_elems.clear();
289+
return;
290+
}
291+
282292
// The new number of rows in this traversal
283293
t_index new_size = m_index->size() + m_step_inserts - m_step_deletes;
284294

@@ -447,4 +457,42 @@ t_ftrav::get_from_gstate(
447457
return gstate.get(*master_table, colname, pkey);
448458
}
449459

460+
void
461+
t_ftrav::bulk_load_reserve(t_uindex n) {
462+
m_index->reserve(n);
463+
m_pkeyidx.reserve(n);
464+
}
465+
466+
void
467+
t_ftrav::bulk_load_append(t_tscalar pkey) {
468+
// Pre-condition: `empty_sort_by()` and the caller has ownership of
469+
// step framing (i.e. we're inside a `step_begin`/`step_end` pair for
470+
// an initial registration). `m_step_inserts` is intentionally NOT
471+
// incremented so that `step_end` takes its short-circuit path.
472+
// `m_pkeyidx` is populated in `bulk_load_finalize` after sorting,
473+
// not here — the pkey-to-index mapping only has meaning once the
474+
// final sort order is determined.
475+
m_index->emplace_back();
476+
m_index->back().m_pkey = pkey;
477+
}
478+
479+
void
480+
t_ftrav::bulk_load_finalize() {
481+
// Match the order the existing `add_row`/`step_end` path would
482+
// produce for an empty sort spec: `cmp_mselem` with zero sort
483+
// columns falls through to `a.m_pkey < b.m_pkey`, so sort
484+
// `m_index` by pkey and then rebuild `m_pkeyidx` against the final
485+
// row positions.
486+
std::sort(
487+
m_index->begin(),
488+
m_index->end(),
489+
[](const t_mselem& a, const t_mselem& b) {
490+
return a.m_pkey < b.m_pkey;
491+
}
492+
);
493+
for (t_uindex i = 0, loop_end = m_index->size(); i < loop_end; ++i) {
494+
m_pkeyidx[(*m_index)[i].m_pkey] = i;
495+
}
496+
}
497+
450498
} // end namespace perspective

0 commit comments

Comments
 (0)