@@ -956,30 +956,45 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
956956 ) } `
957957 : sql `` ;
958958
959- // Subtree filters use a *pivot-driven* plan rather than a correlated
960- // EXISTS. Each filter contributes a recursive `sub_n` CTE (the
961- // translation-group subtree walk) and a `matched_n` CTE of DISTINCT
962- // entry_ids tagged anywhere under that subtree, then JOINs `matched_n`
963- // to the collection. This resolves matching entries from
964- // content_taxonomies first — driven by idx_content_taxonomies_term,
965- // reading only the taggings under the subtree — instead of walking the
966- // collection in sort order and probing per candidate. The EXISTS plan
967- // was entry-driven and degraded toward O(table) reads when the selected
968- // subtree is sparse against the recency sort, which is exactly the
969- // faceted-browse case this operator exists for.
959+ // Subtree filters use a *pivot-driven* plan. Each filter contributes a
960+ // recursive `sub_n` CTE (the translation-group subtree walk) and a
961+ // `matched_n` CTE of DISTINCT entry_ids tagged anywhere under that
962+ // subtree. Matching entries are resolved from content_taxonomies first
963+ // (driven by idx_content_taxonomies_term, reading only the taggings
964+ // under the subtree), then the collection is probed by primary key.
965+ //
966+ // The join order is PINNED, not left to the planner. On SQLite/D1 the
967+ // only join strategy is a nested loop and a CTE cannot carry an index,
968+ // so if the planner is free to drive the outer loop from the
969+ // collection's recency index (to early-exit an `ORDER BY <recency>
970+ // LIMIT n`) it full-scans the unindexed `matched_n` once per candidate
971+ // row — O(rows_walked × matched_set). For a selection that is sparse
972+ // against the recency sort (the faceted-browse case this operator
973+ // exists for) rows_walked approaches the whole table: a production D1
974+ // trace showed this read ~3.4M rows / 4s where the old correlated
975+ // EXISTS read ~44k / 68ms. Leading the FROM with
976+ // `matched_0 CROSS JOIN <collection>` forces SQLite to drive from the
977+ // small matched set and probe the collection by id — it never reorders
978+ // across a CROSS JOIN. The tradeoff: a near-root selection covering most
979+ // rows now sorts the (large) matched set instead of early-exiting the
980+ // recency index, but that is a bounded O(n log n) and far rarer than the
981+ // sparse case the free plan made pathological.
982+ //
983+ // Postgres has a cost-based optimizer with hash joins, so it never
984+ // incurs the per-row scan and keeps a plain JOIN (and `CROSS JOIN ... ON`
985+ // is not valid Postgres syntax anyway).
970986 //
971987 // Multiple subtree taxonomies AND together as successive JOINs (set
972988 // intersection). `matched_n` is DISTINCT on entry_id (a text id) so each
973989 // JOIN is 1:1 — no row fan-out, no SELECT DISTINCT, so the Postgres
974990 // json-column DISTINCT hazard (which is why sibling filters use EXISTS)
975- // never arises here. Always pivot-driven: a near-root selection covering
976- // most rows is mildly suboptimal versus the old early-exit, but bounded,
977- // and rare next to the sparse case the EXISTS plan made pathological.
991+ // never arises here.
978992 const subtreeCteParts : ReturnType < typeof sql > [ ] = [ ] ;
979- const subtreeJoinParts : ReturnType < typeof sql > [ ] = [ ] ;
993+ const matchedAliases : ReturnType < typeof sql . ref > [ ] = [ ] ;
980994 subtreeFilters . forEach ( ( f , i ) => {
981995 const subAlias = sql . ref ( `sub_${ i } ` ) ;
982996 const matchedAlias = sql . ref ( `matched_${ i } ` ) ;
997+ matchedAliases . push ( matchedAlias ) ;
983998 subtreeCteParts . push (
984999 sql `${ subAlias } (grp) AS (${ subtreeGroupsCteBody ( f . name , f . roots , subAlias ) } )` ,
9851000 ) ;
@@ -990,9 +1005,6 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
9901005 AND taxonomy_id IN (SELECT grp FROM ${ subAlias } )
9911006 )` ,
9921007 ) ;
993- subtreeJoinParts . push (
994- sql `JOIN ${ matchedAlias } ON ${ matchedAlias } .entry_id = ${ sql . ref ( tableName ) } .id` ,
995- ) ;
9961008 } ) ;
9971009 // `WITH RECURSIVE` covers the whole list; the non-recursive `matched_n`
9981010 // CTEs are permitted alongside the recursive `sub_n` ones on both
@@ -1002,8 +1014,27 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
10021014 subtreeCteParts . length > 0
10031015 ? sql `WITH RECURSIVE ${ sql . join ( subtreeCteParts , sql `, ` ) } `
10041016 : sql `` ;
1005- const subtreeJoins =
1006- subtreeJoinParts . length > 0 ? sql `${ sql . join ( subtreeJoinParts , sql ` ` ) } ` : sql `` ;
1017+
1018+ // FROM clause. Without subtree filters it is the bare collection table.
1019+ // With them, SQLite leads with the first matched set and CROSS JOINs the
1020+ // collection to pin the matched-driven order (see above); any further
1021+ // subtree filters join by id. Postgres leaves ordering to its optimizer.
1022+ const tableRef = sql . ref ( tableName ) ;
1023+ let fromClause : ReturnType < typeof sql > ;
1024+ if ( matchedAliases . length === 0 ) {
1025+ fromClause = tableRef ;
1026+ } else if ( isPostgres ( db ) ) {
1027+ const joins = matchedAliases . map (
1028+ ( m ) => sql `JOIN ${ m } ON ${ m } .entry_id = ${ tableRef } .id` ,
1029+ ) ;
1030+ fromClause = sql `${ tableRef } ${ sql . join ( joins , sql ` ` ) } ` ;
1031+ } else {
1032+ const [ first , ...rest ] = matchedAliases ;
1033+ const restJoins = rest . map ( ( m ) => sql `JOIN ${ m } ON ${ m } .entry_id = ${ tableRef } .id` ) ;
1034+ fromClause = sql `${ first } CROSS JOIN ${ tableRef } ON ${ first } .entry_id = ${ tableRef } .id${
1035+ rest . length > 0 ? sql ` ${ sql . join ( restJoins , sql ` ` ) } ` : sql ``
1036+ } `;
1037+ }
10071038
10081039 // `_emdash_content_bylines.byline_id` stores the byline's
10091040 // translation_group (migration 040), so a credit spans every
@@ -1038,8 +1069,7 @@ export function emdashLoader(): LiveLoader<EntryData, EntryFilter, CollectionFil
10381069 : sql `LIMIT -1 OFFSET ${ offset } ` ;
10391070 }
10401071 result = await sql < Record < string , unknown > > `
1041- ${ withClause } SELECT ${ sql . ref ( tableName ) } .*, ${ termsSelect } , ${ bylinesSelect } FROM ${ sql . ref ( tableName ) }
1042- ${ subtreeJoins }
1072+ ${ withClause } SELECT ${ tableRef } .*, ${ termsSelect } , ${ bylinesSelect } FROM ${ fromClause }
10431073 WHERE deleted_at IS NULL
10441074 AND ${ statusCondition }
10451075 ${ localeFilter }
0 commit comments