From c31db931379ee73ed7d7eb15b177542671540c7d Mon Sep 17 00:00:00 2001 From: Brian Donovan Date: Thu, 2 Apr 2026 13:19:57 -0700 Subject: [PATCH 1/5] fix(chart): word cloud secondary sort prevents Druid TopN optimization when sort_by_metric enabled When sort_by_metric is true, buildQuery unconditionally appended a secondary ORDER BY series ASC alongside the metric sort. On Druid, any multi-column ORDER BY prevents the native TopN query optimization, forcing a full GroupBy scan that can cause dramatic slowdowns and timeouts on high-cardinality dimensions. Make the series sort mutually exclusive with sort_by_metric, and add test coverage for both sort behaviors. Fixes #39072 --- .../src/plugin/buildQuery.ts | 3 +- .../test/buildQuery.test.ts | 57 +++++++++++++++---- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts index faf816d5eca4..8179bec91c8f 100644 --- a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts +++ b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts @@ -28,8 +28,7 @@ export default function buildQuery(formData: WordCloudFormData) { if (sort_by_metric && metric) { orderby.push([metric, false]); - } - if (series) { + } else if (series) { orderby.push([series, true]); } diff --git a/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts b/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts index c83f8993e81b..c99d87fb12eb 100644 --- a/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts +++ b/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts @@ -21,17 +21,52 @@ import { VizType } from '@superset-ui/core'; import { WordCloudFormData } from '../src'; import buildQuery from '../src/plugin/buildQuery'; -describe('WordCloud buildQuery', () => { - const formData: WordCloudFormData = { - datasource: '5__table', - granularity_sqla: 'ds', - series: 'foo', - viz_type: VizType.WordCloud, - }; +const basicFormData: WordCloudFormData = { + datasource: '5__table', + granularity_sqla: 'ds', + series: 'foo', + viz_type: VizType.WordCloud, +}; - test('should build columns from series in form data', () => { - const queryContext = buildQuery(formData); - const [query] = queryContext.queries; - expect(query.columns).toEqual(['foo']); +describe('plugin-chart-word-cloud', () => { + describe('buildQuery', () => { + test('should build columns from series in form data', () => { + const queryContext = buildQuery(basicFormData); + const [query] = queryContext.queries; + expect(query.columns).toEqual(['foo']); + }); + + test('should order by series ASC when sort_by_metric is false', () => { + const queryContext = buildQuery({ + ...basicFormData, + metric: 'count', + sort_by_metric: false, + row_limit: 100, + }); + const [query] = queryContext.queries; + expect(query.orderby).toEqual([['foo', true]]); + }); + + test('should order by metric DESC only when sort_by_metric is true', () => { + const queryContext = buildQuery({ + ...basicFormData, + metric: 'count', + sort_by_metric: true, + row_limit: 100, + }); + const [query] = queryContext.queries; + expect(query.orderby).toEqual([['count', false]]); + }); + + test('should not include secondary series sort when sort_by_metric is true', () => { + const queryContext = buildQuery({ + ...basicFormData, + metric: 'count', + sort_by_metric: true, + row_limit: 100, + }); + const [query] = queryContext.queries; + expect(query.orderby).toHaveLength(1); + }); }); }); From 61254544a831b0973c0a05aba70bdcfa3dbd0f7f Mon Sep 17 00:00:00 2001 From: Brian Donovan Date: Mon, 13 Apr 2026 12:47:13 -0700 Subject: [PATCH 2/5] fix(chart): add independent sort_by_series control for word cloud Make sort_by_metric and sort_by_series independent controls instead of the previous behavior where series sort was unconditionally appended alongside the metric sort. This allows users to choose metric-only sorting (enabling Druid TopN optimization) while optionally adding a secondary series sort for tie-breaking. Changes: - buildQuery.ts: sort_by_metric and sort_by_series are now independent - controlPanel.tsx: add sort_by_series checkbox (default: false) - buildQuery.test.ts: cover all four sort combinations - DB migration: set sort_by_series=true for existing word_cloud charts to preserve backward compatibility Fixes #39072 --- .../src/plugin/buildQuery.ts | 6 +- .../src/plugin/controlPanel.tsx | 15 ++++ .../test/buildQuery.test.ts | 26 +++++- ...add_sort_by_series_to_word_cloud_charts.py | 79 +++++++++++++++++++ 4 files changed, 120 insertions(+), 6 deletions(-) create mode 100644 superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py diff --git a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts index 8179bec91c8f..e90aab66ed76 100644 --- a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts +++ b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/buildQuery.ts @@ -21,14 +21,16 @@ import { buildQueryContext, QueryFormOrderBy } from '@superset-ui/core'; import { WordCloudFormData } from '../types'; export default function buildQuery(formData: WordCloudFormData) { - const { metric, sort_by_metric, series, row_limit } = formData; + const { metric, sort_by_metric, sort_by_series, series, row_limit } = + formData; const orderby: QueryFormOrderBy[] = []; const shouldApplyOrderBy = row_limit !== undefined && row_limit !== null && row_limit !== 0; if (sort_by_metric && metric) { orderby.push([metric, false]); - } else if (series) { + } + if (sort_by_series && series) { orderby.push([series, true]); } diff --git a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx index 9098a282c08c..11b3e01cf349 100644 --- a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx @@ -35,6 +35,21 @@ const config: ControlPanelConfig = { ['adhoc_filters'], ['row_limit'], ['sort_by_metric'], + [ + { + name: 'sort_by_series', + config: { + type: 'CheckboxControl', + label: t('Sort by series'), + default: false, + description: t( + 'Include a secondary sort by series name (ascending). ' + + 'This produces consistent ordering when multiple series have the ' + + 'same metric value, but may reduce query performance on some databases.', + ), + }, + }, + ], ], }, { diff --git a/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts b/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts index c99d87fb12eb..7ce8cab7be1a 100644 --- a/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts +++ b/superset-frontend/plugins/plugin-chart-word-cloud/test/buildQuery.test.ts @@ -36,15 +36,16 @@ describe('plugin-chart-word-cloud', () => { expect(query.columns).toEqual(['foo']); }); - test('should order by series ASC when sort_by_metric is false', () => { + test('should not include orderby when neither sort option is enabled', () => { const queryContext = buildQuery({ ...basicFormData, metric: 'count', sort_by_metric: false, + sort_by_series: false, row_limit: 100, }); const [query] = queryContext.queries; - expect(query.orderby).toEqual([['foo', true]]); + expect(query.orderby).toBeUndefined(); }); test('should order by metric DESC only when sort_by_metric is true', () => { @@ -52,21 +53,38 @@ describe('plugin-chart-word-cloud', () => { ...basicFormData, metric: 'count', sort_by_metric: true, + sort_by_series: false, row_limit: 100, }); const [query] = queryContext.queries; expect(query.orderby).toEqual([['count', false]]); }); - test('should not include secondary series sort when sort_by_metric is true', () => { + test('should order by series ASC only when sort_by_series is true', () => { + const queryContext = buildQuery({ + ...basicFormData, + metric: 'count', + sort_by_metric: false, + sort_by_series: true, + row_limit: 100, + }); + const [query] = queryContext.queries; + expect(query.orderby).toEqual([['foo', true]]); + }); + + test('should order by metric DESC then series ASC when both are true', () => { const queryContext = buildQuery({ ...basicFormData, metric: 'count', sort_by_metric: true, + sort_by_series: true, row_limit: 100, }); const [query] = queryContext.queries; - expect(query.orderby).toHaveLength(1); + expect(query.orderby).toEqual([ + ['count', false], + ['foo', true], + ]); }); }); }); diff --git a/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py b/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py new file mode 100644 index 000000000000..ed6a3643ad02 --- /dev/null +++ b/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""add sort_by_series to word_cloud charts + +Revision ID: fd0c8583b46d +Revises: ce6bd21901ab +Create Date: 2026-04-13 19:28:19.021839 + +""" + +from alembic import op +from sqlalchemy import Column, Integer, String, Text +from sqlalchemy.ext.declarative import declarative_base + +from superset import db +from superset.migrations.shared.utils import paginated_update +from superset.utils import json + +# revision identifiers, used by Alembic. +revision = "fd0c8583b46d" +down_revision = "ce6bd21901ab" + +Base = declarative_base() + + +class Slice(Base): + __tablename__ = "slices" + id = Column(Integer, primary_key=True) + viz_type = Column(String(250)) + params = Column(Text) + + +def upgrade_params(params: dict) -> dict: + if "sort_by_series" not in params: + params["sort_by_series"] = True + return params + + +def downgrade_params(params: dict) -> dict: + params.pop("sort_by_series", None) + return params + + +def upgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + for slc in paginated_update( + session.query(Slice).filter(Slice.viz_type == "word_cloud") + ): + params = json.loads(slc.params or "{}") + slc.params = json.dumps(upgrade_params(params)) + session.close() + + +def downgrade(): + bind = op.get_bind() + session = db.Session(bind=bind) + + for slc in paginated_update( + session.query(Slice).filter(Slice.viz_type == "word_cloud") + ): + params = json.loads(slc.params or "{}") + slc.params = json.dumps(downgrade_params(params)) + session.close() From 410a85358ba275a7609e7c2895a4ccbd5b24428a Mon Sep 17 00:00:00 2001 From: Brian Donovan Date: Tue, 14 Apr 2026 14:26:24 -0700 Subject: [PATCH 3/5] chore: retrigger CI From f8ed170eeb60251494706248c51fbd2469b6a0ad Mon Sep 17 00:00:00 2001 From: Brian Donovan Date: Wed, 15 Apr 2026 11:28:52 -0700 Subject: [PATCH 4/5] fix(chart): improve sort_by_series description wording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove "secondary" from the description since sort_by_series and sort_by_metric are independent controls — either can be used alone. Clarify that the tiebreaker behavior only applies when both are enabled. Fixes prettier formatting (+ at end of line, not beginning). --- .../plugin-chart-word-cloud/src/plugin/controlPanel.tsx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx index 11b3e01cf349..98e5e624b7f5 100644 --- a/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx +++ b/superset-frontend/plugins/plugin-chart-word-cloud/src/plugin/controlPanel.tsx @@ -43,9 +43,10 @@ const config: ControlPanelConfig = { label: t('Sort by series'), default: false, description: t( - 'Include a secondary sort by series name (ascending). ' - + 'This produces consistent ordering when multiple series have the ' - + 'same metric value, but may reduce query performance on some databases.', + 'Sort results by series name in ascending order. ' + + 'When combined with "Sort by metric", this acts as a tiebreaker ' + + 'for equal metric values. Adding this sort may reduce query ' + + 'performance on some databases.', ), }, }, From 2e1f747c7fa3e03cb7d225ee21f2b9a0d6a8fc49 Mon Sep 17 00:00:00 2001 From: Brian Donovan Date: Wed, 15 Apr 2026 11:32:21 -0700 Subject: [PATCH 5/5] fix(chart): add defensive JSON parsing to migration Guard against malformed or non-dict params in word_cloud slices during upgrade/downgrade. Skips invalid rows instead of aborting the entire migration. --- ..._add_sort_by_series_to_word_cloud_charts.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py b/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py index ed6a3643ad02..bf66a2264f32 100644 --- a/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py +++ b/superset/migrations/versions/2026-04-13_19-28_fd0c8583b46d_add_sort_by_series_to_word_cloud_charts.py @@ -62,8 +62,13 @@ def upgrade(): for slc in paginated_update( session.query(Slice).filter(Slice.viz_type == "word_cloud") ): - params = json.loads(slc.params or "{}") - slc.params = json.dumps(upgrade_params(params)) + try: + params = json.loads(slc.params or "{}") + if not isinstance(params, dict): + continue + slc.params = json.dumps(upgrade_params(params)) + except (json.JSONDecodeError, TypeError): + continue session.close() @@ -74,6 +79,11 @@ def downgrade(): for slc in paginated_update( session.query(Slice).filter(Slice.viz_type == "word_cloud") ): - params = json.loads(slc.params or "{}") - slc.params = json.dumps(downgrade_params(params)) + try: + params = json.loads(slc.params or "{}") + if not isinstance(params, dict): + continue + slc.params = json.dumps(downgrade_params(params)) + except (json.JSONDecodeError, TypeError): + continue session.close()