From 2d34edebcf41bb04d13fd0dc100b8015c4660f73 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Fri, 3 Apr 2026 17:55:15 -0400 Subject: [PATCH 1/6] fix(mcp): strip json_metadata and position_json from get_dashboard_info response The get_dashboard_info MCP tool returned excessively large responses (up to 2.4MB) due to raw json_metadata (~0.5MB of color schemes, cross-filter scopes, etc.) and position_json (internal layout tree). Changes: - Remove json_metadata and position_json raw fields from DashboardInfo - Extract only useful filter info into structured native_filters field - Add cross_filters_enabled boolean field - Replace full ChartInfo with lightweight DashboardChartSummary in dashboard context (id, name, viz_type, datasource_name, url, description) - Update dashboard_serializer, serialize_dashboard_object, and generate_dashboard/add_chart_to_existing_dashboard to use new models - Add comprehensive tests for filter extraction and response slimming --- .../mcp_service/common/schema_discovery.py | 4 +- superset/mcp_service/dashboard/schemas.py | 146 +++++++++++++++-- .../tool/add_chart_to_existing_dashboard.py | 4 +- .../dashboard/tool/generate_dashboard.py | 4 +- .../mcp_service/utils/permissions_utils.py | 4 - .../dashboard/test_dashboard_schemas.py | 153 +++++++++++++++++- .../dashboard/tool/test_dashboard_tools.py | 46 +++--- 7 files changed, 312 insertions(+), 49 deletions(-) diff --git a/superset/mcp_service/common/schema_discovery.py b/superset/mcp_service/common/schema_discovery.py index dda1a0d5412c..2fd13bc8314e 100644 --- a/superset/mcp_service/common/schema_discovery.py +++ b/superset/mcp_service/common/schema_discovery.py @@ -169,9 +169,9 @@ def _get_sqlalchemy_type_name(col_type: Any) -> str: "dashboard_title": "Dashboard display title", "slug": "URL-friendly identifier for the dashboard", "published": "Whether the dashboard is published and visible", - "position_json": "JSON layout of dashboard components", - "json_metadata": "JSON metadata including filters and settings", "css": "Custom CSS for the dashboard", + "native_filters": "Native filter configuration (name, type, targets)", + "cross_filters_enabled": "Whether cross-filtering between charts is enabled", "theme_id": "Theme ID for dashboard styling", } diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index e76cde368271..8e20612d2f08 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -65,6 +65,7 @@ from __future__ import annotations +import logging from datetime import datetime from typing import Annotated, Any, Dict, List, Literal, TYPE_CHECKING @@ -83,7 +84,6 @@ from superset.models.dashboard import Dashboard from superset.daos.base import ColumnOperator, ColumnOperatorEnum -from superset.mcp_service.chart.schemas import ChartInfo, serialize_chart_object from superset.mcp_service.common.cache_schemas import MetadataCacheControl from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE from superset.mcp_service.system.schemas import ( @@ -97,6 +97,7 @@ _remove_dangerous_unicode, _strip_html_tags, ) +from superset.utils.json import loads as json_loads class DashboardError(BaseModel): @@ -298,6 +299,43 @@ class GetDashboardInfoRequest(MetadataCacheControl): ) +logger = logging.getLogger(__name__) + + +class NativeFilterSummary(BaseModel): + """Lightweight summary of a native filter for LLM consumption. + + Extracts only the fields needed to understand what filters are + available on a dashboard: name, type, and which columns they target. + """ + + id: str | None = Field(None, description="Filter ID") + name: str | None = Field(None, description="Filter display name") + filter_type: str | None = Field( + None, description="Filter type (e.g. filter_select, filter_range)" + ) + targets: List[Dict[str, Any]] = Field( + default_factory=list, + description="Filter targets (column name and dataset ID)", + ) + + +class DashboardChartSummary(BaseModel): + """Lightweight chart representation for dashboard context. + + Contains only the fields needed for LLMs to understand which charts + are on a dashboard, omitting verbose fields like form_data, tags, + owners, and timestamps that bloat the response. + """ + + id: int | None = Field(None, description="Chart ID") + slice_name: str | None = Field(None, description="Chart name") + viz_type: str | None = Field(None, description="Visualization type") + datasource_name: str | None = Field(None, description="Datasource name") + url: str | None = Field(None, description="Chart explore page URL") + description: str | None = Field(None, description="Chart description") + + class DashboardInfo(BaseModel): id: int | None = None dashboard_title: str | None = None @@ -306,8 +344,6 @@ class DashboardInfo(BaseModel): css: str | None = None certified_by: str | None = None certification_details: str | None = None - json_metadata: str | None = None - position_json: str | None = None published: bool | None = None is_managed_externally: bool | None = None external_url: str | None = None @@ -323,7 +359,21 @@ class DashboardInfo(BaseModel): owners: List[UserInfo] = Field(default_factory=list) tags: List[TagInfo] = Field(default_factory=list) roles: List[RoleInfo] = Field(default_factory=list) - charts: List[ChartInfo] = Field(default_factory=list) + charts: List[DashboardChartSummary] = Field(default_factory=list) + + # Structured filter information extracted from json_metadata + native_filters: List[NativeFilterSummary] = Field( + default_factory=list, + description=( + "Native filters configured on this dashboard. Extracted from " + "json_metadata for LLM consumption — includes filter name, type, " + "and target columns." + ), + ) + cross_filters_enabled: bool | None = Field( + None, + description="Whether cross-filtering between charts is enabled.", + ) # Fields for permalink/filter state support permalink_key: str | None = Field( @@ -473,6 +523,74 @@ class GenerateDashboardResponse(BaseModel): error: str | None = Field(None, description="Error message, if creation failed") +def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterSummary]: + """Extract native filter summaries from raw json_metadata string. + + Parses the json_metadata JSON blob and pulls out only the filter + name, type, and targets — dropping verbose fields like controlValues, + defaultDataMask, scope, and cascadeParentIds. + """ + if not json_metadata_str: + return [] + try: + metadata = json_loads(json_metadata_str) + except (ValueError, TypeError): + return [] + + native_filters = metadata.get("native_filter_configuration", []) + if not isinstance(native_filters, list): + return [] + + summaries: List[NativeFilterSummary] = [] + for f in native_filters: + if not isinstance(f, dict): + continue + summaries.append( + NativeFilterSummary( + id=f.get("id"), + name=f.get("name"), + filter_type=f.get("filterType"), + targets=f.get("targets", []), + ) + ) + return summaries + + +def _extract_cross_filters_enabled(json_metadata_str: str | None) -> bool | None: + """Extract the cross_filters_enabled flag from json_metadata.""" + if not json_metadata_str: + return None + try: + metadata = json_loads(json_metadata_str) + except (ValueError, TypeError): + return None + value = metadata.get("cross_filters_enabled") + if isinstance(value, bool): + return value + return None + + +def _serialize_chart_summary(chart: Any) -> DashboardChartSummary | None: + """Serialize a chart to a lightweight summary for dashboard context.""" + if not chart: + return None + from superset.mcp_service.utils.url_utils import get_superset_base_url + + chart_id = getattr(chart, "id", None) + chart_url = None + if chart_id: + chart_url = f"{get_superset_base_url()}/explore/?slice_id={chart_id}" + + return DashboardChartSummary( + id=chart_id, + slice_name=getattr(chart, "slice_name", None), + viz_type=getattr(chart, "viz_type", None), + datasource_name=getattr(chart, "datasource_name", None), + url=chart_url, + description=getattr(chart, "description", None), + ) + + def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: from superset.mcp_service.utils.url_utils import get_superset_base_url @@ -488,8 +606,6 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: css=dashboard.css, certified_by=dashboard.certified_by, certification_details=dashboard.certification_details, - json_metadata=dashboard.json_metadata, - position_json=dashboard.position_json, published=dashboard.published, is_managed_externally=dashboard.is_managed_externally, external_url=dashboard.external_url, @@ -506,6 +622,8 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: created_on_humanized=dashboard.created_on_humanized, changed_on_humanized=dashboard.changed_on_humanized, chart_count=len(dashboard.slices) if dashboard.slices else 0, + native_filters=_extract_native_filters(dashboard.json_metadata), + cross_filters_enabled=_extract_cross_filters_enabled(dashboard.json_metadata), owners=[ info for owner in dashboard.owners @@ -524,7 +642,11 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: ] if dashboard.roles else [], - charts=[serialize_chart_object(chart) for chart in dashboard.slices] + charts=[ + summary + for chart in dashboard.slices + if (summary := _serialize_chart_summary(chart)) is not None + ] if dashboard.slices else [], ) @@ -551,6 +673,8 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: f"{get_superset_base_url()}/superset/dashboard/{slug or dashboard_id}/" ) + json_metadata_str = getattr(dashboard, "json_metadata", None) + return DashboardInfo( id=dashboard_id, dashboard_title=getattr(dashboard, "dashboard_title", None), @@ -571,8 +695,8 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: css=getattr(dashboard, "css", None), certified_by=getattr(dashboard, "certified_by", None), certification_details=getattr(dashboard, "certification_details", None), - json_metadata=getattr(dashboard, "json_metadata", None), - position_json=getattr(dashboard, "position_json", None), + native_filters=_extract_native_filters(json_metadata_str), + cross_filters_enabled=_extract_cross_filters_enabled(json_metadata_str), is_managed_externally=getattr(dashboard, "is_managed_externally", None), external_url=getattr(dashboard, "external_url", None), uuid=str(getattr(dashboard, "uuid", "")) @@ -599,7 +723,9 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: if getattr(dashboard, "roles", None) else [], charts=[ - serialize_chart_object(chart) for chart in getattr(dashboard, "slices", []) + summary + for chart in getattr(dashboard, "slices", []) + if (summary := _serialize_chart_summary(chart)) is not None ] if getattr(dashboard, "slices", None) else [], diff --git a/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py b/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py index 888f2423fcc3..73d90136d839 100644 --- a/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py +++ b/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py @@ -31,13 +31,13 @@ from superset.commands.exceptions import CommandException from superset.extensions import event_logger -from superset.mcp_service.chart.schemas import serialize_chart_object from superset.mcp_service.dashboard.constants import ( generate_id, GRID_COLUMN_COUNT, GRID_DEFAULT_CHART_WIDTH, ) from superset.mcp_service.dashboard.schemas import ( + _serialize_chart_summary, AddChartToDashboardRequest, AddChartToDashboardResponse, DashboardInfo, @@ -525,7 +525,7 @@ def add_chart_to_existing_dashboard( charts=[ obj for chart in getattr(updated_dashboard, "slices", []) - if (obj := serialize_chart_object(chart)) is not None + if (obj := _serialize_chart_summary(chart)) is not None ], ) diff --git a/superset/mcp_service/dashboard/tool/generate_dashboard.py b/superset/mcp_service/dashboard/tool/generate_dashboard.py index 1b0f457771f8..38dbdbabd2ed 100644 --- a/superset/mcp_service/dashboard/tool/generate_dashboard.py +++ b/superset/mcp_service/dashboard/tool/generate_dashboard.py @@ -29,7 +29,6 @@ from superset_core.mcp.decorators import tool, ToolAnnotations from superset.extensions import event_logger -from superset.mcp_service.chart.schemas import serialize_chart_object from superset.mcp_service.dashboard.constants import ( generate_id, GRID_COLUMN_COUNT, @@ -395,6 +394,7 @@ def generate_dashboard( # noqa: C901 # Convert to our response format from superset.mcp_service.dashboard.schemas import ( + _serialize_chart_summary, serialize_tag_object, serialize_user_object, ) @@ -426,7 +426,7 @@ def generate_dashboard( # noqa: C901 charts=[ obj for chart in getattr(dashboard, "slices", []) - if (obj := serialize_chart_object(chart)) is not None + if (obj := _serialize_chart_summary(chart)) is not None ], ) diff --git a/superset/mcp_service/utils/permissions_utils.py b/superset/mcp_service/utils/permissions_utils.py index a3864206532e..62d9c3d08404 100644 --- a/superset/mcp_service/utils/permissions_utils.py +++ b/superset/mcp_service/utils/permissions_utils.py @@ -44,8 +44,6 @@ "created_by_fk", # Internal user references }, "dashboard": { - "json_metadata", # May contain sensitive configuration - "position_json", # Internal layout data "css", # May contain sensitive styling info "changed_by_fk", # Internal user references "created_by_fk", # Internal user references @@ -64,8 +62,6 @@ "database_id": "can_this_form_get", # Database access permissions "query_context": "can_explore_json", # Explore permissions "cache_key": "can_warm_up_cache", # Cache management permissions - "json_metadata": "can_this_form_get", # Advanced dashboard permissions - "position_json": "can_this_form_get", # Dashboard edit permissions "css": "can_this_form_get", # Dashboard styling permissions } diff --git a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py index d5ace35054d7..cda35db3f8f6 100644 --- a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py +++ b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py @@ -24,7 +24,12 @@ from typing import Any from unittest.mock import MagicMock, patch -from superset.mcp_service.dashboard.schemas import serialize_dashboard_object +from superset.mcp_service.dashboard.schemas import ( + _extract_cross_filters_enabled, + _extract_native_filters, + serialize_dashboard_object, +) +from superset.utils.json import dumps as json_dumps def _mock_dashboard( @@ -53,7 +58,6 @@ def _mock_dashboard( dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.uuid = None @@ -117,3 +121,148 @@ def test_url_uses_slug_when_available(self, mock_base_url): result = serialize_dashboard_object(dashboard) assert result.url == "http://localhost:8088/superset/dashboard/my-dashboard/" + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_no_json_metadata_or_position_json_in_response(self, mock_base_url): + """DashboardInfo should not contain json_metadata or position_json.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=1) + result = serialize_dashboard_object(dashboard) + + assert not hasattr(result, "json_metadata") + assert not hasattr(result, "position_json") + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_native_filters_extracted_from_json_metadata(self, mock_base_url): + """Native filters should be extracted from json_metadata.""" + mock_base_url.return_value = "http://localhost:8088" + + metadata = { + "native_filter_configuration": [ + { + "id": "NATIVE_FILTER-abc123", + "name": "Region Filter", + "filterType": "filter_select", + "targets": [{"column": {"name": "region"}, "datasetId": 10}], + "controlValues": {"multiSelect": True}, + "defaultDataMask": {"filterState": {"value": ["US"]}}, + "scope": {"rootPath": ["ROOT_ID"]}, + }, + { + "id": "NATIVE_FILTER-def456", + "name": "Date Range", + "filterType": "filter_range", + "targets": [{"column": {"name": "order_date"}, "datasetId": 10}], + }, + ], + "cross_filters_enabled": True, + "color_scheme": "supersetColors", + "shared_label_colors": {"Sales": "#1FA8C9"}, + } + dashboard = _mock_dashboard(id=1) + dashboard.json_metadata = json_dumps(metadata) + + result = serialize_dashboard_object(dashboard) + + assert len(result.native_filters) == 2 + assert result.native_filters[0].id == "NATIVE_FILTER-abc123" + assert result.native_filters[0].name == "Region Filter" + assert result.native_filters[0].filter_type == "filter_select" + assert len(result.native_filters[0].targets) == 1 + assert result.native_filters[1].name == "Date Range" + assert result.cross_filters_enabled is True + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_chart_summaries_are_lightweight(self, mock_base_url): + """Charts in dashboard response should only have core fields.""" + mock_base_url.return_value = "http://localhost:8088" + + chart = MagicMock() + chart.id = 5 + chart.slice_name = "Revenue Chart" + chart.viz_type = "echarts_timeseries_bar" + chart.datasource_name = "sales" + chart.description = "Monthly revenue" + + dashboard = _mock_dashboard(id=1, slices=[chart]) + result = serialize_dashboard_object(dashboard) + + assert len(result.charts) == 1 + assert result.charts[0].id == 5 + assert result.charts[0].slice_name == "Revenue Chart" + assert result.charts[0].viz_type == "echarts_timeseries_bar" + assert result.charts[0].datasource_name == "sales" + assert result.charts[0].url == "http://localhost:8088/explore/?slice_id=5" + # Verify no heavy fields + assert not hasattr(result.charts[0], "form_data") + assert not hasattr(result.charts[0], "tags") + assert not hasattr(result.charts[0], "owners") + + +class TestExtractNativeFilters: + """Tests for _extract_native_filters helper.""" + + def test_none_input(self): + assert _extract_native_filters(None) == [] + + def test_empty_string(self): + assert _extract_native_filters("") == [] + + def test_invalid_json(self): + assert _extract_native_filters("not json") == [] + + def test_no_filter_config(self): + assert _extract_native_filters("{}") == [] + + def test_non_list_filter_config(self): + assert _extract_native_filters('{"native_filter_configuration": "bad"}') == [] + + def test_valid_filters(self): + metadata = json_dumps( + { + "native_filter_configuration": [ + { + "id": "f1", + "name": "Filter 1", + "filterType": "filter_select", + "targets": [{"column": {"name": "col1"}}], + } + ] + } + ) + result = _extract_native_filters(metadata) + assert len(result) == 1 + assert result[0].id == "f1" + assert result[0].name == "Filter 1" + assert result[0].filter_type == "filter_select" + + def test_skips_non_dict_entries(self): + metadata = json_dumps( + {"native_filter_configuration": [{"id": "f1", "name": "ok"}, "bad", 123]} + ) + result = _extract_native_filters(metadata) + assert len(result) == 1 + + +class TestExtractCrossFiltersEnabled: + """Tests for _extract_cross_filters_enabled helper.""" + + def test_none_input(self): + assert _extract_cross_filters_enabled(None) is None + + def test_empty_json(self): + assert _extract_cross_filters_enabled("{}") is None + + def test_true(self): + assert _extract_cross_filters_enabled('{"cross_filters_enabled": true}') is True + + def test_false(self): + assert ( + _extract_cross_filters_enabled('{"cross_filters_enabled": false}') is False + ) + + def test_non_bool_value(self): + assert ( + _extract_cross_filters_enabled('{"cross_filters_enabled": "yes"}') is None + ) diff --git a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py index fea9a05e3f1c..e16a20809ed3 100644 --- a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py +++ b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -@pytest.fixture +@pytest.fixture() def mcp_server(): return mcp @@ -53,7 +53,7 @@ def mock_auth(): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_basic(mock_list, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -75,7 +75,6 @@ async def test_list_dashboards_basic(mock_list, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.uuid = "test-dashboard-uuid-1" @@ -120,7 +119,7 @@ async def test_list_dashboards_basic(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_with_filters(mock_list, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -142,7 +141,6 @@ async def test_list_dashboards_with_filters(mock_list, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.uuid = None @@ -188,7 +186,7 @@ async def test_list_dashboards_with_filters(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_with_string_filters(mock_list, mcp_server): mock_list.return_value = ([], 0) async with Client(mcp_server) as client: # noqa: F841 @@ -203,7 +201,7 @@ async def test_list_dashboards_with_string_filters(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_api_error(mock_list, mcp_server): mock_list.side_effect = ToolError("API request failed") async with Client(mcp_server) as client: @@ -214,7 +212,7 @@ async def test_list_dashboards_api_error(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_with_search(mock_list, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -236,7 +234,6 @@ async def test_list_dashboards_with_search(mock_list, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.uuid = None @@ -275,7 +272,7 @@ async def test_list_dashboards_with_search(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_with_simple_filters(mock_list, mcp_server): mock_list.return_value = ([], 0) async with Client(mcp_server) as client: @@ -292,7 +289,7 @@ async def test_list_dashboards_with_simple_filters(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.find_by_id") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_dashboard_info_success(mock_info, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -303,7 +300,6 @@ async def test_get_dashboard_info_success(mock_info, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.published = True dashboard.is_managed_externally = False dashboard.external_url = None @@ -346,7 +342,7 @@ async def test_get_dashboard_info_success(mock_info, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.find_by_id") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_dashboard_info_not_found(mock_info, mcp_server): mock_info.return_value = None # Not found returns None async with Client(mcp_server) as client: @@ -357,7 +353,7 @@ async def test_get_dashboard_info_not_found(mock_info, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.find_by_id") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_dashboard_info_access_denied(mock_info, mcp_server): mock_info.return_value = None # Access denied returns None async with Client(mcp_server) as client: @@ -371,7 +367,7 @@ async def test_get_dashboard_info_access_denied(mock_info, mcp_server): @patch("superset.mcp_service.mcp_core.ModelGetInfoCore._find_object") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_dashboard_info_by_uuid(mock_find_object, mcp_server): """Test getting dashboard info using UUID identifier.""" dashboard = Mock() @@ -383,7 +379,6 @@ async def test_get_dashboard_info_by_uuid(mock_find_object, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = "{}" - dashboard.position_json = "{}" dashboard.published = True dashboard.is_managed_externally = False dashboard.external_url = None @@ -411,7 +406,7 @@ async def test_get_dashboard_info_by_uuid(mock_find_object, mcp_server): @patch("superset.mcp_service.mcp_core.ModelGetInfoCore._find_object") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_get_dashboard_info_by_slug(mock_find_object, mcp_server): """Test getting dashboard info using slug identifier.""" dashboard = Mock() @@ -423,7 +418,6 @@ async def test_get_dashboard_info_by_slug(mock_find_object, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = "{}" - dashboard.position_json = "{}" dashboard.published = True dashboard.is_managed_externally = False dashboard.external_url = None @@ -450,7 +444,7 @@ async def test_get_dashboard_info_by_slug(mock_find_object, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio +@pytest.mark.asyncio() async def test_list_dashboards_custom_uuid_slug_columns(mock_list, mcp_server): """Test that custom column selection includes UUID and slug when explicitly requested.""" @@ -475,7 +469,6 @@ async def test_list_dashboards_custom_uuid_slug_columns(mock_list, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.thumbnail_url = None @@ -542,8 +535,8 @@ def test_minimal_default_columns_constant(self): # Heavy columns should NOT be in defaults assert "charts" not in DASHBOARD_DEFAULT_COLUMNS assert "published" not in DASHBOARD_DEFAULT_COLUMNS - assert "json_metadata" not in DASHBOARD_DEFAULT_COLUMNS - assert "position_json" not in DASHBOARD_DEFAULT_COLUMNS + assert "native_filters" not in DASHBOARD_DEFAULT_COLUMNS + assert "cross_filters_enabled" not in DASHBOARD_DEFAULT_COLUMNS assert "uuid" not in DASHBOARD_DEFAULT_COLUMNS def test_empty_select_columns_default(self): @@ -563,7 +556,7 @@ def test_explicit_select_columns(self): assert len(request.select_columns) == 4 @patch("superset.daos.dashboard.DashboardDAO.list") - @pytest.mark.asyncio + @pytest.mark.asyncio() async def test_default_columns_in_response(self, mock_list, mcp_server): """Test that minimal default columns appear in response metadata.""" dashboard = Mock() @@ -587,7 +580,6 @@ async def test_default_columns_in_response(self, mock_list, mcp_server): dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None - dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.thumbnail_url = None @@ -610,8 +602,8 @@ async def test_default_columns_in_response(self, mock_list, mcp_server): assert "changed_on_humanized" in data["columns_requested"] # Verify heavy columns are NOT in columns_loaded by default - assert "json_metadata" not in data["columns_loaded"] - assert "position_json" not in data["columns_loaded"] + assert "native_filters" not in data["columns_loaded"] + assert "cross_filters_enabled" not in data["columns_loaded"] class TestDashboardSortableColumns: @@ -637,7 +629,7 @@ def test_dashboard_sortable_columns_definition(self): assert "uuid" not in SORTABLE_DASHBOARD_COLUMNS @patch("superset.daos.dashboard.DashboardDAO.list") - @pytest.mark.asyncio + @pytest.mark.asyncio() async def test_list_dashboards_with_valid_order_column(self, mock_list, mcp_server): """Test list_dashboards with valid order column.""" mock_list.return_value = ([], 0) From ed6b43c7bdf18dbf7c45d5e957d9399072f6c90b Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Fri, 3 Apr 2026 20:55:55 -0400 Subject: [PATCH 2/6] fix(mcp): add isinstance(dict) guards after JSON parsing Address PR review comments: - Add isinstance(metadata, dict) guard in _extract_native_filters and _extract_cross_filters_enabled to handle non-object JSON (e.g. "[]", "123") gracefully instead of raising AttributeError - Add isinstance(raw_targets, list) guard for corrupted filter targets to prevent Pydantic ValidationError - Fix stale ChartInfo reference in module docstring - Add tests for non-dict top-level JSON edge cases --- superset/mcp_service/dashboard/schemas.py | 12 ++++++++++-- .../mcp_service/dashboard/test_dashboard_schemas.py | 12 ++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index 8e20612d2f08..1709ea5e91af 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -28,7 +28,7 @@ dashboard_title="Sales Dashboard", published=True, owners=[UserInfo(id=1, username="admin")], - charts=[ChartInfo(id=1, slice_name="Sales Chart")] + charts=[DashboardChartSummary(id=1, slice_name="Sales Chart")] ) # For dashboard list responses @@ -537,6 +537,9 @@ def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterS except (ValueError, TypeError): return [] + if not isinstance(metadata, dict): + return [] + native_filters = metadata.get("native_filter_configuration", []) if not isinstance(native_filters, list): return [] @@ -545,12 +548,15 @@ def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterS for f in native_filters: if not isinstance(f, dict): continue + raw_targets = f.get("targets", []) + if not isinstance(raw_targets, list): + raw_targets = [] summaries.append( NativeFilterSummary( id=f.get("id"), name=f.get("name"), filter_type=f.get("filterType"), - targets=f.get("targets", []), + targets=raw_targets, ) ) return summaries @@ -564,6 +570,8 @@ def _extract_cross_filters_enabled(json_metadata_str: str | None) -> bool | None metadata = json_loads(json_metadata_str) except (ValueError, TypeError): return None + if not isinstance(metadata, dict): + return None value = metadata.get("cross_filters_enabled") if isinstance(value, bool): return value diff --git a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py index cda35db3f8f6..d7cd38f48357 100644 --- a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py +++ b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py @@ -244,6 +244,12 @@ def test_skips_non_dict_entries(self): result = _extract_native_filters(metadata) assert len(result) == 1 + def test_non_dict_top_level_json(self): + """json_metadata that parses to a list/number should return empty.""" + assert _extract_native_filters("[]") == [] + assert _extract_native_filters("123") == [] + assert _extract_native_filters('"just a string"') == [] + class TestExtractCrossFiltersEnabled: """Tests for _extract_cross_filters_enabled helper.""" @@ -266,3 +272,9 @@ def test_non_bool_value(self): assert ( _extract_cross_filters_enabled('{"cross_filters_enabled": "yes"}') is None ) + + def test_non_dict_top_level_json(self): + """json_metadata that parses to a list/number should return None.""" + assert _extract_cross_filters_enabled("[]") is None + assert _extract_cross_filters_enabled("123") is None + assert _extract_cross_filters_enabled('"just a string"') is None From 795284775167481f8d38f8ecef858b23cfe5035e Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Fri, 3 Apr 2026 21:02:16 -0400 Subject: [PATCH 3/6] fix(mcp): extract shared _parse_json_metadata helper for safe parsing Refactor both _extract_native_filters and _extract_cross_filters_enabled to use a shared _parse_json_metadata helper that safely handles None, invalid JSON, and non-dict JSON values (e.g. "[]", "123"). This addresses review feedback about isinstance(dict) guards by centralizing the validation in one place. --- superset/mcp_service/dashboard/schemas.py | 42 +++++++++++++---------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index 1709ea5e91af..b8b26e79ae0b 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -523,21 +523,32 @@ class GenerateDashboardResponse(BaseModel): error: str | None = Field(None, description="Error message, if creation failed") -def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterSummary]: - """Extract native filter summaries from raw json_metadata string. +def _parse_json_metadata(json_metadata_str: str | None) -> Dict[str, Any] | None: + """Parse json_metadata string into a dict, returning None on any failure. - Parses the json_metadata JSON blob and pulls out only the filter - name, type, and targets — dropping verbose fields like controlValues, - defaultDataMask, scope, and cascadeParentIds. + Handles None/empty input, invalid JSON, and non-dict JSON values + (e.g. ``"[]"``, ``"123"``) by returning None instead of raising. """ if not json_metadata_str: - return [] + return None try: metadata = json_loads(json_metadata_str) except (ValueError, TypeError): - return [] - + return None if not isinstance(metadata, dict): + return None + return metadata + + +def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterSummary]: + """Extract native filter summaries from raw json_metadata string. + + Parses the json_metadata JSON blob and pulls out only the filter + name, type, and targets — dropping verbose fields like controlValues, + defaultDataMask, scope, and cascadeParentIds. + """ + metadata = _parse_json_metadata(json_metadata_str) + if metadata is None: return [] native_filters = metadata.get("native_filter_configuration", []) @@ -564,17 +575,12 @@ def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterS def _extract_cross_filters_enabled(json_metadata_str: str | None) -> bool | None: """Extract the cross_filters_enabled flag from json_metadata.""" - if not json_metadata_str: - return None - try: - metadata = json_loads(json_metadata_str) - except (ValueError, TypeError): - return None - if not isinstance(metadata, dict): + metadata = _parse_json_metadata(json_metadata_str) + if metadata is None: return None - value = metadata.get("cross_filters_enabled") - if isinstance(value, bool): - return value + cross_filters = metadata.get("cross_filters_enabled") + if isinstance(cross_filters, bool): + return cross_filters return None From 41ef88db32ad44d0f747f3de73d7205a7875f007 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Fri, 3 Apr 2026 21:40:36 -0400 Subject: [PATCH 4/6] fix(mcp): apply ruff 0.9.7 PT023/PT001 auto-fixes to test file Add parentheses to @pytest.mark.asyncio and @pytest.fixture decorators to match CI pre-commit ruff version. --- .../dashboard/tool/test_dashboard_tools.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py index e16a20809ed3..21d301492e19 100644 --- a/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py +++ b/tests/unit_tests/mcp_service/dashboard/tool/test_dashboard_tools.py @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -@pytest.fixture() +@pytest.fixture def mcp_server(): return mcp @@ -53,7 +53,7 @@ def mock_auth(): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_basic(mock_list, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -119,7 +119,7 @@ async def test_list_dashboards_basic(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_with_filters(mock_list, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -186,7 +186,7 @@ async def test_list_dashboards_with_filters(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_with_string_filters(mock_list, mcp_server): mock_list.return_value = ([], 0) async with Client(mcp_server) as client: # noqa: F841 @@ -201,7 +201,7 @@ async def test_list_dashboards_with_string_filters(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_api_error(mock_list, mcp_server): mock_list.side_effect = ToolError("API request failed") async with Client(mcp_server) as client: @@ -212,7 +212,7 @@ async def test_list_dashboards_api_error(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_with_search(mock_list, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -272,7 +272,7 @@ async def test_list_dashboards_with_search(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_with_simple_filters(mock_list, mcp_server): mock_list.return_value = ([], 0) async with Client(mcp_server) as client: @@ -289,7 +289,7 @@ async def test_list_dashboards_with_simple_filters(mock_list, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.find_by_id") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_get_dashboard_info_success(mock_info, mcp_server): dashboard = Mock() dashboard.id = 1 @@ -342,7 +342,7 @@ async def test_get_dashboard_info_success(mock_info, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.find_by_id") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_get_dashboard_info_not_found(mock_info, mcp_server): mock_info.return_value = None # Not found returns None async with Client(mcp_server) as client: @@ -353,7 +353,7 @@ async def test_get_dashboard_info_not_found(mock_info, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.find_by_id") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_get_dashboard_info_access_denied(mock_info, mcp_server): mock_info.return_value = None # Access denied returns None async with Client(mcp_server) as client: @@ -367,7 +367,7 @@ async def test_get_dashboard_info_access_denied(mock_info, mcp_server): @patch("superset.mcp_service.mcp_core.ModelGetInfoCore._find_object") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_get_dashboard_info_by_uuid(mock_find_object, mcp_server): """Test getting dashboard info using UUID identifier.""" dashboard = Mock() @@ -406,7 +406,7 @@ async def test_get_dashboard_info_by_uuid(mock_find_object, mcp_server): @patch("superset.mcp_service.mcp_core.ModelGetInfoCore._find_object") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_get_dashboard_info_by_slug(mock_find_object, mcp_server): """Test getting dashboard info using slug identifier.""" dashboard = Mock() @@ -444,7 +444,7 @@ async def test_get_dashboard_info_by_slug(mock_find_object, mcp_server): @patch("superset.daos.dashboard.DashboardDAO.list") -@pytest.mark.asyncio() +@pytest.mark.asyncio async def test_list_dashboards_custom_uuid_slug_columns(mock_list, mcp_server): """Test that custom column selection includes UUID and slug when explicitly requested.""" @@ -556,7 +556,7 @@ def test_explicit_select_columns(self): assert len(request.select_columns) == 4 @patch("superset.daos.dashboard.DashboardDAO.list") - @pytest.mark.asyncio() + @pytest.mark.asyncio async def test_default_columns_in_response(self, mock_list, mcp_server): """Test that minimal default columns appear in response metadata.""" dashboard = Mock() @@ -629,7 +629,7 @@ def test_dashboard_sortable_columns_definition(self): assert "uuid" not in SORTABLE_DASHBOARD_COLUMNS @patch("superset.daos.dashboard.DashboardDAO.list") - @pytest.mark.asyncio() + @pytest.mark.asyncio async def test_list_dashboards_with_valid_order_column(self, mock_list, mcp_server): """Test list_dashboards with valid order column.""" mock_list.return_value = ([], 0) From f62af4219db7650a4315dc48f004139e2b0b35c4 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 9 Apr 2026 11:56:33 -0400 Subject: [PATCH 5/6] fix(mcp): add omitted_fields metadata to DashboardInfo response Address reviewer feedback: instead of silently dropping json_metadata and position_json, include an omitted_fields dict that tells the LLM agent what was stripped, approximate sizes, and why. Introduces a reusable OmittedFieldsBuilder utility in response_utils.py that any MCP tool serializer can use for consistent omission metadata. Pattern follows industry best practices (mcp-git-polite, Axiom, Blockscout) for explicit omission signaling over silent drops. --- superset/mcp_service/dashboard/schemas.py | 48 ++++++ superset/mcp_service/utils/response_utils.py | 152 ++++++++++++++++++ .../dashboard/test_dashboard_schemas.py | 61 +++++++ 3 files changed, 261 insertions(+) create mode 100644 superset/mcp_service/utils/response_utils.py diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index b8b26e79ae0b..82909ecc3807 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -375,6 +375,17 @@ class DashboardInfo(BaseModel): description="Whether cross-filtering between charts is enabled.", ) + # Omission metadata — tells the agent what was stripped and why + omitted_fields: Dict[str, str] = Field( + default_factory=dict, + description=( + "Fields omitted from this response to reduce size. Keys are field " + "names, values describe what was omitted and how to access the full " + "data. Useful filter information has been extracted into " + "native_filters and cross_filters_enabled above." + ), + ) + # Fields for permalink/filter state support permalink_key: str | None = Field( None, @@ -584,6 +595,38 @@ def _extract_cross_filters_enabled(json_metadata_str: str | None) -> bool | None return None +def _build_omitted_fields( + json_metadata_str: str | None, position_json_str: str | None +) -> Dict[str, str]: + """Build omission metadata describing which fields were stripped and why. + + Uses the shared OmittedFieldsBuilder utility so the pattern is consistent + across all MCP tool serializers. + """ + from superset.mcp_service.utils.response_utils import OmittedFieldsBuilder + + return ( + OmittedFieldsBuilder() + .add_raw_field( + "position_json", + raw_value=position_json_str, + reason=( + "Internal layout tree with component positions/hierarchy. " + "Not useful for analysis or LLM context." + ), + ) + .add_extracted_field( + "json_metadata", + raw_value=json_metadata_str, + reason=( + "native_filters and cross_filters_enabled extracted into " + "dedicated fields above." + ), + ) + .build() + ) + + def _serialize_chart_summary(chart: Any) -> DashboardChartSummary | None: """Serialize a chart to a lightweight summary for dashboard context.""" if not chart: @@ -638,6 +681,9 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: chart_count=len(dashboard.slices) if dashboard.slices else 0, native_filters=_extract_native_filters(dashboard.json_metadata), cross_filters_enabled=_extract_cross_filters_enabled(dashboard.json_metadata), + omitted_fields=_build_omitted_fields( + dashboard.json_metadata, dashboard.position_json + ), owners=[ info for owner in dashboard.owners @@ -688,6 +734,7 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: ) json_metadata_str = getattr(dashboard, "json_metadata", None) + position_json_str = getattr(dashboard, "position_json", None) return DashboardInfo( id=dashboard_id, @@ -711,6 +758,7 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: certification_details=getattr(dashboard, "certification_details", None), native_filters=_extract_native_filters(json_metadata_str), cross_filters_enabled=_extract_cross_filters_enabled(json_metadata_str), + omitted_fields=_build_omitted_fields(json_metadata_str, position_json_str), is_managed_externally=getattr(dashboard, "is_managed_externally", None), external_url=getattr(dashboard, "external_url", None), uuid=str(getattr(dashboard, "uuid", "")) diff --git a/superset/mcp_service/utils/response_utils.py b/superset/mcp_service/utils/response_utils.py new file mode 100644 index 000000000000..777c619cfb2b --- /dev/null +++ b/superset/mcp_service/utils/response_utils.py @@ -0,0 +1,152 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Utilities for building MCP tool responses with explicit omission metadata. + +When MCP tool responses strip large fields to reduce context window usage, +the LLM agent should be told *what* was omitted and *why* — otherwise it +cannot distinguish "field is empty" from "field was stripped for size". + +This module provides a reusable builder for omission metadata that any +MCP tool serializer can use. + +Industry context (as of 2026): +- The MCP spec has no standard for field omission signaling. +- Silent omission is considered an anti-pattern (Grafana MCP #557). +- Production servers (mcp-git-polite, Blockscout, Axiom) converge on + explicit omission indicators with size hints and retrieval guidance. +- Anthropic's "Writing Tools for Agents" blog recommends surfacing + what was stripped so agents can decide whether to fetch full data. + +Usage example:: + + from superset.mcp_service.utils.response_utils import OmittedFieldsBuilder + + omitted = ( + OmittedFieldsBuilder() + .add_raw_field( + "position_json", + raw_value=dashboard.position_json, + reason="Internal layout tree — not useful for analysis.", + ) + .add_extracted_field( + "json_metadata", + raw_value=dashboard.json_metadata, + reason="native_filters and cross_filters_enabled extracted above.", + ) + .build() + ) + # Returns: {"position_json": "Omitted (~42 KB) — ...", ...} +""" + +from __future__ import annotations + +from typing import Dict + + +def _byte_size_label(value: str | None) -> str: + """Return a human-readable size label for a string value.""" + if not value or not isinstance(value, str): + return "empty" + size_bytes = len(value.encode("utf-8", errors="replace")) + if size_bytes < 1024: + return f"{size_bytes} B" + return f"{size_bytes / 1024:.0f} KB" + + +class OmittedFieldsBuilder: + """Builder for constructing omission metadata dicts. + + Produces a ``Dict[str, str]`` mapping field names to human-readable + descriptions of what was omitted, including approximate sizes. + + Two field types are supported: + + - **Raw fields** (``add_raw_field``): The field was stripped entirely + with no replacement. The agent has no way to access this data + unless a companion tool exists. + + - **Extracted fields** (``add_extracted_field``): The raw blob was + stripped, but useful subsets were extracted into structured fields + on the same response object (e.g. ``native_filters`` extracted + from ``json_metadata``). + + All methods return ``self`` for fluent chaining. + """ + + def __init__(self) -> None: + self._fields: Dict[str, str] = {} + + def add_raw_field( + self, + field_name: str, + raw_value: str | None, + reason: str, + ) -> "OmittedFieldsBuilder": + """Record a field that was stripped with no replacement. + + Parameters + ---------- + field_name: + The original field name (e.g. ``"position_json"``). + raw_value: + The raw value that was omitted (used only to compute size). + Pass ``None`` if the field was empty/unset. + reason: + Why the field was omitted, written for an LLM audience. + """ + size = _byte_size_label(raw_value) + has_data = isinstance(raw_value, str) and len(raw_value) > 0 + if has_data: + self._fields[field_name] = f"Omitted (~{size}) — {reason}" + else: + self._fields[field_name] = f"Omitted ({size}) — {reason}" + return self + + def add_extracted_field( + self, + field_name: str, + raw_value: str | None, + reason: str, + ) -> "OmittedFieldsBuilder": + """Record a field whose useful parts were extracted into other fields. + + Parameters + ---------- + field_name: + The original raw field name (e.g. ``"json_metadata"``). + raw_value: + The raw value that was omitted (used only to compute size). + reason: + Explanation of what was extracted and where, for LLM context. + """ + size = _byte_size_label(raw_value) + has_data = isinstance(raw_value, str) and len(raw_value) > 0 + if has_data: + self._fields[field_name] = ( + f"Omitted (~{size}), useful parts extracted — {reason}" + ) + else: + self._fields[field_name] = ( + f"Omitted ({size}), useful parts extracted — {reason}" + ) + return self + + def build(self) -> Dict[str, str]: + """Return the omission metadata dict.""" + return dict(self._fields) diff --git a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py index d7cd38f48357..98fe464a24fe 100644 --- a/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py +++ b/tests/unit_tests/mcp_service/dashboard/test_dashboard_schemas.py @@ -58,6 +58,7 @@ def _mock_dashboard( dashboard.certified_by = None dashboard.certification_details = None dashboard.json_metadata = None + dashboard.position_json = None dashboard.is_managed_externally = False dashboard.external_url = None dashboard.uuid = None @@ -278,3 +279,63 @@ def test_non_dict_top_level_json(self): assert _extract_cross_filters_enabled("[]") is None assert _extract_cross_filters_enabled("123") is None assert _extract_cross_filters_enabled('"just a string"') is None + + +class TestOmittedFieldsBuilder: + """Tests for the shared OmittedFieldsBuilder utility.""" + + def test_builder_basic(self): + from superset.mcp_service.utils.response_utils import OmittedFieldsBuilder + + result = ( + OmittedFieldsBuilder() + .add_raw_field("big_field", "x" * 2048, "Too large for context.") + .add_extracted_field("meta_field", "y" * 512, "Useful parts above.") + .build() + ) + assert "big_field" in result + assert "~2 KB" in result["big_field"] + assert "Too large" in result["big_field"] + assert "meta_field" in result + assert "extracted" in result["meta_field"] + + def test_builder_none_values(self): + from superset.mcp_service.utils.response_utils import OmittedFieldsBuilder + + result = ( + OmittedFieldsBuilder() + .add_raw_field("empty_field", None, "Was not set.") + .add_extracted_field("also_empty", None, "Nothing to extract.") + .build() + ) + assert "empty" in result["empty_field"] + assert "empty" in result["also_empty"] + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_omitted_fields_in_serialized_dashboard(self, mock_base_url): + """omitted_fields should describe what was stripped and include sizes.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=1) + dashboard.json_metadata = json_dumps( + {"color_scheme": "preset", "native_filter_configuration": []} + ) + dashboard.position_json = json_dumps({"ROOT_ID": {"children": ["GRID_ID"]}}) + + result = serialize_dashboard_object(dashboard) + + assert "json_metadata" in result.omitted_fields + assert "position_json" in result.omitted_fields + assert "extracted" in result.omitted_fields["json_metadata"] + assert "layout tree" in result.omitted_fields["position_json"].lower() + + @patch("superset.mcp_service.utils.url_utils.get_superset_base_url") + def test_omitted_fields_with_none_values(self, mock_base_url): + """omitted_fields should still be present when raw fields are None.""" + mock_base_url.return_value = "http://localhost:8088" + + dashboard = _mock_dashboard(id=1) + result = serialize_dashboard_object(dashboard) + + assert "json_metadata" in result.omitted_fields + assert "position_json" in result.omitted_fields From 2b51055bb2ed9d6d66feeafe79ad52ac0682178f Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Thu, 9 Apr 2026 14:20:51 -0400 Subject: [PATCH 6/6] =?UTF-8?q?fix(mcp):=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20targets=20validation,=20naming,=20lazy-load=20guard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Filter non-dict entries from native filter targets to prevent Pydantic ValidationError on corrupted json_metadata - Rename _serialize_chart_summary to serialize_chart_summary (public API since it's imported cross-module) - Use `if chart_id is not None:` instead of `if chart_id:` to handle falsy-but-valid IDs - Use getattr() for json_metadata/position_json in dashboard_serializer to avoid triggering SQLAlchemy lazy-loads on deferred columns --- superset/mcp_service/dashboard/schemas.py | 22 ++++++++++++------- .../tool/add_chart_to_existing_dashboard.py | 4 ++-- .../dashboard/tool/generate_dashboard.py | 4 ++-- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/superset/mcp_service/dashboard/schemas.py b/superset/mcp_service/dashboard/schemas.py index 82909ecc3807..e61573dd7ad1 100644 --- a/superset/mcp_service/dashboard/schemas.py +++ b/superset/mcp_service/dashboard/schemas.py @@ -573,12 +573,13 @@ def _extract_native_filters(json_metadata_str: str | None) -> List[NativeFilterS raw_targets = f.get("targets", []) if not isinstance(raw_targets, list): raw_targets = [] + targets = [t for t in raw_targets if isinstance(t, dict)] summaries.append( NativeFilterSummary( id=f.get("id"), name=f.get("name"), filter_type=f.get("filterType"), - targets=raw_targets, + targets=targets, ) ) return summaries @@ -627,7 +628,7 @@ def _build_omitted_fields( ) -def _serialize_chart_summary(chart: Any) -> DashboardChartSummary | None: +def serialize_chart_summary(chart: Any) -> DashboardChartSummary | None: """Serialize a chart to a lightweight summary for dashboard context.""" if not chart: return None @@ -635,7 +636,7 @@ def _serialize_chart_summary(chart: Any) -> DashboardChartSummary | None: chart_id = getattr(chart, "id", None) chart_url = None - if chart_id: + if chart_id is not None: chart_url = f"{get_superset_base_url()}/explore/?slice_id={chart_id}" return DashboardChartSummary( @@ -679,10 +680,15 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: created_on_humanized=dashboard.created_on_humanized, changed_on_humanized=dashboard.changed_on_humanized, chart_count=len(dashboard.slices) if dashboard.slices else 0, - native_filters=_extract_native_filters(dashboard.json_metadata), - cross_filters_enabled=_extract_cross_filters_enabled(dashboard.json_metadata), + native_filters=_extract_native_filters( + getattr(dashboard, "json_metadata", None) + ), + cross_filters_enabled=_extract_cross_filters_enabled( + getattr(dashboard, "json_metadata", None) + ), omitted_fields=_build_omitted_fields( - dashboard.json_metadata, dashboard.position_json + getattr(dashboard, "json_metadata", None), + getattr(dashboard, "position_json", None), ), owners=[ info @@ -705,7 +711,7 @@ def dashboard_serializer(dashboard: "Dashboard") -> DashboardInfo: charts=[ summary for chart in dashboard.slices - if (summary := _serialize_chart_summary(chart)) is not None + if (summary := serialize_chart_summary(chart)) is not None ] if dashboard.slices else [], @@ -787,7 +793,7 @@ def serialize_dashboard_object(dashboard: Any) -> DashboardInfo: charts=[ summary for chart in getattr(dashboard, "slices", []) - if (summary := _serialize_chart_summary(chart)) is not None + if (summary := serialize_chart_summary(chart)) is not None ] if getattr(dashboard, "slices", None) else [], diff --git a/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py b/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py index 73d90136d839..d3d9293ce5b3 100644 --- a/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py +++ b/superset/mcp_service/dashboard/tool/add_chart_to_existing_dashboard.py @@ -37,10 +37,10 @@ GRID_DEFAULT_CHART_WIDTH, ) from superset.mcp_service.dashboard.schemas import ( - _serialize_chart_summary, AddChartToDashboardRequest, AddChartToDashboardResponse, DashboardInfo, + serialize_chart_summary, ) from superset.mcp_service.utils.url_utils import get_superset_base_url from superset.utils import json @@ -525,7 +525,7 @@ def add_chart_to_existing_dashboard( charts=[ obj for chart in getattr(updated_dashboard, "slices", []) - if (obj := _serialize_chart_summary(chart)) is not None + if (obj := serialize_chart_summary(chart)) is not None ], ) diff --git a/superset/mcp_service/dashboard/tool/generate_dashboard.py b/superset/mcp_service/dashboard/tool/generate_dashboard.py index 38dbdbabd2ed..1cec383f6435 100644 --- a/superset/mcp_service/dashboard/tool/generate_dashboard.py +++ b/superset/mcp_service/dashboard/tool/generate_dashboard.py @@ -394,7 +394,7 @@ def generate_dashboard( # noqa: C901 # Convert to our response format from superset.mcp_service.dashboard.schemas import ( - _serialize_chart_summary, + serialize_chart_summary, serialize_tag_object, serialize_user_object, ) @@ -426,7 +426,7 @@ def generate_dashboard( # noqa: C901 charts=[ obj for chart in getattr(dashboard, "slices", []) - if (obj := _serialize_chart_summary(chart)) is not None + if (obj := serialize_chart_summary(chart)) is not None ], )