From bbc6abf5cc10af13d09b886f7291423b11ae8212 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sat, 4 Apr 2026 18:47:18 +0000 Subject: [PATCH 1/7] feat(mcp): add database connection listing and info tools Add MCP tools for listing and getting database connection details, filling a gap where databases were the only core Superset resource without MCP tool coverage. New tools: - list_databases: List database connections with filtering/pagination - get_database_info: Get detailed database connection metadata by ID Also registers "database" as a model type in get_schema for column/filter discovery. Co-Authored-By: Claude Opus 4.6 --- superset/mcp_service/app.py | 8 + .../mcp_service/common/schema_discovery.py | 75 +++- superset/mcp_service/database/__init__.py | 16 + superset/mcp_service/database/schemas.py | 324 ++++++++++++++++++ .../mcp_service/database/tool/__init__.py | 24 ++ .../database/tool/get_database_info.py | 130 +++++++ .../database/tool/list_databases.py | 174 ++++++++++ superset/mcp_service/mcp_core.py | 2 +- .../mcp_service/system/tool/get_schema.py | 28 +- .../mcp_service/database/__init__.py | 16 + .../mcp_service/database/tool/__init__.py | 16 + .../database/tool/test_database_tools.py | 207 +++++++++++ 12 files changed, 1015 insertions(+), 5 deletions(-) create mode 100644 superset/mcp_service/database/__init__.py create mode 100644 superset/mcp_service/database/schemas.py create mode 100644 superset/mcp_service/database/tool/__init__.py create mode 100644 superset/mcp_service/database/tool/get_database_info.py create mode 100644 superset/mcp_service/database/tool/list_databases.py create mode 100644 tests/unit_tests/mcp_service/database/__init__.py create mode 100644 tests/unit_tests/mcp_service/database/tool/__init__.py create mode 100644 tests/unit_tests/mcp_service/database/tool/test_database_tools.py diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py index 086bc7f668be..4adca208b377 100644 --- a/superset/mcp_service/app.py +++ b/superset/mcp_service/app.py @@ -54,6 +54,10 @@ def get_default_instructions(branding: str = "Apache Superset") -> str: - generate_dashboard: Create a dashboard from chart IDs - add_chart_to_existing_dashboard: Add a chart to an existing dashboard +Database Connections: +- list_databases: List database connections with advanced filters (1-based pagination) +- get_database_info: Get detailed database connection info by ID (backend, permissions) + Dataset Management: - list_datasets: List datasets with advanced filters (1-based pagination) - get_dataset_info: Get detailed dataset information by ID (includes columns/metrics) @@ -432,6 +436,10 @@ def create_mcp_app( get_dashboard_info, list_dashboards, ) +from superset.mcp_service.database.tool import ( # noqa: F401, E402 + get_database_info, + list_databases, +) from superset.mcp_service.dataset.tool import ( # noqa: F401, E402 get_dataset_info, list_datasets, diff --git a/superset/mcp_service/common/schema_discovery.py b/superset/mcp_service/common/schema_discovery.py index dda1a0d5412c..515f60a9bb4b 100644 --- a/superset/mcp_service/common/schema_discovery.py +++ b/superset/mcp_service/common/schema_discovery.py @@ -52,7 +52,7 @@ class ModelSchemaInfo(BaseModel): - Default values for each """ - model_type: Literal["chart", "dataset", "dashboard"] = Field( + model_type: Literal["chart", "dataset", "dashboard", "database"] = Field( ..., description="The model type this schema describes" ) select_columns: list[ColumnMetadata] = Field( @@ -82,7 +82,7 @@ class ModelSchemaInfo(BaseModel): class GetSchemaRequest(BaseModel): """Request schema for unified get_schema tool.""" - model_type: Literal["chart", "dataset", "dashboard"] = Field( + model_type: Literal["chart", "dataset", "dashboard", "database"] = Field( ..., description="Model type to get schema for" ) @@ -452,6 +452,67 @@ def get_columns_from_model( } +# Database configuration +DATABASE_DEFAULT_COLUMNS = [ + "id", + "database_name", + "backend", + "expose_in_sqllab", + "changed_on_humanized", +] +DATABASE_SORTABLE_COLUMNS = [ + "id", + "database_name", + "changed_on", + "created_on", +] +DATABASE_SEARCH_COLUMNS = ["database_name"] +DATABASE_EXTRA_COLUMNS: dict[str, ColumnMetadata] = { + "backend": ColumnMetadata( + name="backend", + description="Database backend type (e.g., postgresql, mysql)", + type="str", + is_default=True, + ), + "changed_by": ColumnMetadata( + name="changed_by", + description="Last modifier username", + type="str", + is_default=False, + ), + "changed_by_name": ColumnMetadata( + name="changed_by_name", + description="Last modifier display name", + type="str", + is_default=False, + ), + "changed_on_humanized": ColumnMetadata( + name="changed_on_humanized", + description="Humanized modification time", + type="str", + is_default=True, + ), + "created_by": ColumnMetadata( + name="created_by", + description="Creator username", + type="str", + is_default=False, + ), + "created_by_name": ColumnMetadata( + name="created_by_name", + description="Creator display name", + type="str", + is_default=False, + ), + "created_on_humanized": ColumnMetadata( + name="created_on_humanized", + description="Humanized creation time", + type="str", + is_default=False, + ), +} + + def get_chart_columns() -> list[ColumnMetadata]: """Get column metadata for Chart model dynamically.""" from superset.models.slice import Slice @@ -477,6 +538,15 @@ def get_dashboard_columns() -> list[ColumnMetadata]: ) +def get_database_columns() -> list[ColumnMetadata]: + """Get column metadata for Database model dynamically.""" + from superset.models.core import Database + + return get_columns_from_model( + Database, DATABASE_DEFAULT_COLUMNS, DATABASE_EXTRA_COLUMNS + ) + + def get_all_column_names(columns: list[ColumnMetadata]) -> list[str]: """Extract all column names from column metadata list.""" return [col.name for col in columns] @@ -487,3 +557,4 @@ def get_all_column_names(columns: list[ColumnMetadata]) -> list[str]: CHART_ALL_COLUMNS: list[str] = [] DATASET_ALL_COLUMNS: list[str] = [] DASHBOARD_ALL_COLUMNS: list[str] = [] +DATABASE_ALL_COLUMNS: list[str] = [] diff --git a/superset/mcp_service/database/__init__.py b/superset/mcp_service/database/__init__.py new file mode 100644 index 000000000000..13a83393a912 --- /dev/null +++ b/superset/mcp_service/database/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/superset/mcp_service/database/schemas.py b/superset/mcp_service/database/schemas.py new file mode 100644 index 000000000000..a15fe1ff3394 --- /dev/null +++ b/superset/mcp_service/database/schemas.py @@ -0,0 +1,324 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Pydantic schemas for database-related responses +""" + +from __future__ import annotations + +from datetime import datetime +from typing import Annotated, Any, Dict, List, Literal + +import humanize +from pydantic import ( + BaseModel, + ConfigDict, + Field, + model_serializer, + model_validator, + PositiveInt, +) + +from superset.daos.base import ColumnOperator, ColumnOperatorEnum +from superset.mcp_service.common.cache_schemas import MetadataCacheControl +from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE +from superset.mcp_service.system.schemas import PaginationInfo +from superset.utils import json + + +class DatabaseFilter(ColumnOperator): + """ + Filter object for database listing. + col: The column to filter on. Must be one of the allowed filter fields. + opr: The operator to use. Must be one of the supported operators. + value: The value to filter by (type depends on col and opr). + """ + + col: Literal[ + "database_name", + "backend", + "expose_in_sqllab", + "allow_file_upload", + ] = Field( + ..., + description="Column to filter on. Use get_schema(model_type='database') for " + "available filter columns.", + ) + opr: ColumnOperatorEnum = Field( + ..., + description="Operator to use. Use get_schema(model_type='database') for " + "available operators.", + ) + value: str | int | float | bool | List[str | int | float | bool] = Field( + ..., description="Value to filter by (type depends on col and opr)" + ) + + +class DatabaseInfo(BaseModel): + id: int | None = Field(None, description="Database ID") + database_name: str | None = Field(None, description="Database connection name") + backend: str | None = Field(None, description="Database backend (e.g., postgresql)") + expose_in_sqllab: bool | None = Field( + None, description="Whether exposed in SQL Lab" + ) + allow_ctas: bool | None = Field( + None, description="Whether CREATE TABLE AS is allowed" + ) + allow_cvas: bool | None = Field( + None, description="Whether CREATE VIEW AS is allowed" + ) + allow_dml: bool | None = Field( + None, description="Whether DML statements are allowed" + ) + allow_file_upload: bool | None = Field( + None, description="Whether file upload is allowed" + ) + allow_run_async: bool | None = Field( + None, description="Whether async query execution is allowed" + ) + cache_timeout: int | None = Field( + None, description="Cache timeout override in seconds" + ) + configuration_method: str | None = Field( + None, description="Configuration method (sqlalchemy_form or dynamic_form)" + ) + force_ctas_schema: str | None = Field( + None, description="Schema to force for CTAS queries" + ) + impersonate_user: bool | None = Field( + None, description="Whether to impersonate the logged-in user" + ) + is_managed_externally: bool | None = Field( + None, description="Whether managed by an external system" + ) + external_url: str | None = Field( + None, description="URL of the external management system" + ) + extra: Dict[str, Any | None] | None = Field(None, description="Extra configuration") + changed_by: str | None = Field(None, description="Last modifier (username)") + changed_on: str | datetime | None = Field( + None, description="Last modification timestamp" + ) + changed_on_humanized: str | None = Field( + None, description="Humanized modification time" + ) + created_by: str | None = Field(None, description="Database creator (username)") + created_on: str | datetime | None = Field(None, description="Creation timestamp") + created_on_humanized: str | None = Field( + None, description="Humanized creation time" + ) + model_config = ConfigDict( + from_attributes=True, + ser_json_timedelta="iso8601", + populate_by_name=True, + ) + + @model_serializer(mode="wrap", when_used="json") + def _filter_fields_by_context(self, serializer: Any, info: Any) -> Dict[str, Any]: + """Filter fields based on serialization context. + + If context contains 'select_columns', only include those fields. + Otherwise, include all fields (default behavior). + """ + data = serializer(self) + + if info.context and isinstance(info.context, dict): + select_columns = info.context.get("select_columns") + if select_columns: + requested_fields = set(select_columns) + return {k: v for k, v in data.items() if k in requested_fields} + + return data + + +class DatabaseList(BaseModel): + databases: List[DatabaseInfo] + count: int + total_count: int + page: int + page_size: int + total_pages: int + has_previous: bool + has_next: bool + columns_requested: List[str] = Field( + default_factory=list, + description="Requested columns for the response", + ) + columns_loaded: List[str] = Field( + default_factory=list, + description="Columns that were actually loaded for each database", + ) + columns_available: List[str] = Field( + default_factory=list, + description="All columns available for selection via select_columns parameter", + ) + sortable_columns: List[str] = Field( + default_factory=list, + description="Columns that can be used with order_column parameter", + ) + filters_applied: List[DatabaseFilter] = Field( + default_factory=list, + description="List of advanced filter dicts applied to the query.", + ) + pagination: PaginationInfo | None = None + timestamp: datetime | None = None + model_config = ConfigDict(ser_json_timedelta="iso8601") + + +class ListDatabasesRequest(MetadataCacheControl): + """Request schema for list_databases with clear, unambiguous types.""" + + filters: Annotated[ + List[DatabaseFilter], + Field( + default_factory=list, + description="List of filter objects (column, operator, value). Each " + "filter is an object with 'col', 'opr', and 'value' " + "properties. Cannot be used together with 'search'.", + ), + ] + select_columns: Annotated[ + List[str], + Field( + default_factory=list, + description="List of columns to select. Defaults to common columns if not " + "specified.", + ), + ] + search: Annotated[ + str | None, + Field( + default=None, + description="Text search string to match against database fields. Cannot " + "be used together with 'filters'.", + ), + ] + order_column: Annotated[ + str | None, Field(default=None, description="Column to order results by") + ] + order_direction: Annotated[ + Literal["asc", "desc"], + Field( + default="desc", description="Direction to order results ('asc' or 'desc')" + ), + ] + page: Annotated[ + PositiveInt, + Field(default=1, description="Page number for pagination (1-based)"), + ] + page_size: Annotated[ + int, + Field( + default=DEFAULT_PAGE_SIZE, + gt=0, + le=MAX_PAGE_SIZE, + description=f"Number of items per page (max {MAX_PAGE_SIZE})", + ), + ] + + @model_validator(mode="after") + def validate_search_and_filters(self) -> "ListDatabasesRequest": + """Prevent using both search and filters simultaneously to avoid query + conflicts.""" + if self.search and self.filters: + raise ValueError( + "Cannot use both 'search' and 'filters' parameters simultaneously. " + "Use either 'search' for text-based searching across multiple fields, " + "or 'filters' for precise column-based filtering, but not both." + ) + return self + + +class DatabaseError(BaseModel): + error: str = Field(..., description="Error message") + error_type: str = Field(..., description="Type of error") + timestamp: str | datetime | None = Field(None, description="Error timestamp") + model_config = ConfigDict(ser_json_timedelta="iso8601") + + @classmethod + def create(cls, error: str, error_type: str) -> "DatabaseError": + """Create a standardized DatabaseError with timestamp.""" + from datetime import datetime + + return cls(error=error, error_type=error_type, timestamp=datetime.now()) + + +class GetDatabaseInfoRequest(MetadataCacheControl): + """Request schema for get_database_info with support for ID.""" + + identifier: Annotated[ + int, + Field(description="Database identifier (numeric ID)"), + ] + + +def _parse_json_field(obj: Any, field_name: str) -> Dict[str, Any] | None: + """Parse a field that may be stored as a JSON string into a dict.""" + value = getattr(obj, field_name, None) + if isinstance(value, str): + try: + parsed = json.loads(value) + if isinstance(parsed, dict): + return parsed + except (ValueError, TypeError): + pass + return None + return value + + +def _humanize_timestamp(dt: datetime | None) -> str | None: + """Convert a datetime to a humanized string like '2 hours ago'.""" + if dt is None: + return None + return humanize.naturaltime(datetime.now() - dt) + + +def serialize_database_object(database: Any) -> DatabaseInfo | None: + if not database: + return None + + return DatabaseInfo( + id=getattr(database, "id", None), + database_name=getattr(database, "database_name", None), + backend=getattr(database, "backend", None), + expose_in_sqllab=getattr(database, "expose_in_sqllab", None), + allow_ctas=getattr(database, "allow_ctas", None), + allow_cvas=getattr(database, "allow_cvas", None), + allow_dml=getattr(database, "allow_dml", None), + allow_file_upload=getattr(database, "allow_file_upload", None), + allow_run_async=getattr(database, "allow_run_async", None), + cache_timeout=getattr(database, "cache_timeout", None), + configuration_method=getattr(database, "configuration_method", None), + force_ctas_schema=getattr(database, "force_ctas_schema", None), + impersonate_user=getattr(database, "impersonate_user", None), + is_managed_externally=getattr(database, "is_managed_externally", None), + external_url=getattr(database, "external_url", None), + extra=_parse_json_field(database, "extra"), + changed_by=getattr(database, "changed_by_name", None) + or ( + str(database.changed_by) if getattr(database, "changed_by", None) else None + ), + changed_on=getattr(database, "changed_on", None), + changed_on_humanized=_humanize_timestamp(getattr(database, "changed_on", None)), + created_by=getattr(database, "created_by_name", None) + or ( + str(database.created_by) if getattr(database, "created_by", None) else None + ), + created_on=getattr(database, "created_on", None), + created_on_humanized=_humanize_timestamp(getattr(database, "created_on", None)), + ) diff --git a/superset/mcp_service/database/tool/__init__.py b/superset/mcp_service/database/tool/__init__.py new file mode 100644 index 000000000000..bb87106908e8 --- /dev/null +++ b/superset/mcp_service/database/tool/__init__.py @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .get_database_info import get_database_info +from .list_databases import list_databases + +__all__ = [ + "list_databases", + "get_database_info", +] diff --git a/superset/mcp_service/database/tool/get_database_info.py b/superset/mcp_service/database/tool/get_database_info.py new file mode 100644 index 000000000000..22a75aaa0c2d --- /dev/null +++ b/superset/mcp_service/database/tool/get_database_info.py @@ -0,0 +1,130 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Get database info FastMCP tool + +This module contains the FastMCP tool for getting detailed information +about a specific database connection. +""" + +import logging +from datetime import datetime, timezone + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +from superset.extensions import event_logger +from superset.mcp_service.database.schemas import ( + DatabaseError, + DatabaseInfo, + GetDatabaseInfoRequest, + serialize_database_object, +) +from superset.mcp_service.mcp_core import ModelGetInfoCore + +logger = logging.getLogger(__name__) + + +@tool( + tags=["discovery"], + class_permission_name="Database", + annotations=ToolAnnotations( + title="Get database info", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def get_database_info( + request: GetDatabaseInfoRequest, ctx: Context +) -> DatabaseInfo | DatabaseError: + """Get database connection metadata by ID. + + Returns database configuration including backend type, permissions, + and capabilities. + + IMPORTANT FOR LLM CLIENTS: + - Use numeric ID (e.g., 123) + - To find a database ID, use the list_databases tool first + + Example usage: + ```json + { + "identifier": 1 + } + ``` + """ + await ctx.info( + "Retrieving database information: identifier=%s" % (request.identifier,) + ) + await ctx.debug( + "Metadata cache settings: use_cache=%s refresh_metadata=%s force_refresh=%s" + % ( + request.use_cache, + request.refresh_metadata, + request.force_refresh, + ) + ) + + try: + from superset.daos.database import DatabaseDAO + + with event_logger.log_context(action="mcp.get_database_info.lookup"): + get_tool = ModelGetInfoCore( + dao_class=DatabaseDAO, + output_schema=DatabaseInfo, + error_schema=DatabaseError, + serializer=serialize_database_object, + supports_slug=False, + logger=logger, + ) + + result = get_tool.run_tool(request.identifier) + + if isinstance(result, DatabaseInfo): + await ctx.info( + "Database information retrieved successfully: " + "database_id=%s, database_name=%s, backend=%s" + % ( + result.id, + result.database_name, + result.backend, + ) + ) + else: + await ctx.warning( + "Database retrieval failed: error_type=%s, error=%s" + % (result.error_type, result.error) + ) + + return result + + except Exception as e: + await ctx.error( + "Database information retrieval failed: identifier=%s, error=%s, " + "error_type=%s" + % ( + request.identifier, + str(e), + type(e).__name__, + ) + ) + return DatabaseError( + error=f"Failed to get database info: {str(e)}", + error_type="InternalError", + timestamp=datetime.now(timezone.utc), + ) diff --git a/superset/mcp_service/database/tool/list_databases.py b/superset/mcp_service/database/tool/list_databases.py new file mode 100644 index 000000000000..760b038233fe --- /dev/null +++ b/superset/mcp_service/database/tool/list_databases.py @@ -0,0 +1,174 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +List databases FastMCP tool (Advanced with metadata cache control) + +This module contains the FastMCP tool for listing databases using +advanced filtering with clear, unambiguous request schema and metadata cache control. +""" + +import logging +from typing import TYPE_CHECKING + +from fastmcp import Context +from superset_core.mcp.decorators import tool, ToolAnnotations + +if TYPE_CHECKING: + from superset.models.core import Database + +from superset.extensions import event_logger +from superset.mcp_service.database.schemas import ( + DatabaseFilter, + DatabaseInfo, + DatabaseList, + ListDatabasesRequest, + serialize_database_object, +) +from superset.mcp_service.mcp_core import ModelListCore + +logger = logging.getLogger(__name__) + +# Minimal defaults for reduced token usage - users can request more via select_columns +DEFAULT_DATABASE_COLUMNS = [ + "id", + "database_name", + "backend", + "expose_in_sqllab", + "changed_on_humanized", +] + + +@tool( + tags=["core"], + class_permission_name="Database", + annotations=ToolAnnotations( + title="List databases", + readOnlyHint=True, + destructiveHint=False, + ), +) +async def list_databases(request: ListDatabasesRequest, ctx: Context) -> DatabaseList: + """List database connections with filtering and search. + + Returns database metadata including name, backend type, and permissions. + + Sortable columns for order_column: id, database_name, changed_on, + created_on + """ + await ctx.info( + "Listing databases: page=%s, page_size=%s, search=%s" + % ( + request.page, + request.page_size, + request.search, + ) + ) + await ctx.debug( + "Database listing parameters: filters=%s, order_column=%s, " + "order_direction=%s, select_columns=%s" + % ( + request.filters, + request.order_column, + request.order_direction, + request.select_columns, + ) + ) + await ctx.debug( + "Metadata cache settings: use_cache=%s, refresh_metadata=%s, force_refresh=%s" + % ( + request.use_cache, + request.refresh_metadata, + request.force_refresh, + ) + ) + + try: + from superset.daos.database import DatabaseDAO + from superset.mcp_service.common.schema_discovery import ( + DATABASE_SORTABLE_COLUMNS, + get_all_column_names, + get_database_columns, + ) + + # Get all column names dynamically from the model + all_columns = get_all_column_names(get_database_columns()) + + def _serialize_database( + obj: "Database | None", cols: list[str] | None + ) -> DatabaseInfo | None: + """Serialize database (filtering via model_serializer).""" + return serialize_database_object(obj) + + # Create tool with standard serialization + list_tool = ModelListCore( + dao_class=DatabaseDAO, + output_schema=DatabaseInfo, + item_serializer=_serialize_database, + filter_type=DatabaseFilter, + default_columns=DEFAULT_DATABASE_COLUMNS, + search_columns=["database_name"], + list_field_name="databases", + output_list_schema=DatabaseList, + all_columns=all_columns, + sortable_columns=DATABASE_SORTABLE_COLUMNS, + logger=logger, + ) + + with event_logger.log_context(action="mcp.list_databases.query"): + result = list_tool.run_tool( + filters=request.filters, + search=request.search, + select_columns=request.select_columns, + order_column=request.order_column, + order_direction=request.order_direction, + page=max(request.page - 1, 0), + page_size=request.page_size, + ) + + await ctx.info( + "Databases listed successfully: count=%s, total_count=%s, total_pages=%s" + % ( + len(result.databases) if hasattr(result, "databases") else 0, + getattr(result, "total_count", None), + getattr(result, "total_pages", None), + ) + ) + + # Apply field filtering via serialization context + columns_to_filter = result.columns_requested + await ctx.debug( + "Applying field filtering via serialization context: columns=%s" + % (columns_to_filter,) + ) + with event_logger.log_context(action="mcp.list_databases.serialization"): + return result.model_dump( + mode="json", + context={"select_columns": columns_to_filter}, + ) + + except Exception as e: + await ctx.error( + "Database listing failed: page=%s, page_size=%s, error=%s, error_type=%s" + % ( + request.page, + request.page_size, + str(e), + type(e).__name__, + ) + ) + raise diff --git a/superset/mcp_service/mcp_core.py b/superset/mcp_service/mcp_core.py index 051aa20809a7..447de443ea88 100644 --- a/superset/mcp_service/mcp_core.py +++ b/superset/mcp_service/mcp_core.py @@ -521,7 +521,7 @@ class ModelGetSchemaCore(BaseCore, Generic[S]): def __init__( self, - model_type: Literal["chart", "dataset", "dashboard"], + model_type: Literal["chart", "dataset", "dashboard", "database"], dao_class: Type[BaseDAO[Any]], output_schema: Type[S], select_columns: List[Any], diff --git a/superset/mcp_service/system/tool/get_schema.py b/superset/mcp_service/system/tool/get_schema.py index 57b8909c3014..573880d6c998 100644 --- a/superset/mcp_service/system/tool/get_schema.py +++ b/superset/mcp_service/system/tool/get_schema.py @@ -37,11 +37,15 @@ DASHBOARD_DEFAULT_COLUMNS, DASHBOARD_SEARCH_COLUMNS, DASHBOARD_SORTABLE_COLUMNS, + DATABASE_DEFAULT_COLUMNS, + DATABASE_SEARCH_COLUMNS, + DATABASE_SORTABLE_COLUMNS, DATASET_DEFAULT_COLUMNS, DATASET_SEARCH_COLUMNS, DATASET_SORTABLE_COLUMNS, get_chart_columns, get_dashboard_columns, + get_database_columns, get_dataset_columns, GetSchemaRequest, GetSchemaResponse, @@ -109,14 +113,34 @@ def _get_dashboard_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: ) +def _get_database_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: + """Create database schema core with dynamically extracted columns.""" + # Lazy import to avoid circular dependency at module load time + from superset.daos.database import DatabaseDAO + + return ModelGetSchemaCore( + model_type="database", + dao_class=DatabaseDAO, + output_schema=ModelSchemaInfo, + select_columns=get_database_columns(), + sortable_columns=DATABASE_SORTABLE_COLUMNS, + default_columns=DATABASE_DEFAULT_COLUMNS, + search_columns=DATABASE_SEARCH_COLUMNS, + default_sort="changed_on", + default_sort_direction="desc", + logger=logger, + ) + + # Map model types to their core factory functions _SCHEMA_CORE_FACTORIES: dict[ - Literal["chart", "dataset", "dashboard"], + Literal["chart", "dataset", "dashboard", "database"], Callable[[], ModelGetSchemaCore[ModelSchemaInfo]], ] = { "chart": _get_chart_schema_core, "dataset": _get_dataset_schema_core, "dashboard": _get_dashboard_schema_core, + "database": _get_database_schema_core, } @@ -143,7 +167,7 @@ async def get_schema(request: GetSchemaRequest, ctx: Context) -> GetSchemaRespon Column metadata is extracted dynamically from SQLAlchemy models. Args: - model_type: One of "chart", "dataset", or "dashboard" + model_type: One of "chart", "dataset", "dashboard", or "database" Returns: Comprehensive schema information for the requested model type diff --git a/tests/unit_tests/mcp_service/database/__init__.py b/tests/unit_tests/mcp_service/database/__init__.py new file mode 100644 index 000000000000..13a83393a912 --- /dev/null +++ b/tests/unit_tests/mcp_service/database/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/unit_tests/mcp_service/database/tool/__init__.py b/tests/unit_tests/mcp_service/database/tool/__init__.py new file mode 100644 index 000000000000..13a83393a912 --- /dev/null +++ b/tests/unit_tests/mcp_service/database/tool/__init__.py @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. diff --git a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py new file mode 100644 index 000000000000..bde2593a98f6 --- /dev/null +++ b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py @@ -0,0 +1,207 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import logging +from unittest.mock import MagicMock, patch + +import pytest +from fastmcp import Client +from fastmcp.exceptions import ToolError + +from superset.mcp_service.app import mcp +from superset.mcp_service.database.schemas import ListDatabasesRequest +from superset.utils import json + +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger(__name__) + + +def create_mock_database( + database_id=1, + database_name="examples", + backend="postgresql", + expose_in_sqllab=True, + allow_ctas=False, + allow_cvas=False, + allow_dml=False, + allow_file_upload=False, + allow_run_async=False, +): + """Factory function to create mock database objects with sensible defaults.""" + database = MagicMock() + database.id = database_id + database.database_name = database_name + database.backend = backend + database.verbose_name = None + database.expose_in_sqllab = expose_in_sqllab + database.allow_ctas = allow_ctas + database.allow_cvas = allow_cvas + database.allow_dml = allow_dml + database.allow_file_upload = allow_file_upload + database.allow_run_async = allow_run_async + database.cache_timeout = None + database.configuration_method = "sqlalchemy_form" + database.force_ctas_schema = None + database.impersonate_user = False + database.is_managed_externally = False + database.external_url = None + database.extra = '{"metadata_params": {}, "engine_params": {}}' + database.changed_by_name = "admin" + database.changed_by = None + database.changed_on = None + database.created_by_name = "admin" + database.created_by = None + database.created_on = None + database.owners = [] + return database + + +@pytest.fixture +def mcp_server(): + return mcp + + +@pytest.fixture(autouse=True) +def mock_auth(): + """Mock authentication for all tests.""" + from unittest.mock import Mock, patch + + with patch("superset.mcp_service.auth.get_user_from_request") as mock_get_user: + mock_user = Mock() + mock_user.id = 1 + mock_user.username = "admin" + mock_get_user.return_value = mock_user + yield mock_get_user + + +@patch("superset.daos.database.DatabaseDAO.list") +@pytest.mark.asyncio +async def test_list_databases_basic(mock_list, mcp_server): + """Test basic database listing functionality.""" + database = create_mock_database() + database._mapping = { + "id": database.id, + "database_name": database.database_name, + "backend": database.backend, + "expose_in_sqllab": database.expose_in_sqllab, + } + mock_list.return_value = ([database], 1) + async with Client(mcp_server) as client: + request = ListDatabasesRequest(page=1, page_size=10) + result = await client.call_tool( + "list_databases", {"request": request.model_dump()} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["databases"] is not None + assert len(data["databases"]) == 1 + assert data["databases"][0]["id"] == 1 + assert data["databases"][0]["database_name"] == "examples" + + +@patch("superset.daos.database.DatabaseDAO.list") +@pytest.mark.asyncio +async def test_list_databases_with_search(mock_list, mcp_server): + """Test database listing with search functionality.""" + database = create_mock_database(database_name="production_db") + database._mapping = { + "id": database.id, + "database_name": database.database_name, + } + mock_list.return_value = ([database], 1) + async with Client(mcp_server) as client: + request = ListDatabasesRequest(page=1, page_size=10, search="production") + result = await client.call_tool( + "list_databases", {"request": request.model_dump()} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["databases"] is not None + assert len(data["databases"]) == 1 + assert data["databases"][0]["database_name"] == "production_db" + + +@patch("superset.daos.database.DatabaseDAO.list") +@pytest.mark.asyncio +async def test_list_databases_with_filters(mock_list, mcp_server): + """Test database listing with filters.""" + database = create_mock_database(expose_in_sqllab=True) + database._mapping = { + "id": database.id, + "database_name": database.database_name, + "expose_in_sqllab": database.expose_in_sqllab, + } + mock_list.return_value = ([database], 1) + async with Client(mcp_server) as client: + request = ListDatabasesRequest( + page=1, + page_size=10, + filters=[ + {"col": "expose_in_sqllab", "opr": "eq", "value": True}, + ], + ) + result = await client.call_tool( + "list_databases", {"request": request.model_dump()} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["databases"] is not None + assert len(data["databases"]) == 1 + + +@patch("superset.daos.database.DatabaseDAO.list") +@pytest.mark.asyncio +async def test_list_databases_api_error(mock_list, mcp_server): + """Test error handling when DAO raises an exception.""" + mock_list.side_effect = ToolError("Database error") + async with Client(mcp_server) as client: + request = ListDatabasesRequest(page=1, page_size=10) + with pytest.raises(ToolError) as excinfo: # noqa: PT012 + await client.call_tool("list_databases", {"request": request.model_dump()}) + assert "Database error" in str(excinfo.value) + + +@patch("superset.daos.database.DatabaseDAO.find_by_id") +@pytest.mark.asyncio +async def test_get_database_info_basic(mock_find, mcp_server): + """Test basic get database info functionality.""" + database = create_mock_database() + mock_find.return_value = database + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_database_info", {"request": {"identifier": 1}} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert data["id"] == 1 + assert data["database_name"] == "examples" + assert data["backend"] == "postgresql" + + +@patch("superset.daos.database.DatabaseDAO.find_by_id") +@pytest.mark.asyncio +async def test_get_database_info_not_found(mock_find, mcp_server): + """Test get database info when database does not exist.""" + mock_find.return_value = None + async with Client(mcp_server) as client: + result = await client.call_tool( + "get_database_info", {"request": {"identifier": 999}} + ) + assert result.content is not None + data = json.loads(result.content[0].text) + assert "error" in data From 2160a3f6d40ca887641fbadef5d3b3726a41b15c Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sat, 4 Apr 2026 19:01:50 +0000 Subject: [PATCH 2/7] refactor(mcp): extract ModelType to constants, deduplicate Literal Move the repeated Literal["chart", "dataset", "dashboard", "database"] type alias to a shared ModelType in constants.py, replacing 4 inline occurrences across schema_discovery.py, mcp_core.py, and get_schema.py. Co-Authored-By: Claude Opus 4.6 --- superset/mcp_service/common/schema_discovery.py | 8 ++++---- superset/mcp_service/constants.py | 5 +++++ superset/mcp_service/mcp_core.py | 3 ++- superset/mcp_service/system/tool/get_schema.py | 5 +++-- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/superset/mcp_service/common/schema_discovery.py b/superset/mcp_service/common/schema_discovery.py index 515f60a9bb4b..faf7f9172e47 100644 --- a/superset/mcp_service/common/schema_discovery.py +++ b/superset/mcp_service/common/schema_discovery.py @@ -29,6 +29,8 @@ from pydantic import BaseModel, Field from sqlalchemy.inspection import inspect +from superset.mcp_service.constants import ModelType + class ColumnMetadata(BaseModel): """Metadata for a selectable column.""" @@ -52,7 +54,7 @@ class ModelSchemaInfo(BaseModel): - Default values for each """ - model_type: Literal["chart", "dataset", "dashboard", "database"] = Field( + model_type: ModelType = Field( ..., description="The model type this schema describes" ) select_columns: list[ColumnMetadata] = Field( @@ -82,9 +84,7 @@ class ModelSchemaInfo(BaseModel): class GetSchemaRequest(BaseModel): """Request schema for unified get_schema tool.""" - model_type: Literal["chart", "dataset", "dashboard", "database"] = Field( - ..., description="Model type to get schema for" - ) + model_type: ModelType = Field(..., description="Model type to get schema for") class GetSchemaResponse(BaseModel): diff --git a/superset/mcp_service/constants.py b/superset/mcp_service/constants.py index a23a7949e948..6315ef8755f9 100644 --- a/superset/mcp_service/constants.py +++ b/superset/mcp_service/constants.py @@ -16,6 +16,11 @@ # under the License. """Constants for the MCP service.""" +from typing import Literal + +# Supported model types for schema discovery and MCP tools +ModelType = Literal["chart", "dataset", "dashboard", "database"] + # Pagination defaults DEFAULT_PAGE_SIZE = 10 # Default number of items per page MAX_PAGE_SIZE = 100 # Maximum allowed page_size to prevent oversized responses diff --git a/superset/mcp_service/mcp_core.py b/superset/mcp_service/mcp_core.py index 447de443ea88..3e298129b517 100644 --- a/superset/mcp_service/mcp_core.py +++ b/superset/mcp_service/mcp_core.py @@ -25,6 +25,7 @@ from pydantic import BaseModel from superset.daos.base import BaseDAO +from superset.mcp_service.constants import ModelType from superset.mcp_service.utils import _is_uuid # Type variables for generic model tools @@ -521,7 +522,7 @@ class ModelGetSchemaCore(BaseCore, Generic[S]): def __init__( self, - model_type: Literal["chart", "dataset", "dashboard", "database"], + model_type: ModelType, dao_class: Type[BaseDAO[Any]], output_schema: Type[S], select_columns: List[Any], diff --git a/superset/mcp_service/system/tool/get_schema.py b/superset/mcp_service/system/tool/get_schema.py index 573880d6c998..ef770abd0c77 100644 --- a/superset/mcp_service/system/tool/get_schema.py +++ b/superset/mcp_service/system/tool/get_schema.py @@ -24,7 +24,7 @@ """ import logging -from typing import Callable, Literal +from typing import Callable from fastmcp import Context from superset_core.mcp.decorators import tool, ToolAnnotations @@ -51,6 +51,7 @@ GetSchemaResponse, ModelSchemaInfo, ) +from superset.mcp_service.constants import ModelType from superset.mcp_service.mcp_core import ModelGetSchemaCore logger = logging.getLogger(__name__) @@ -134,7 +135,7 @@ def _get_database_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: # Map model types to their core factory functions _SCHEMA_CORE_FACTORIES: dict[ - Literal["chart", "dataset", "dashboard", "database"], + ModelType, Callable[[], ModelGetSchemaCore[ModelSchemaInfo]], ] = { "chart": _get_chart_schema_core, From 81c4b126e9f7c3d0533dbc15a90b8d45db106a87 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Sat, 4 Apr 2026 19:14:43 +0000 Subject: [PATCH 3/7] fix(mcp): address code review findings for database tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove `backend` from DatabaseFilter (it's a @property, not a SQL column, so DAO filtering would fail at query time) - Add UUID support to GetDatabaseInfoRequest (identifier: int | str) and expose uuid field on DatabaseInfo — Database inherits UUIDMixin via ImportExportMixin - Exclude sensitive columns (sqlalchemy_uri, password, encrypted_extra, server_cert) from schema discovery via new exclude_columns parameter on get_columns_from_model() - Update get_database_info docstring with UUID examples Co-Authored-By: Claude Opus 4.6 --- .../mcp_service/common/schema_discovery.py | 18 +++++++++++++++++- superset/mcp_service/database/schemas.py | 11 +++++++---- .../database/tool/get_database_info.py | 11 +++++++++-- .../database/tool/test_database_tools.py | 1 + 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/superset/mcp_service/common/schema_discovery.py b/superset/mcp_service/common/schema_discovery.py index faf7f9172e47..f97bb29e771d 100644 --- a/superset/mcp_service/common/schema_discovery.py +++ b/superset/mcp_service/common/schema_discovery.py @@ -180,6 +180,7 @@ def get_columns_from_model( model_cls: Type[Any], default_columns: list[str], extra_columns: dict[str, ColumnMetadata] | None = None, + exclude_columns: set[str] | None = None, ) -> list[ColumnMetadata]: """ Dynamically extract column metadata from a SQLAlchemy model. @@ -188,6 +189,7 @@ def get_columns_from_model( model_cls: The SQLAlchemy model class to inspect default_columns: List of column names that should be marked as defaults extra_columns: Additional columns not on the model (e.g., computed fields) + exclude_columns: Column names to omit (e.g., sensitive fields) Returns: List of ColumnMetadata objects for all columns @@ -197,6 +199,8 @@ def get_columns_from_model( for col in mapper.columns: col_name = col.key + if exclude_columns and col_name in exclude_columns: + continue col_type = _get_sqlalchemy_type_name(col.type) # Get description from column doc, comment, or fallback mapping description = ( @@ -538,12 +542,24 @@ def get_dashboard_columns() -> list[ColumnMetadata]: ) +# Sensitive columns that should not be exposed via schema discovery +DATABASE_EXCLUDE_COLUMNS = { + "sqlalchemy_uri", + "password", + "encrypted_extra", + "server_cert", +} + + def get_database_columns() -> list[ColumnMetadata]: """Get column metadata for Database model dynamically.""" from superset.models.core import Database return get_columns_from_model( - Database, DATABASE_DEFAULT_COLUMNS, DATABASE_EXTRA_COLUMNS + Database, + DATABASE_DEFAULT_COLUMNS, + DATABASE_EXTRA_COLUMNS, + exclude_columns=DATABASE_EXCLUDE_COLUMNS, ) diff --git a/superset/mcp_service/database/schemas.py b/superset/mcp_service/database/schemas.py index a15fe1ff3394..335ba782b713 100644 --- a/superset/mcp_service/database/schemas.py +++ b/superset/mcp_service/database/schemas.py @@ -51,7 +51,6 @@ class DatabaseFilter(ColumnOperator): col: Literal[ "database_name", - "backend", "expose_in_sqllab", "allow_file_upload", ] = Field( @@ -71,6 +70,7 @@ class DatabaseFilter(ColumnOperator): class DatabaseInfo(BaseModel): id: int | None = Field(None, description="Database ID") + uuid: str | None = Field(None, description="Database UUID") database_name: str | None = Field(None, description="Database connection name") backend: str | None = Field(None, description="Database backend (e.g., postgresql)") expose_in_sqllab: bool | None = Field( @@ -259,11 +259,11 @@ def create(cls, error: str, error_type: str) -> "DatabaseError": class GetDatabaseInfoRequest(MetadataCacheControl): - """Request schema for get_database_info with support for ID.""" + """Request schema for get_database_info with support for ID or UUID.""" identifier: Annotated[ - int, - Field(description="Database identifier (numeric ID)"), + int | str, + Field(description="Database identifier - can be numeric ID or UUID string"), ] @@ -294,6 +294,9 @@ def serialize_database_object(database: Any) -> DatabaseInfo | None: return DatabaseInfo( id=getattr(database, "id", None), + uuid=str(getattr(database, "uuid", "")) + if getattr(database, "uuid", None) + else None, database_name=getattr(database, "database_name", None), backend=getattr(database, "backend", None), expose_in_sqllab=getattr(database, "expose_in_sqllab", None), diff --git a/superset/mcp_service/database/tool/get_database_info.py b/superset/mcp_service/database/tool/get_database_info.py index 22a75aaa0c2d..f237c0708ed5 100644 --- a/superset/mcp_service/database/tool/get_database_info.py +++ b/superset/mcp_service/database/tool/get_database_info.py @@ -52,13 +52,13 @@ async def get_database_info( request: GetDatabaseInfoRequest, ctx: Context ) -> DatabaseInfo | DatabaseError: - """Get database connection metadata by ID. + """Get database connection metadata by ID or UUID. Returns database configuration including backend type, permissions, and capabilities. IMPORTANT FOR LLM CLIENTS: - - Use numeric ID (e.g., 123) + - Use numeric ID (e.g., 123) or UUID string (e.g., "a1b2c3d4-...") - To find a database ID, use the list_databases tool first Example usage: @@ -67,6 +67,13 @@ async def get_database_info( "identifier": 1 } ``` + + Or with UUID: + ```json + { + "identifier": "a1b2c3d4-5678-90ab-cdef-1234567890ab" + } + ``` """ await ctx.info( "Retrieving database information: identifier=%s" % (request.identifier,) diff --git a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py index bde2593a98f6..81934db1041e 100644 --- a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py +++ b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py @@ -61,6 +61,7 @@ def create_mock_database( database.is_managed_externally = False database.external_url = None database.extra = '{"metadata_params": {}, "engine_params": {}}' + database.uuid = f"test-database-uuid-{database_id}" database.changed_by_name = "admin" database.changed_by = None database.changed_on = None From fc8c06f7a3ba4c0a5feeafbb522b876b5f017d45 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Sat, 4 Apr 2026 16:44:09 -0400 Subject: [PATCH 4/7] fix(mcp): add created_by_fk filter, field validators, and deduplicate defaults for database tools - Add created_by_fk and changed_by_fk to DatabaseFilter columns so users can filter databases by creator (matching chart/dashboard patterns) - Add field_validator for filters and select_columns to handle JSON string parsing from MCP clients (double-serialization bug workaround) - Remove duplicate DEFAULT_DATABASE_COLUMNS from list_databases.py, import from schema_discovery.py instead - Update app.py instructions to document database created_by_fk workflow --- superset/mcp_service/app.py | 6 ++++- superset/mcp_service/database/schemas.py | 23 ++++++++++++++++++- .../database/tool/list_databases.py | 12 ++-------- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py index 4adca208b377..e0d5d293dc05 100644 --- a/superset/mcp_service/app.py +++ b/superset/mcp_service/app.py @@ -118,12 +118,14 @@ def get_default_instructions(branding: str = "Apache Superset") -> str: 3. generate_explore_link(dataset_id, config) -> preview interactively 4. generate_chart(dataset_id, config, save_chart=True) -> save permanently -To find your own charts/dashboards: +To find your own charts/dashboards/databases: 1. get_instance_info -> get current_user.id 2. list_charts(filters=[{{"col": "created_by_fk", "opr": "eq", "value": current_user.id}}]) 3. Or: list_dashboards(filters=[{{"col": "created_by_fk", "opr": "eq", "value": current_user.id}}]) +4. Or: list_databases(filters=[{{"col": "created_by_fk", + "opr": "eq", "value": current_user.id}}]) To explore data with SQL: 1. list_datasets -> find a dataset and note its database_id @@ -172,6 +174,8 @@ def get_default_instructions(branding: str = "Apache Superset") -> str: filters=[{{"col": "created_by_fk", "opr": "eq", "value": }}] - My dashboards: filters=[{{"col": "created_by_fk", "opr": "eq", "value": }}] +- My databases: + filters=[{{"col": "created_by_fk", "opr": "eq", "value": }}] To modify an existing chart (add filters, change metrics, change dimensions, etc.): 1. get_chart_info(chart_id) -> examine current configuration diff --git a/superset/mcp_service/database/schemas.py b/superset/mcp_service/database/schemas.py index 335ba782b713..3eb9cffe4f65 100644 --- a/superset/mcp_service/database/schemas.py +++ b/superset/mcp_service/database/schemas.py @@ -29,6 +29,7 @@ BaseModel, ConfigDict, Field, + field_validator, model_serializer, model_validator, PositiveInt, @@ -38,6 +39,10 @@ from superset.mcp_service.common.cache_schemas import MetadataCacheControl from superset.mcp_service.constants import DEFAULT_PAGE_SIZE, MAX_PAGE_SIZE from superset.mcp_service.system.schemas import PaginationInfo +from superset.mcp_service.utils.schema_utils import ( + parse_json_or_list, + parse_json_or_model_list, +) from superset.utils import json @@ -53,10 +58,14 @@ class DatabaseFilter(ColumnOperator): "database_name", "expose_in_sqllab", "allow_file_upload", + "created_by_fk", + "changed_by_fk", ] = Field( ..., description="Column to filter on. Use get_schema(model_type='database') for " - "available filter columns.", + "available filter columns. Use created_by_fk with the user " + "ID from get_instance_info's current_user to find " + "databases created by a specific user.", ) opr: ColumnOperatorEnum = Field( ..., @@ -231,6 +240,18 @@ class ListDatabasesRequest(MetadataCacheControl): ), ] + @field_validator("filters", mode="before") + @classmethod + def parse_filters(cls, v: Any) -> List[DatabaseFilter]: + """Accept both JSON string and list of objects.""" + return parse_json_or_model_list(v, DatabaseFilter, "filters") + + @field_validator("select_columns", mode="before") + @classmethod + def parse_columns(cls, v: Any) -> List[str]: + """Accept JSON array, list, or comma-separated string.""" + return parse_json_or_list(v, "select_columns") + @model_validator(mode="after") def validate_search_and_filters(self) -> "ListDatabasesRequest": """Prevent using both search and filters simultaneously to avoid query diff --git a/superset/mcp_service/database/tool/list_databases.py b/superset/mcp_service/database/tool/list_databases.py index 760b038233fe..8db4e0fbb353 100644 --- a/superset/mcp_service/database/tool/list_databases.py +++ b/superset/mcp_service/database/tool/list_databases.py @@ -43,15 +43,6 @@ logger = logging.getLogger(__name__) -# Minimal defaults for reduced token usage - users can request more via select_columns -DEFAULT_DATABASE_COLUMNS = [ - "id", - "database_name", - "backend", - "expose_in_sqllab", - "changed_on_humanized", -] - @tool( tags=["core"], @@ -100,6 +91,7 @@ async def list_databases(request: ListDatabasesRequest, ctx: Context) -> Databas try: from superset.daos.database import DatabaseDAO from superset.mcp_service.common.schema_discovery import ( + DATABASE_DEFAULT_COLUMNS, DATABASE_SORTABLE_COLUMNS, get_all_column_names, get_database_columns, @@ -120,7 +112,7 @@ def _serialize_database( output_schema=DatabaseInfo, item_serializer=_serialize_database, filter_type=DatabaseFilter, - default_columns=DEFAULT_DATABASE_COLUMNS, + default_columns=DATABASE_DEFAULT_COLUMNS, search_columns=["database_name"], list_field_name="databases", output_list_schema=DatabaseList, From be7e1abd8ad3657337840384878d3bbe9f6bde1b Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Sat, 4 Apr 2026 18:50:38 -0400 Subject: [PATCH 5/7] fix(mcp): address review bot findings for database tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix datetime.now() without timezone in DatabaseError.create() and _humanize_timestamp() — use timezone-aware datetimes consistently - Add type hints to create_mock_database() test helper - Update mcp_core.py ModelGetSchemaCore docstring to include "database" in the model_type description --- superset/mcp_service/database/schemas.py | 9 ++++++--- superset/mcp_service/mcp_core.py | 2 +- .../database/tool/test_database_tools.py | 20 +++++++++---------- 3 files changed, 17 insertions(+), 14 deletions(-) diff --git a/superset/mcp_service/database/schemas.py b/superset/mcp_service/database/schemas.py index 3eb9cffe4f65..ff2821288b83 100644 --- a/superset/mcp_service/database/schemas.py +++ b/superset/mcp_service/database/schemas.py @@ -274,9 +274,11 @@ class DatabaseError(BaseModel): @classmethod def create(cls, error: str, error_type: str) -> "DatabaseError": """Create a standardized DatabaseError with timestamp.""" - from datetime import datetime + from datetime import datetime, timezone - return cls(error=error, error_type=error_type, timestamp=datetime.now()) + return cls( + error=error, error_type=error_type, timestamp=datetime.now(timezone.utc) + ) class GetDatabaseInfoRequest(MetadataCacheControl): @@ -306,7 +308,8 @@ def _humanize_timestamp(dt: datetime | None) -> str | None: """Convert a datetime to a humanized string like '2 hours ago'.""" if dt is None: return None - return humanize.naturaltime(datetime.now() - dt) + now = datetime.now(dt.tzinfo) if dt.tzinfo else datetime.now() + return humanize.naturaltime(now - dt) def serialize_database_object(database: Any) -> DatabaseInfo | None: diff --git a/superset/mcp_service/mcp_core.py b/superset/mcp_service/mcp_core.py index 3e298129b517..88ca6e7bebef 100644 --- a/superset/mcp_service/mcp_core.py +++ b/superset/mcp_service/mcp_core.py @@ -537,7 +537,7 @@ def __init__( Initialize the schema discovery core. Args: - model_type: The type of model (chart, dataset, dashboard) + model_type: The type of model (chart, dataset, dashboard, database) dao_class: The DAO class to query for filter columns output_schema: Pydantic schema for the response (e.g., ModelSchemaInfo) select_columns: Column metadata (List[ColumnMetadata] or similar) diff --git a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py index 81934db1041e..0aee09ae86de 100644 --- a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py +++ b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py @@ -32,16 +32,16 @@ def create_mock_database( - database_id=1, - database_name="examples", - backend="postgresql", - expose_in_sqllab=True, - allow_ctas=False, - allow_cvas=False, - allow_dml=False, - allow_file_upload=False, - allow_run_async=False, -): + database_id: int = 1, + database_name: str = "examples", + backend: str = "postgresql", + expose_in_sqllab: bool = True, + allow_ctas: bool = False, + allow_cvas: bool = False, + allow_dml: bool = False, + allow_file_upload: bool = False, + allow_run_async: bool = False, +) -> MagicMock: """Factory function to create mock database objects with sensible defaults.""" database = MagicMock() database.id = database_id From 5e3309b618710ff6b9c03c0cb452a779bebc1d21 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Sat, 4 Apr 2026 19:56:37 -0400 Subject: [PATCH 6/7] fix(mcp): address codeant review findings for database tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix misleading "permissions" in app.py and get_database_info docstring — replaced with "capabilities" to match actual response fields - Add changed_on to DATABASE_DEFAULT_COLUMNS so humanized timestamps can be computed reliably (matching chart/dataset pattern) - Fix not-found test to use result.data["error_type"] instead of result.content (matching existing dataset/dashboard test patterns) --- superset/mcp_service/app.py | 2 +- superset/mcp_service/common/schema_discovery.py | 1 + superset/mcp_service/database/tool/get_database_info.py | 4 ++-- .../mcp_service/database/tool/test_database_tools.py | 4 +--- 4 files changed, 5 insertions(+), 6 deletions(-) diff --git a/superset/mcp_service/app.py b/superset/mcp_service/app.py index e0d5d293dc05..fc9df494ab96 100644 --- a/superset/mcp_service/app.py +++ b/superset/mcp_service/app.py @@ -56,7 +56,7 @@ def get_default_instructions(branding: str = "Apache Superset") -> str: Database Connections: - list_databases: List database connections with advanced filters (1-based pagination) -- get_database_info: Get detailed database connection info by ID (backend, permissions) +- get_database_info: Get detailed database connection info by ID (backend, capabilities) Dataset Management: - list_datasets: List datasets with advanced filters (1-based pagination) diff --git a/superset/mcp_service/common/schema_discovery.py b/superset/mcp_service/common/schema_discovery.py index f97bb29e771d..a648f45cc7aa 100644 --- a/superset/mcp_service/common/schema_discovery.py +++ b/superset/mcp_service/common/schema_discovery.py @@ -462,6 +462,7 @@ def get_columns_from_model( "database_name", "backend", "expose_in_sqllab", + "changed_on", "changed_on_humanized", ] DATABASE_SORTABLE_COLUMNS = [ diff --git a/superset/mcp_service/database/tool/get_database_info.py b/superset/mcp_service/database/tool/get_database_info.py index f237c0708ed5..56aef1d2bb41 100644 --- a/superset/mcp_service/database/tool/get_database_info.py +++ b/superset/mcp_service/database/tool/get_database_info.py @@ -54,8 +54,8 @@ async def get_database_info( ) -> DatabaseInfo | DatabaseError: """Get database connection metadata by ID or UUID. - Returns database configuration including backend type, permissions, - and capabilities. + Returns database configuration including backend type and capabilities + (allow_ctas, allow_dml, expose_in_sqllab, etc.). IMPORTANT FOR LLM CLIENTS: - Use numeric ID (e.g., 123) or UUID string (e.g., "a1b2c3d4-...") diff --git a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py index 0aee09ae86de..78ae81c9962e 100644 --- a/tests/unit_tests/mcp_service/database/tool/test_database_tools.py +++ b/tests/unit_tests/mcp_service/database/tool/test_database_tools.py @@ -203,6 +203,4 @@ async def test_get_database_info_not_found(mock_find, mcp_server): result = await client.call_tool( "get_database_info", {"request": {"identifier": 999}} ) - assert result.content is not None - data = json.loads(result.content[0].text) - assert "error" in data + assert result.data["error_type"] == "not_found" From 431d14e6831b14fb499433a5690f984c3e8b49d9 Mon Sep 17 00:00:00 2001 From: Amin Ghadersohi Date: Sat, 4 Apr 2026 21:45:10 -0400 Subject: [PATCH 7/7] fix(mcp): handle backend property in list queries and exclude sensitive filter columns - Add _get_backend() helper that safely handles backend @property access on row proxies from DAO list queries (property requires URI decryption which fails on column-only query results) - Add exclude_filter_columns parameter to ModelGetSchemaCore to strip sensitive columns (sqlalchemy_uri, password, encrypted_extra, server_cert) from filter discovery responses - Wire DATABASE_EXCLUDE_COLUMNS into database schema discovery --- superset/mcp_service/database/schemas.py | 15 ++++++++- superset/mcp_service/mcp_core.py | 33 +++++++++++++------ .../mcp_service/system/tool/get_schema.py | 2 ++ 3 files changed, 39 insertions(+), 11 deletions(-) diff --git a/superset/mcp_service/database/schemas.py b/superset/mcp_service/database/schemas.py index ff2821288b83..d93ea630b309 100644 --- a/superset/mcp_service/database/schemas.py +++ b/superset/mcp_service/database/schemas.py @@ -312,6 +312,19 @@ def _humanize_timestamp(dt: datetime | None) -> str | None: return humanize.naturaltime(now - dt) +def _get_backend(database: Any) -> str | None: + """Safely get backend from a Database object or row proxy. + + backend is a @property that decrypts sqlalchemy_uri, which fails on + row proxies returned by column-only DAO list queries. Fall back to None + when the property raises. + """ + try: + return database.backend + except (AttributeError, TypeError): + return None + + def serialize_database_object(database: Any) -> DatabaseInfo | None: if not database: return None @@ -322,7 +335,7 @@ def serialize_database_object(database: Any) -> DatabaseInfo | None: if getattr(database, "uuid", None) else None, database_name=getattr(database, "database_name", None), - backend=getattr(database, "backend", None), + backend=_get_backend(database), expose_in_sqllab=getattr(database, "expose_in_sqllab", None), allow_ctas=getattr(database, "allow_ctas", None), allow_cvas=getattr(database, "allow_cvas", None), diff --git a/superset/mcp_service/mcp_core.py b/superset/mcp_service/mcp_core.py index 88ca6e7bebef..0609ff27ce77 100644 --- a/superset/mcp_service/mcp_core.py +++ b/superset/mcp_service/mcp_core.py @@ -531,6 +531,7 @@ def __init__( search_columns: List[str], default_sort: str = "changed_on", default_sort_direction: Literal["asc", "desc"] = "desc", + exclude_filter_columns: set[str] | None = None, logger: logging.Logger | None = None, ) -> None: """ @@ -546,6 +547,8 @@ def __init__( search_columns: Column names used for text search default_sort: Default sort column default_sort_direction: Default sort direction + exclude_filter_columns: Column names to omit from filter discovery + (e.g., sensitive fields like passwords or connection URIs) logger: Optional logger instance """ super().__init__(logger) @@ -558,6 +561,7 @@ def __init__( self.search_columns = search_columns self.default_sort = default_sort self.default_sort_direction = default_sort_direction + self.exclude_filter_columns = exclude_filter_columns or set() def _get_filter_columns(self) -> Dict[str, List[str]]: """Get filterable columns and operators from the DAO.""" @@ -568,16 +572,25 @@ def _get_filter_columns(self) -> Dict[str, List[str]]: return {} # Convert to dict safely - handle both dict and dict-like objects if isinstance(filterable, dict): - return dict(filterable) - # Try to convert mapping-like objects - try: - return dict(filterable) - except (TypeError, ValueError): - self._log_warning( - f"Unexpected filter columns type for {self.model_type}: " - f"{type(filterable)}" - ) - return {} + result = dict(filterable) + else: + # Try to convert mapping-like objects + try: + result = dict(filterable) + except (TypeError, ValueError): + self._log_warning( + f"Unexpected filter columns type for {self.model_type}: " + f"{type(filterable)}" + ) + return {} + # Remove excluded columns (e.g., sensitive fields) + if self.exclude_filter_columns: + result = { + k: v + for k, v in result.items() + if k not in self.exclude_filter_columns + } + return result except Exception as e: self._log_warning( f"Failed to get filter columns for {self.model_type}: {e}" diff --git a/superset/mcp_service/system/tool/get_schema.py b/superset/mcp_service/system/tool/get_schema.py index ef770abd0c77..a0bc8213a4ff 100644 --- a/superset/mcp_service/system/tool/get_schema.py +++ b/superset/mcp_service/system/tool/get_schema.py @@ -118,6 +118,7 @@ def _get_database_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: """Create database schema core with dynamically extracted columns.""" # Lazy import to avoid circular dependency at module load time from superset.daos.database import DatabaseDAO + from superset.mcp_service.common.schema_discovery import DATABASE_EXCLUDE_COLUMNS return ModelGetSchemaCore( model_type="database", @@ -129,6 +130,7 @@ def _get_database_schema_core() -> ModelGetSchemaCore[ModelSchemaInfo]: search_columns=DATABASE_SEARCH_COLUMNS, default_sort="changed_on", default_sort_direction="desc", + exclude_filter_columns=DATABASE_EXCLUDE_COLUMNS, logger=logger, )