diff --git a/README.md b/README.md index 4b681260..0f8ee2a7 100644 --- a/README.md +++ b/README.md @@ -245,6 +245,7 @@ All configuration options can be set via `tiger config set `: - `color` - Enable/disable colored output (default: `true`) - `debug` - Enable/disable debug logging (default: `false`) - `docs_mcp` - Enable/disable docs MCP proxy (default: `true`) +- `mcp_max_rows` - Maximum number of rows the `db_execute_query` MCP tool returns per result set before truncating, to limit how much data lands in an AI agent's context. Only applies to the MCP tool, not CLI commands. Default: `100` - `output` - Output format: `json`, `yaml`, or `table` (default: `table`) - `password_storage` - Password storage method: `keyring`, `pgpass`, or `none` (default: `keyring`) - `read_only` - When `true`, mutating operations are refused: the `tiger service create`/`fork`/`start`/`stop`/`resize`/`update-password`/`delete` CLI commands and their MCP equivalents return an error, and `tiger db connect`, `tiger db connection-string`, and the `db_execute_query` MCP tool open the database session in Tiger Cloud's immutable read-only mode (writes and DDL are rejected by the server). Read commands/tools are unaffected — `tiger db schema` and the `db_schema` MCP tool always open a read-only session regardless of this setting. Default: `false`. 
diff --git a/internal/tiger/cmd/config.go b/internal/tiger/cmd/config.go index afff70b8..d699cf1d 100644 --- a/internal/tiger/cmd/config.go +++ b/internal/tiger/cmd/config.go @@ -203,6 +203,9 @@ func outputTable(w io.Writer, cfg *config.ConfigOutput) error { if cfg.GatewayURL != nil { table.Append("gateway_url", *cfg.GatewayURL) } + if cfg.MCPMaxRows != nil { + table.Append("mcp_max_rows", fmt.Sprintf("%d", *cfg.MCPMaxRows)) + } if cfg.Color != nil { table.Append("color", fmt.Sprintf("%t", *cfg.Color)) } diff --git a/internal/tiger/cmd/config_test.go b/internal/tiger/cmd/config_test.go index 62ed5179..b73b9176 100644 --- a/internal/tiger/cmd/config_test.go +++ b/internal/tiger/cmd/config_test.go @@ -6,6 +6,7 @@ import ( "encoding/json" "os" "slices" + "strconv" "strings" "testing" @@ -101,6 +102,7 @@ password_storage: pgpass "password_storage": "pgpass", "debug": "false", "config_dir": tmpDir, + "mcp_max_rows": strconv.Itoa(config.DefaultMCPMaxRows), } for key, expectedLine := range expectedLines { @@ -155,6 +157,7 @@ password_storage: keyring "config_dir": tmpDir, "releases_url": "https://cli.tigerdata.com", "version_check": true, + "mcp_max_rows": float64(config.DefaultMCPMaxRows), } for key, expectedValue := range expectedValues { @@ -212,6 +215,7 @@ password_storage: keyring "config_dir": tmpDir, "releases_url": "https://cli.tigerdata.com", "version_check": true, + "mcp_max_rows": config.DefaultMCPMaxRows, } for key, expectedValue := range expectedValues { diff --git a/internal/tiger/config/config.go b/internal/tiger/config/config.go index e34a1db9..72334b35 100644 --- a/internal/tiger/config/config.go +++ b/internal/tiger/config/config.go @@ -26,6 +26,7 @@ type Config struct { DocsMCP bool `mapstructure:"docs_mcp"` DocsMCPURL string `mapstructure:"docs_mcp_url"` GatewayURL string `mapstructure:"gateway_url"` + MCPMaxRows int `mapstructure:"mcp_max_rows"` Output string `mapstructure:"output"` PasswordStorage string `mapstructure:"password_storage"` ReadOnly 
bool `mapstructure:"read_only"` @@ -45,6 +46,7 @@ type ConfigOutput struct { DocsMCP *bool `mapstructure:"docs_mcp" json:"docs_mcp,omitempty"` DocsMCPURL *string `mapstructure:"docs_mcp_url" json:"docs_mcp_url,omitempty"` GatewayURL *string `mapstructure:"gateway_url" json:"gateway_url,omitempty"` + MCPMaxRows *int `mapstructure:"mcp_max_rows" json:"mcp_max_rows,omitempty"` Output *string `mapstructure:"output" json:"output,omitempty"` PasswordStorage *string `mapstructure:"password_storage" json:"password_storage,omitempty"` ReadOnly *bool `mapstructure:"read_only" json:"read_only,omitempty"` @@ -63,6 +65,7 @@ const ( DefaultDocsMCP = true DefaultDocsMCPURL = "https://mcp.tigerdata.com/docs?disabled_skills=ghost-database" DefaultGatewayURL = "https://console.cloud.tigerdata.com/api" + DefaultMCPMaxRows = 100 DefaultOutput = "table" DefaultPasswordStorage = "keyring" DefaultReadOnly = false @@ -84,6 +87,7 @@ var defaultValues = map[string]any{ "docs_mcp": DefaultDocsMCP, "docs_mcp_url": DefaultDocsMCPURL, "gateway_url": DefaultGatewayURL, + "mcp_max_rows": DefaultMCPMaxRows, "output": DefaultOutput, "password_storage": DefaultPasswordStorage, "read_only": DefaultReadOnly, @@ -282,6 +286,14 @@ func setBool(key, val string) (bool, error) { return b, nil } +func setInt(key, val string) (int, error) { + n, err := strconv.Atoi(val) + if err != nil { + return 0, fmt.Errorf("invalid %s value: %s (must be an integer)", key, val) + } + return n, nil +} + // UpdateField updates the field in the Config struct corresponding to the given key. // It accepts either a string (from user input) or a typed value (string/bool from defaults). // The function validates the value and updates both the struct field and viper state. 
@@ -455,6 +467,26 @@ func (c *Config) UpdateField(key string, value any) (any, error) { return nil, fmt.Errorf("version_check must be string or bool, got %T", value) } + case "mcp_max_rows": + var n int + switch v := value.(type) { + case int: + n = v + case string: + parsed, err := setInt("mcp_max_rows", v) + if err != nil { + return nil, err + } + n = parsed + default: + return nil, fmt.Errorf("mcp_max_rows must be string or int, got %T", value) + } + if n < 1 { + return nil, fmt.Errorf("mcp_max_rows must be at least 1, got %d", n) + } + c.MCPMaxRows = n + validated = n + default: return nil, fmt.Errorf("unknown configuration key: %s", key) } diff --git a/internal/tiger/config/config_test.go b/internal/tiger/config/config_test.go index 594a6a5b..654c9789 100644 --- a/internal/tiger/config/config_test.go +++ b/internal/tiger/config/config_test.go @@ -73,6 +73,9 @@ func TestLoad_DefaultValues(t *testing.T) { if cfg.ReadOnly != DefaultReadOnly { t.Errorf("Expected ReadOnly %t, got %t", DefaultReadOnly, cfg.ReadOnly) } + if cfg.MCPMaxRows != DefaultMCPMaxRows { + t.Errorf("Expected MCPMaxRows %d, got %d", DefaultMCPMaxRows, cfg.MCPMaxRows) + } if cfg.ConfigDir != tmpDir { t.Errorf("Expected ConfigDir %s, got %s", tmpDir, cfg.ConfigDir) } @@ -418,6 +421,28 @@ func TestSet(t *testing.T) { value: "invalid", expectedError: true, }, + { + key: "mcp_max_rows", + value: "250", + checkFunc: func() bool { + return cfg.MCPMaxRows == 250 + }, + }, + { + key: "mcp_max_rows", + value: "0", + expectedError: true, + }, + { + key: "mcp_max_rows", + value: "-5", + expectedError: true, + }, + { + key: "mcp_max_rows", + value: "notanumber", + expectedError: true, + }, { key: "unknown_key", value: "value", diff --git a/internal/tiger/mcp/db_tools.go b/internal/tiger/mcp/db_tools.go index a757731c..31a27b67 100644 --- a/internal/tiger/mcp/db_tools.go +++ b/internal/tiger/mcp/db_tools.go @@ -13,10 +13,42 @@ import ( "go.uber.org/zap" "github.com/timescale/tiger-cli/internal/tiger/common" 
+ "github.com/timescale/tiger-cli/internal/tiger/config" "github.com/timescale/tiger-cli/internal/tiger/logging" "github.com/timescale/tiger-cli/internal/tiger/util" ) +const ( + // mcpMaxResponseBytes caps total serialized row data per response, catching a + // few very wide rows the row cap alone would miss. Not user-configurable. + mcpMaxResponseBytes = 256 * 1024 +) + +// resolveMaxRows returns the row cap from mcp_max_rows, falling back to the +// default for non-positive config-file/env values (which skip set validation). +func resolveMaxRows(configured int) int { + if configured <= 0 { + return config.DefaultMCPMaxRows + } + return configured +} + +// approxRowSize estimates a row's serialized size in bytes for the byte budget, +// mirroring how it is ultimately marshaled to JSON for the client. +func approxRowSize(values []any) int { + if b, err := json.Marshal(values); err == nil { + return len(b) + } + // Fallback for the rare value that isn't JSON-marshalable. + return len(fmt.Sprint(values...)) +} + +// truncationNotice builds the actionable guidance returned to the model when a +// response is truncated. +func truncationNotice(maxRows int) string { + return fmt.Sprintf("Results were truncated to limit the amount of data returned (the configured mcp_max_rows=%d per result set, plus an overall response size cap). More rows exist. 
Do the work in the database instead of re-running this query: aggregate (GROUP BY, COUNT, SUM, AVG), filter (WHERE), or paginate (LIMIT/OFFSET).", maxRows) +} + // DBExecuteQueryInput represents input for db_execute_query type DBExecuteQueryInput struct { ServiceID string `json:"service_id"` @@ -67,12 +99,15 @@ type ResultSet struct { Columns []DBExecuteQueryColumn `json:"columns,omitempty"` Rows *[][]any `json:"rows,omitempty"` RowsAffected int64 `json:"rows_affected"` + Truncated bool `json:"truncated,omitempty"` } // DBExecuteQueryOutput represents output for db_execute_query type DBExecuteQueryOutput struct { ResultSets []ResultSet `json:"result_sets"` ExecutionTime string `json:"execution_time"` + Truncated bool `json:"truncated,omitempty"` + Notice string `json:"notice,omitempty"` } func (DBExecuteQueryOutput) Schema() *jsonschema.Schema { @@ -96,12 +131,18 @@ func (DBExecuteQueryOutput) Schema() *jsonschema.Schema { resultSetSchema.Properties["rows"].Description = "Result rows as arrays of values. Omitted for commands that don't return rows (INSERT, UPDATE, DELETE, etc.)" resultSetSchema.Properties["rows"].Examples = []any{[][]any{{1, "alice", "2024-01-01"}, {2, "bob", "2024-01-02"}}} - resultSetSchema.Properties["rows_affected"].Description = "Number of rows affected. For SELECT, this is the number of rows returned. For INSERT/UPDATE/DELETE, this is the number of rows modified. Returns 0 for statements that don't return or modify rows (e.g. CREATE TABLE)." + resultSetSchema.Properties["rows_affected"].Description = "Number of rows affected. For SELECT, this is the total number of rows the query produced; when truncated is true this exceeds the number of rows actually returned in this response. For INSERT/UPDATE/DELETE, this is the number of rows modified. Returns 0 for statements that don't return or modify rows (e.g. CREATE TABLE)." 
resultSetSchema.Properties["rows_affected"].Examples = []any{5, 42, 1000} + resultSetSchema.Properties["truncated"].Description = "True when this result set was capped (by the configured mcp_max_rows row limit or the overall response size limit) and additional rows exist that were not returned. Refine the query in SQL to get the data you need." + schema.Properties["execution_time"].Description = "Execution time as a human-readable duration string" schema.Properties["execution_time"].Examples = []any{"123ms", "1.5s", "45.2µs"} + schema.Properties["truncated"].Description = "True when any result set was truncated to limit the amount of data returned. See notice for guidance." + + schema.Properties["notice"].Description = "Present only when results were truncated. Actionable guidance for getting the needed data via SQL (aggregate, filter, paginate) instead of re-running the query." + return schema } @@ -116,6 +157,8 @@ Connects to a PostgreSQL database service in Tiger Cloud and executes the provid Multi-statement queries (semicolon-separated) are supported when no parameters are provided. All result sets are returned. By default, statements execute in an implicit transaction that automatically commits on success or rolls back on error. Explicit transactions (opened with BEGIN) must be explicitly committed with COMMIT, or they roll back when the connection closes. +Process data in the database, not in your context: aggregate, filter, sort/limit, and join in SQL rather than fetching raw rows. + WARNING: Can execute any SQL statement including INSERT, UPDATE, DELETE, and DDL commands. Always review queries before execution.`, InputSchema: DBExecuteQueryInput{}.Schema(), OutputSchema: DBExecuteQueryOutput{}.Schema(), @@ -308,6 +351,10 @@ func (s *Server) handleDBExecuteQuery(ctx context.Context, req *mcp.CallToolRequ } defer conn.Close(context.Background()) + // Bound how much data this call returns to the model's context. 
+ maxRows := resolveMaxRows(cfg.MCPMaxRows) + remainingBytes := mcpMaxResponseBytes + // Execute query and measure time startTime := time.Now() @@ -324,6 +371,7 @@ func (s *Server) handleDBExecuteQuery(ctx context.Context, req *mcp.CallToolRequ // Process all result sets, collecting them all resultSets := make([]ResultSet, 0) + truncated := false for { rows, err := br.Query() if err != nil { @@ -338,16 +386,24 @@ func (s *Server) handleDBExecuteQuery(ctx context.Context, req *mcp.CallToolRequ return nil, DBExecuteQueryOutput{}, err } - // Process this result set - result, err := processResultSet(conn, rows) + // Process this result set, capping rows and the shared byte budget. + result, err := processResultSet(conn, rows, maxRows, &remainingBytes) if err != nil { return nil, DBExecuteQueryOutput{}, err } // Collect this result set resultSets = append(resultSets, result) + + if result.Truncated { + // Stop reading further sets; br.Close() below discards them. The + // query isn't cancelled, so all statements still run server-side. + truncated = true + break + } } + // Close the batch, discarding any result sets we didn't read. if err := br.Close(); err != nil { return nil, DBExecuteQueryOutput{}, err } @@ -357,12 +413,17 @@ func (s *Server) handleDBExecuteQuery(ctx context.Context, req *mcp.CallToolRequ ResultSets: resultSets, ExecutionTime: time.Since(startTime).String(), } + if truncated { + output.Truncated = true + output.Notice = truncationNotice(maxRows) + } return nil, output, nil } -// processResultSet reads all data from a pgx.Rows result set -func processResultSet(conn *pgx.Conn, rows pgx.Rows) (ResultSet, error) { +// processResultSet reads a result set, capping at maxRows and the shared byte +// budget. ResultSet.Truncated reports whether rows were dropped. 
+func processResultSet(conn *pgx.Conn, rows pgx.Rows, maxRows int, remainingBytes *int) (ResultSet, error) { defer rows.Close() // Get column metadata from field descriptions @@ -381,7 +442,7 @@ func processResultSet(conn *pgx.Conn, rows pgx.Rows) (ResultSet, error) { } } - // Collect all rows from this result set + // Collect rows from this result set var resultRows [][]any if len(columns) > 0 { // If any columns were returned, initialize resultRows to an empty @@ -392,25 +453,47 @@ func processResultSet(conn *pgx.Conn, rows pgx.Rows) (ResultSet, error) { // so we leave resultRows nil so it gets omitted from the JSON result. resultRows = make([][]any, 0) } + + truncated := false for rows.Next() { + // Row cap: another row exists but we already hold maxRows. + if len(resultRows) >= maxRows { + truncated = true + break + } + // Scan values into generic interface slice values, err := rows.Values() if err != nil { return ResultSet{}, err } + + // Byte safety net for wide rows, but always keep at least one row so an + // oversized first row doesn't yield an empty result. + remaining := *remainingBytes - approxRowSize(values) + if len(resultRows) > 0 && remaining < 0 { + truncated = true + break + } + *remainingBytes = remaining + resultRows = append(resultRows, values) } - // Check for errors during iteration + // Drain so the command tag reports the true row count even when truncated. 
+ rows.Close() + if err := rows.Err(); err != nil { return ResultSet{}, err } commandTag := rows.CommandTag() + return ResultSet{ CommandTag: commandTag.String(), Columns: columns, Rows: util.PtrIfNonNil(resultRows), RowsAffected: commandTag.RowsAffected(), + Truncated: truncated, }, nil } diff --git a/internal/tiger/mcp/db_tools_test.go b/internal/tiger/mcp/db_tools_test.go new file mode 100644 index 00000000..039e5542 --- /dev/null +++ b/internal/tiger/mcp/db_tools_test.go @@ -0,0 +1,86 @@ +package mcp + +import ( + "strings" + "testing" + + "github.com/timescale/tiger-cli/internal/tiger/config" +) + +func TestResolveMaxRows(t *testing.T) { + tests := []struct { + name string + configured int + want int + }{ + { + name: "configured value is used", + configured: 250, + want: 250, + }, + { + // A config-file or TIGER_MCP_MAX_ROWS value bypasses `tiger config + // set` validation, so a zero (or negative) configured value can + // reach here and must be sanitized to the default. + name: "zero configured (env/file bypass) falls back to default", + configured: 0, + want: config.DefaultMCPMaxRows, + }, + { + name: "negative configured falls back to default", + configured: -1, + want: config.DefaultMCPMaxRows, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := resolveMaxRows(tt.configured); got != tt.want { + t.Errorf("resolveMaxRows(%d) = %d, want %d", tt.configured, got, tt.want) + } + }) + } +} + +func TestApproxRowSize(t *testing.T) { + // A small row should be smaller than a row with a large text value, and + // both should be positive. We don't assert exact byte counts (they track + // JSON encoding), only the ordering and positivity the byte budget relies on. 
+ small := approxRowSize([]any{1, "a"}) + large := approxRowSize([]any{1, strings.Repeat("x", 1000)}) + + if small <= 0 { + t.Errorf("approxRowSize(small) = %d, want > 0", small) + } + if large <= small { + t.Errorf("approxRowSize(large)=%d should exceed approxRowSize(small)=%d", large, small) + } +} + +func TestTruncationNotice(t *testing.T) { + notice := truncationNotice(100) + // The notice must mention the actual cap and steer the model toward doing + // the work in SQL rather than re-running the query. + for _, want := range []string{"100", "LIMIT", "aggregate"} { + if !strings.Contains(notice, want) { + t.Errorf("truncationNotice() = %q, missing %q", notice, want) + } + } +} + +func TestDBExecuteQueryOutputSchemaHasTruncationFields(t *testing.T) { + schema := DBExecuteQueryOutput{}.Schema() + for _, name := range []string{"truncated", "notice"} { + prop, ok := schema.Properties[name] + if !ok { + t.Fatalf("expected %q property in output schema", name) + } + if prop.Description == "" { + t.Errorf("expected %q to have a description", name) + } + } + resultSet := schema.Properties["result_sets"].Items + if _, ok := resultSet.Properties["truncated"]; !ok { + t.Error("expected truncated property on result set schema") + } +} diff --git a/specs/spec.md b/specs/spec.md index 5d05e538..5bd9c951 100644 --- a/specs/spec.md +++ b/specs/spec.md @@ -46,6 +46,7 @@ All configuration options can be set via `tiger config set `: - `color` - Enable/disable colored output (default: true) - `debug` - Enable/disable debug logging (default: false) - `docs_mcp` - Enable/disable docs MCP proxy (default: true) +- `mcp_max_rows` - Maximum rows the `db_execute_query` MCP tool returns per result set before truncating, to limit data placed in an AI agent's context. MCP-only (does not affect CLI commands). (default: 100). See `specs/spec_mcp.md` for details. 
- `output` - Output format: json, yaml, or table (default: table) - `password_storage` - Password storage method: keyring, pgpass, or none (default: keyring) - `read_only` - When `true`, mutating operations are refused: `tiger service create`/`fork`/`start`/`stop`/`resize`/`update-password`/`delete` and their MCP equivalents return an error, and `tiger db connect`/`connection-string`/`db_execute_query` open against an immutable read-only database connection regardless of `--read-only` (default: false). See `specs/spec_mcp.md` for details. diff --git a/specs/spec_mcp.md b/specs/spec_mcp.md index 4e31c6d2..72c0b6a1 100644 --- a/specs/spec_mcp.md +++ b/specs/spec_mcp.md @@ -353,6 +353,8 @@ Execute a SQL query on a service database. **Returns:** Query results with rows, columns (including types), rows affected count, and execution metadata. +**Result limiting (context-window protection):** To keep large query results from overflowing an AI agent's context window, results are capped. Each result set returns at most the configured `mcp_max_rows` rows (default 100), and the total serialized row data across all result sets is bounded by a built-in, non-configurable size budget of approximately 256 KiB (a result set always keeps at least its first row, even if that row alone exceeds the budget). When a cap is hit, the affected result set's `truncated` field is set to `true`, the top-level response sets `truncated: true`, and a `notice` field returns guidance to refine the query in SQL (aggregate, filter, or paginate with `LIMIT`/`OFFSET`) rather than re-running it. The query is not cancelled, so any writes and later statements in a multi-statement query still complete, but the result sets of statements after the truncated one are discarded and not returned. The truncated result set's `rows_affected` reports the true number of rows the query produced even when fewer are returned. The tool description also instructs agents to push computation (aggregation, filtering, sorting, joins) into SQL instead of fetching raw rows. This limiting applies only to the MCP tool, not to CLI commands. + **Example Response:** ```json { @@ -375,6 +377,7 @@ Execute a SQL query on a service database. 
- `columns` includes both the column name and PostgreSQL data type for each column - Empty `rows` array for commands that don't return rows (INSERT, UPDATE, DELETE, DDL commands) - For parity with `tiger db connect` command, supports custom roles and connection pooling +- `truncated` (per result set and top-level) and `notice` are present only when results were capped; see "Result limiting" above ### High-Availability Management