From edaa89e5c2532aa9ed853585d7322468cc919d09 Mon Sep 17 00:00:00 2001 From: YP Su Date: Wed, 3 Jun 2026 17:56:06 +0000 Subject: [PATCH 1/4] Add AWS S3 and Aurora tools to wb-mcp-server Adds 12 new MCP tools for AWS resource management: - S3: list_objects, read_file, write_file, copy - Aurora: query, list_tables, describe_table, resolve_connection - Resource lifecycle: create_s3_folder, create_aurora_database, create_s3_external_bucket, workspace_configure_aws Fixes s3_copy to use resource-name-based auth (matching other S3 tools) with cross-resource temp-file bridge. Fixes ensureAWSConfig to prefer the config file matching the current workspace UUID. Co-Authored-By: Claude Opus 4.6 (1M context) --- features/src/wb-mcp-server/main.go | 543 ++++++++++++++++++++++++++++- 1 file changed, 542 insertions(+), 1 deletion(-) diff --git a/features/src/wb-mcp-server/main.go b/features/src/wb-mcp-server/main.go index 9a7e740d..fc1669c1 100644 --- a/features/src/wb-mcp-server/main.go +++ b/features/src/wb-mcp-server/main.go @@ -1570,6 +1570,158 @@ WORKFLOW: Required: []string{"hierarchy", "operator"}, }, }, + + // --- Aurora Database Tools --- + { + Name: "aurora_query", + Description: "Execute a SQL query against an Aurora PostgreSQL database. Handles IAM authentication automatically. 
Returns results as CSV.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name for the Aurora database"}, + "query": map[string]interface{}{"type": "string", "description": "SQL query to execute"}, + "accessMode": map[string]interface{}{"type": "string", "enum": []string{"READ_ONLY", "WRITE_READ"}, "description": "Access mode (default: READ_ONLY)"}, + }, + Required: []string{"resourceName", "query"}, + }, + }, + { + Name: "aurora_list_tables", + Description: "List all tables in an Aurora PostgreSQL database.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name for the Aurora database"}, + "schema": map[string]interface{}{"type": "string", "description": "Schema name (default: public)"}, + }, + Required: []string{"resourceName"}, + }, + }, + { + Name: "aurora_describe_table", + Description: "Get column names, data types, and constraints for a table in an Aurora PostgreSQL database.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name for the Aurora database"}, + "tableName": map[string]interface{}{"type": "string", "description": "Table name to describe"}, + "schema": map[string]interface{}{"type": "string", "description": "Schema name (default: public)"}, + }, + Required: []string{"resourceName", "tableName"}, + }, + }, + { + Name: "aurora_resolve_connection", + Description: "Get a fresh connection string for an Aurora database with embedded IAM auth token. Use when connecting from Python, R, or other tools. 
Token is valid for 15 minutes.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name for the Aurora database"}, + "accessMode": map[string]interface{}{"type": "string", "enum": []string{"READ_ONLY", "WRITE_READ"}, "description": "Access mode (default: READ_ONLY)"}, + }, + Required: []string{"resourceName"}, + }, + }, + + // --- S3 Storage Tools --- + { + Name: "s3_list_objects", + Description: "List files and folders in an S3 storage resource.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name (e.g., 'my_s3_folder')"}, + "path": map[string]interface{}{"type": "string", "description": "Sub-path within the resource prefix (optional)"}, + "recursive": map[string]interface{}{"type": "boolean", "description": "List recursively (default: false)"}, + }, + Required: []string{"resourceName"}, + }, + }, + { + Name: "s3_read_file", + Description: "Read contents of a file from S3. Returns text content. For large or binary files, returns size info instead.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name"}, + "path": map[string]interface{}{"type": "string", "description": "File path relative to resource prefix"}, + "maxBytes": map[string]interface{}{"type": "integer", "description": "Max bytes to read (default: 1048576 = 1MB)"}, + }, + Required: []string{"resourceName", "path"}, + }, + }, + { + Name: "s3_write_file", + Description: "Write content to a file in S3. 
Creates or overwrites the file at the specified path.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "resourceName": map[string]interface{}{"type": "string", "description": "Workspace resource name"}, + "path": map[string]interface{}{"type": "string", "description": "Destination path relative to resource prefix"}, + "content": map[string]interface{}{"type": "string", "description": "File content to write"}, + }, + Required: []string{"resourceName", "path", "content"}, + }, + }, + { + Name: "s3_copy", + Description: "Copy files within or between S3 storage resources. Use sourceResource/destResource (preferred) to auto-resolve paths and credentials, or sourceUri/destUri for raw S3 URIs. For cross-resource copies between different credential scopes, uses a temp file to bridge.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "sourceResource": map[string]interface{}{"type": "string", "description": "Source workspace resource name (e.g., 'my_s3_folder'). Auto-resolves S3 path and credentials."}, + "sourcePath": map[string]interface{}{"type": "string", "description": "File path within source resource. Used with sourceResource."}, + "destResource": map[string]interface{}{"type": "string", "description": "Destination workspace resource name. Auto-resolves S3 path and credentials."}, + "destPath": map[string]interface{}{"type": "string", "description": "File path within destination resource. 
Used with destResource."}, + "sourceUri": map[string]interface{}{"type": "string", "description": "Full source S3 URI (fallback when sourceResource is not provided)"}, + "destUri": map[string]interface{}{"type": "string", "description": "Full destination S3 URI (fallback when destResource is not provided)"}, + "recursive": map[string]interface{}{"type": "boolean", "description": "Copy recursively (default: false)"}, + }, + }, + }, + + // --- AWS Resource Lifecycle Tools --- + { + Name: "resource_create_aurora_database", + Description: "Create an AWS Aurora PostgreSQL database in the workspace.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "name": map[string]interface{}{"type": "string", "description": "Resource name in workspace"}, + "databaseName": map[string]interface{}{"type": "string", "description": "PostgreSQL database name"}, + "description": map[string]interface{}{"type": "string", "description": "Resource description"}, + }, + Required: []string{"name", "databaseName"}, + }, + }, + { + Name: "resource_create_s3_folder", + Description: "Create an S3 storage folder in the workspace.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "name": map[string]interface{}{"type": "string", "description": "Resource name in workspace"}, + "folderName": map[string]interface{}{"type": "string", "description": "S3 folder name"}, + "description": map[string]interface{}{"type": "string", "description": "Resource description"}, + }, + Required: []string{"name", "folderName"}, + }, + }, + { + Name: "resource_create_s3_external_bucket", + Description: "Register an external S3 bucket as a workspace resource.", + InputSchema: InputSchema{ + Type: "object", + Properties: map[string]interface{}{ + "name": map[string]interface{}{"type": "string", "description": "Resource name in workspace"}, + "bucketName": map[string]interface{}{"type": "string", "description": "External S3 bucket name"}, + "account": 
map[string]interface{}{"type": "string", "description": "AWS account number"},
+				"region": map[string]interface{}{"type": "string", "description": "AWS region"},
+				"description": map[string]interface{}{"type": "string", "description": "Resource description"},
+			},
+			Required: []string{"name", "bucketName", "account", "region"},
+		},
+	},
 }
 
 func initializeConfig() error {
@@ -1827,6 +1979,125 @@ func executeWbCommand(args []string) (string, error) {
 	return string(output), err
 }
 
+// executeShellCommand runs name with args and returns the combined
+// stdout/stderr of the process together with its error, if any.
+func executeShellCommand(name string, args ...string) (string, error) {
+	cmd := exec.Command(name, args...)
+	output, err := cmd.CombinedOutput()
+	return string(output), err
+}
+
+// getAuroraConnString resolves resourceName through `wb resource resolve` (password
+// included) and returns the trimmed output. An empty accessMode means READ_ONLY.
+func getAuroraConnString(resourceName, accessMode string) (string, error) {
+	if accessMode == "" {
+		accessMode = "READ_ONLY"
+	}
+	args := []string{"resource", "resolve", "--id=" + resourceName,
+		"--access-mode", accessMode, "--include-password"}
+	connStr, err := executeWbCommand(args)
+	if err != nil {
+		return "", fmt.Errorf("failed to resolve Aurora connection: %w\n%s", err, connStr)
+	}
+	return strings.TrimSpace(connStr), nil
+}
+
+// executeAuroraQuery runs a single SQL command through psql and returns psql's
+// combined output (CSV on success).
+// NOTE(review): the connection string, including its password/token, is passed on
+// psql's command line and is therefore visible in the process list.
+func executeAuroraQuery(resourceName, accessMode, query string) (string, error) {
+	connStr, err := getAuroraConnString(resourceName, accessMode)
+	if err != nil {
+		return "", err
+	}
+	return executeShellCommand("psql", connStr, "--csv", "-c", query)
+}
+
+// getS3ResourcePath returns "s3://<bucket>/<prefix>/" for a workspace resource, built
+// from the bucketName and prefix fields of `wb resource describe`. The result always
+// ends in "/" so callers can append a relative path directly.
+func getS3ResourcePath(resourceName string) (string, error) {
+	descOutput, err := executeWbCommand([]string{"resource", "describe", "--id=" + resourceName, "--format=json"})
+	if err != nil {
+		return "", fmt.Errorf("failed to describe resource: %w\n%s", err, descOutput)
+	}
+	var res map[string]interface{}
+	if err := json.Unmarshal([]byte(descOutput), &res); err != nil {
+		return "", fmt.Errorf("failed to parse resource JSON: %w", err)
+	}
+	bucket, _ := res["bucketName"].(string)
+	prefix, _ := res["prefix"].(string)
+	if bucket == "" {
+		return "", fmt.Errorf("resource %s has no bucketName — is it an S3 resource?", resourceName)
+	}
+	s3Path := "s3://" + bucket + "/"
+	if prefix != "" {
+		s3Path += prefix
+		if !strings.HasSuffix(s3Path, "/") {
+			s3Path += "/"
+		}
+	}
+	return s3Path, nil
+}
+
+// ensureAWSConfig returns the path of an AWS config file to export as AWS_CONFIG_FILE,
+// or "" when none can be found or generated. It looks in ~/.workbench/aws first
+// (preferring <workspace UUID>.conf) and otherwise runs `wb workspace configure-aws`
+// and parses the exported path from its output.
+func ensureAWSConfig() string {
+	// Look for existing AWS config generated by wb workspace configure-aws
+	home, homeErr := os.UserHomeDir()
+	wbDir := home + "/.workbench/aws"
+	entries, err := os.ReadDir(wbDir)
+	if homeErr == nil && err == nil { // without a home dir, wbDir would point at the filesystem root
+		// Prefer config matching the current workspace UUID
+		if cachedWorkspaceUUID != "" {
+			target := cachedWorkspaceUUID + ".conf"
+			for _, e := range entries {
+				if !e.IsDir() && e.Name() == target {
+					return wbDir + "/" + e.Name()
+				}
+			}
+		}
+		// Fall back to any .conf file (directories are skipped)
+		for _, e := range entries {
+			if !e.IsDir() && strings.HasSuffix(e.Name(), ".conf") {
+				return wbDir + "/" + e.Name()
+			}
+		}
+	}
+	// Try to generate it
+	out, err := executeWbCommand([]string{"workspace", "configure-aws"})
+	if err != nil {
+		return ""
+	}
+	// Parse "export AWS_CONFIG_FILE=..." from output
+	for _, line := range strings.Split(out, "\n") {
+		line = strings.TrimSpace(line)
+		if strings.HasPrefix(line, "export AWS_CONFIG_FILE=") {
+			return strings.TrimPrefix(line, "export AWS_CONFIG_FILE=")
+		}
+	}
+	return ""
+}
+
+// executeAWSCommand runs the aws CLI with args, appending "--profile <profile>" when
+// profile is non-empty and setting AWS_CONFIG_FILE when ensureAWSConfig finds a file.
+// It returns the combined stdout/stderr and the process error.
+func executeAWSCommand(profile string, args ...string) (string, error) {
+	configFile := ensureAWSConfig()
+	cmd := exec.Command("aws", args...)
+	if configFile != "" {
+		cmd.Env = append(os.Environ(), "AWS_CONFIG_FILE="+configFile)
+	}
+	if profile != "" {
+		cmd.Args = append(cmd.Args, "--profile", profile)
+	}
+	output, err := cmd.CombinedOutput()
+	return string(output), err
+}
+
 func requireString(args map[string]interface{}, key string) (string, error) {
 	val, ok := args[key]
 	if !ok || val == nil {
@@ -3369,12 +3640,300 @@ func handleCallTool(params CallToolParams) CallToolResult {
 		}
 		output, err = executeWbCommand(append([]string{"git"}, strings.Fields(command)...))
 
+	// --- Aurora Database Tools ---
+	case "aurora_query":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		query, reqErr := requireString(params.Arguments, "query")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		accessMode, _ := params.Arguments["accessMode"].(string)
+		if accessMode == "" {
+			accessMode = "READ_ONLY"
+		}
+		output, err = executeAuroraQuery(resourceName, accessMode, query)
+
+	case "aurora_list_tables":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		schema, _ := params.Arguments["schema"].(string)
+		if schema == "" {
+			schema = "public"
+		}
+		query := fmt.Sprintf("SELECT tablename FROM pg_tables WHERE schemaname = '%s' ORDER BY tablename;", strings.ReplaceAll(schema, "'", "''")) // double ' so caller input stays inside the SQL literal
+		output, err = executeAuroraQuery(resourceName, "READ_ONLY", query)
+
+	case "aurora_describe_table":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		tableName, reqErr := 
requireString(params.Arguments, "tableName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		schema, _ := params.Arguments["schema"].(string)
+		if schema == "" {
+			schema = "public"
+		}
+		query := fmt.Sprintf("SELECT column_name, data_type, is_nullable, column_default FROM information_schema.columns WHERE table_schema = '%s' AND table_name = '%s' ORDER BY ordinal_position;", strings.ReplaceAll(schema, "'", "''"), strings.ReplaceAll(tableName, "'", "''")) // double ' so caller input stays inside the SQL literals
+		output, err = executeAuroraQuery(resourceName, "READ_ONLY", query)
+
+	case "aurora_resolve_connection":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		accessMode, _ := params.Arguments["accessMode"].(string)
+		if accessMode == "" {
+			accessMode = "READ_ONLY"
+		}
+		connStr, connErr := getAuroraConnString(resourceName, accessMode)
+		if connErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + connErr.Error()}}, IsError: true}
+		}
+		// Parse the libpq connection string into JSON for convenience
+		fields := map[string]string{}
+		for _, part := range strings.Fields(connStr) { // whitespace split: quoted values containing spaces are not handled
+			kv := strings.SplitN(part, "=", 2)
+			if len(kv) == 2 {
+				fields[kv[0]] = kv[1]
+			}
+		}
+		result := map[string]interface{}{
+			"connectionString": connStr,
+			"host": fields["host"],
+			"port": fields["port"],
+			"database": fields["dbname"],
+			"user": fields["user"],
+			"sslmode": fields["sslmode"],
+			"tokenExpiresIn": "15 minutes",
+		}
+		jsonBytes, _ := json.MarshalIndent(result, "", " ")
+		output = string(jsonBytes)
+
+	// --- S3 Storage Tools ---
+	case "s3_list_objects":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		s3Path, pathErr := 
getS3ResourcePath(resourceName)
+		if pathErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + pathErr.Error()}}, IsError: true}
+		}
+		if subPath, ok := params.Arguments["path"].(string); ok && subPath != "" {
+			s3Path += subPath
+		}
+		args := []string{"s3", "ls", s3Path}
+		if recursive, ok := params.Arguments["recursive"].(bool); ok && recursive {
+			args = append(args, "--recursive")
+		}
+		output, err = executeAWSCommand(resourceName, args...)
+
+	case "s3_read_file":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		filePath, reqErr := requireString(params.Arguments, "path")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		s3Path, pathErr := getS3ResourcePath(resourceName)
+		if pathErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + pathErr.Error()}}, IsError: true}
+		}
+		s3Path += filePath
+
+		maxBytes := 1048576 // 1MB default
+		if mb, ok := params.Arguments["maxBytes"].(float64); ok && mb > 0 {
+			maxBytes = int(mb)
+		}
+
+		// Fetch the object to stdout. Output() keeps the CLI's stderr out of the returned
+		// file content; on failure stderr is surfaced through exec.ExitError instead.
+		// NOTE: the whole object is still buffered in memory before maxBytes is applied.
+		configFile := ensureAWSConfig()
+		cmd := exec.Command("aws", "s3", "cp", s3Path, "-", "--profile", resourceName)
+		if configFile != "" {
+			cmd.Env = append(os.Environ(), "AWS_CONFIG_FILE="+configFile)
+		}
+		outBytes, readErr := cmd.Output()
+		if readErr != nil {
+			err = readErr
+			output = string(outBytes)
+			if exitErr, ok := readErr.(*exec.ExitError); ok {
+				output = string(exitErr.Stderr)
+			}
+		} else if len(outBytes) > maxBytes {
+			output = string(outBytes[:maxBytes]) + fmt.Sprintf("\n\n--- TRUNCATED (showing %d of %d bytes) ---", maxBytes, len(outBytes))
+		} else {
+			output 
= string(outBytes)
+		}
+
+	case "s3_write_file":
+		resourceName, reqErr := requireString(params.Arguments, "resourceName")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		filePath, reqErr := requireString(params.Arguments, "path")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		content, reqErr := requireString(params.Arguments, "content")
+		if reqErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true}
+		}
+		s3Path, pathErr := getS3ResourcePath(resourceName)
+		if pathErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + pathErr.Error()}}, IsError: true}
+		}
+		s3Path += filePath
+
+		// Write content to temp file, then upload; a failed write must never upload a partial file
+		tmpFile, tmpErr := os.CreateTemp("", "s3-write-*")
+		if tmpErr != nil {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error creating temp file: " + tmpErr.Error()}}, IsError: true}
+		}
+		tmpPath := tmpFile.Name()
+		defer os.Remove(tmpPath)
+		tmpFile.Close() // CreateTemp was only needed for a unique path; os.WriteFile reopens it
+		if err = os.WriteFile(tmpPath, []byte(content), 0600); err == nil {
+			output, err = executeAWSCommand(resourceName, "s3", "cp", tmpPath, s3Path)
+		}
+
+	case "s3_copy":
+		// Resolve source: prefer resource name, fall back to raw URI
+		sourceResource, _ := params.Arguments["sourceResource"].(string)
+		sourcePath, _ := params.Arguments["sourcePath"].(string)
+		sourceUri, _ := params.Arguments["sourceUri"].(string)
+		sourceProfile := sourceResource
+		if sourceResource != "" {
+			resolved, pathErr := getS3ResourcePath(sourceResource)
+			if pathErr != nil {
+				return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error resolving source resource: " + pathErr.Error()}}, IsError: true}
+			}
+			sourceUri = resolved + sourcePath
+		}
+		if sourceUri == "" {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: provide either sourceResource or sourceUri"}}, IsError: true}
+		}
+
+		// Resolve dest: prefer resource name, fall back to raw URI
+		destResource, _ := params.Arguments["destResource"].(string)
+		destPath, _ := params.Arguments["destPath"].(string)
+		destUri, _ := params.Arguments["destUri"].(string)
+		destProfile := destResource
+		if destResource != "" {
+			resolved, pathErr := getS3ResourcePath(destResource)
+			if pathErr != nil {
+				return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error resolving dest resource: " + pathErr.Error()}}, IsError: true}
+			}
+			destUri = resolved + destPath
+		}
+		if destUri == "" {
+			return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: provide either destResource or destUri"}}, IsError: true}
+		}
+
+		recursive, _ := params.Arguments["recursive"].(bool)
+
+		// Determine if we need a two-step copy (different profiles)
+		if sourceProfile != "" && destProfile != "" && sourceProfile != destProfile {
+			// Cross-resource copy: stage in a temp dir (a recursive download needs a directory target), then upload
+			tmpPath, tmpErr := os.MkdirTemp("", "s3-copy-*")
+			if tmpErr != nil {
+				return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error creating temp file: " + tmpErr.Error()}}, IsError: true}
+			}
+			defer os.RemoveAll(tmpPath) // argument is evaluated here, so the whole staging dir is removed
+			if !recursive {
+				tmpPath += "/" + sourceUri[strings.LastIndex(sourceUri, "/")+1:] // single object: keep the source file name
+			}
+			dlArgs := []string{"s3", "cp", sourceUri, tmpPath}
+			if recursive {
+				dlArgs = append(dlArgs, "--recursive")
+			}
+			dlOutput, dlErr := executeAWSCommand(sourceProfile, dlArgs...)
+			if dlErr != nil {
+				return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error downloading from source: " + dlOutput}}, IsError: true}
+			}
+
+			ulArgs := []string{"s3", "cp", tmpPath, destUri}
+			if recursive {
+				ulArgs = append(ulArgs, "--recursive")
+			}
+			output, err = executeAWSCommand(destProfile, ulArgs...)
+ } else { + // Same profile or one side is raw URI: single-step copy + profile := sourceProfile + if profile == "" { + profile = destProfile + } + args := []string{"s3", "cp", sourceUri, destUri} + if recursive { + args = append(args, "--recursive") + } + output, err = executeAWSCommand(profile, args...) + } + + // --- AWS Resource Lifecycle Tools --- + case "resource_create_aurora_database": + name, reqErr := requireString(params.Arguments, "name") + if reqErr != nil { + return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true} + } + dbName, reqErr := requireString(params.Arguments, "databaseName") + if reqErr != nil { + return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true} + } + args := []string{"resource", "create", "aurora-database", "--name=" + name, "--database-name=" + dbName} + if desc, ok := params.Arguments["description"].(string); ok && desc != "" { + args = append(args, "--description="+desc) + } + output, err = executeWbCommand(args) + + case "resource_create_s3_folder": + name, reqErr := requireString(params.Arguments, "name") + if reqErr != nil { + return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true} + } + folderName, reqErr := requireString(params.Arguments, "folderName") + if reqErr != nil { + return CallToolResult{Content: []ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true} + } + args := []string{"resource", "create", "s3-storage-folder", "--name=" + name, "--folder-name=" + folderName} + if desc, ok := params.Arguments["description"].(string); ok && desc != "" { + args = append(args, "--description="+desc) + } + output, err = executeWbCommand(args) + + case "resource_create_s3_external_bucket": + vals, reqErr := requireStrings(params.Arguments, "name", "bucketName", "account", "region") + if reqErr != nil { + return CallToolResult{Content: 
[]ContentItem{{Type: "text", Text: "Error: " + reqErr.Error()}}, IsError: true} + } + args := []string{"resource", "create", "s3-external-bucket", + "--name=" + vals[0], "--bucket-name=" + vals[1], "--account=" + vals[2], "--region=" + vals[3]} + if desc, ok := params.Arguments["description"].(string); ok && desc != "" { + args = append(args, "--description="+desc) + } + output, err = executeWbCommand(args) + default: return CallToolResult{Content: []ContentItem{{Type: "text", Text: fmt.Sprintf("Unknown tool: %s", params.Name)}}, IsError: true} } if err != nil { - return CallToolResult{Content: []ContentItem{{Type: "text", Text: fmt.Sprintf("Error: %s", err.Error())}}, IsError: true} + errMsg := fmt.Sprintf("Error: %s", err.Error()) + if output != "" { + errMsg += "\n" + output + } + return CallToolResult{Content: []ContentItem{{Type: "text", Text: errMsg}}, IsError: true} } return CallToolResult{Content: []ContentItem{{Type: "text", Text: output}}, IsError: false} } From fa910e36d9e6fbf4b7382e21ce35dc9ccef50a26 Mon Sep 17 00:00:00 2001 From: Youpeng Date: Thu, 4 Jun 2026 17:58:09 +0000 Subject: [PATCH 2/4] Pre-install web frameworks and expose dashboard port - Dockerfile: add fastapi, uvicorn, flask, flask-cors, plotly, pandas, boto3, psycopg2-binary so dashboard apps work without pip install - docker-compose: expose port 8080 for dashboard/API serving - install.sh: bake read-only MCP permission allowlist into Claude settings Co-Authored-By: Claude Opus 4.6 (1M context) --- features/src/wb-mcp-server/install.sh | 62 +++++++++++++++++++ src/workbench-jupyter-with-llm/Dockerfile | 8 ++- .../docker-compose.yaml | 1 + 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/features/src/wb-mcp-server/install.sh b/features/src/wb-mcp-server/install.sh index 9e650216..8ac71149 100755 --- a/features/src/wb-mcp-server/install.sh +++ b/features/src/wb-mcp-server/install.sh @@ -198,6 +198,68 @@ fi chown "${USERNAME}:" "${CLAUDE_SETTINGS}" echo "Configured Claude Code 
MCP server in ${CLAUDE_SETTINGS}"
+# Configure Claude Code permissions: auto-allow read-only MCP tools.
+# aurora_query and aurora_resolve_connection are deliberately NOT pre-approved:
+# both accept accessMode=WRITE_READ, so they are not read-only.
+CLAUDE_DIR="${USER_HOME_DIR}/.claude"
+CLAUDE_LOCAL_SETTINGS="${CLAUDE_DIR}/settings.local.json"
+mkdir -p "${CLAUDE_DIR}"
+cat > "${CLAUDE_LOCAL_SETTINGS}" <<'PERMS_EOF'
+{
+  "permissions": {
+    "allow": [
+      "mcp__wb__aurora_list_tables",
+      "mcp__wb__aurora_describe_table",
+      "mcp__wb__wb_status",
+      "mcp__wb__workspace_get",
+      "mcp__wb__workspace_list_resources",
+      "mcp__wb__workspace_list_data_collections",
+      "mcp__wb__workspace_list_all",
+      "mcp__wb__workspace_list_users",
+      "mcp__wb__resource_list_tree",
+      "mcp__wb__resource_check_access",
+      "mcp__wb__resource_open_console",
+      "mcp__wb__s3_list_objects",
+      "mcp__wb__s3_read_file",
+      "mcp__wb__app_list",
+      "mcp__wb__app_get_url",
+      "mcp__wb__underlay_list",
+      "mcp__wb__underlay_get_schema",
+      "mcp__wb__underlay_list_entities",
+      "mcp__wb__underlay_get_entity",
+      "mcp__wb__underlay_list_criteria_selectors",
+      "mcp__wb__folder_list_tree",
+      "mcp__wb__auth_status",
+      "mcp__wb__resolve",
+      "mcp__wb__server_status",
+      "mcp__wb__server_list",
+      "mcp__wb__study_list",
+      "mcp__wb__study_list_cohorts",
+      "mcp__wb__data_query_hints",
+      "mcp__wb__data_sample_instances",
+      "mcp__wb__cohort_count_instances",
+      "mcp__wb__export_list_models",
+      "mcp__wb__export_describe",
+      "mcp__wb__export_preview",
+      "mcp__wb__workflow_list",
+      "mcp__wb__workflow_describe",
+      "mcp__wb__workflow_job_list",
+      "mcp__wb__workflow_job_describe",
+      "mcp__wb__group_list",
+      "mcp__wb__group_describe",
+      "mcp__wb__pod_list",
+      "mcp__wb__pod_describe",
+      "mcp__wb__organization_list",
+      "mcp__wb__version",
+      "mcp__wb__wb_workspace_list",
+      "mcp__wb__platform_list_data_collections"
+    ]
+  }
+}
+PERMS_EOF
+chown -R "${USERNAME}:" "${CLAUDE_DIR}"
+echo "Configured Claude Code read-only MCP permissions in ${CLAUDE_LOCAL_SETTINGS}"
+
 # Configure Gemini CLI MCP server via settings file
GEMINI_SETTINGS="${USER_HOME_DIR}/.gemini/settings.json" mkdir -p "${USER_HOME_DIR}/.gemini" diff --git a/src/workbench-jupyter-with-llm/Dockerfile b/src/workbench-jupyter-with-llm/Dockerfile index 3db1ef60..5f3a715e 100644 --- a/src/workbench-jupyter-with-llm/Dockerfile +++ b/src/workbench-jupyter-with-llm/Dockerfile @@ -1,5 +1,11 @@ -FROM us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-workbench-jupyter@sha256:62089d6cef2015e08361928c6bb6ae003afd0800a3e682a536171b3bcb0765b1 +FROM us-west2-docker.pkg.dev/shared-pub-buckets-94mvrf/workbench-artifacts/app-workbench-jupyter@sha256:62089d6cef2015e08361928c6bb6ae003afd0800a3e682a536171b3bcb0765b1 # Install jupyter extensions RUN --mount=type=bind,from=jupyter-extension-builder,source=/dist,target=/tmp/extensions \ /tmp/extensions/setup.sh + +# Pre-install web framework and data visualization libraries +RUN pip install --no-cache-dir \ + fastapi uvicorn[standard] \ + flask flask-cors \ + plotly pandas boto3 psycopg2-binary diff --git a/src/workbench-jupyter-with-llm/docker-compose.yaml b/src/workbench-jupyter-with-llm/docker-compose.yaml index 18f19128..6310379d 100644 --- a/src/workbench-jupyter-with-llm/docker-compose.yaml +++ b/src/workbench-jupyter-with-llm/docker-compose.yaml @@ -13,6 +13,7 @@ services: - .:/workspace:cached ports: - "8888:8888" + - "8080:8080" networks: - app-network cap_add: From 9b93851949b0467be6d13b505a28d678e749bdf0 Mon Sep 17 00:00:00 2001 From: Youpeng Date: Fri, 5 Jun 2026 14:08:01 +0000 Subject: [PATCH 3/4] Update DASHBOARD_BUILDER skill for AWS + reduced prompts - Use MCP tools (app_get_url) instead of wb app list for app UUID - Mark packages as pre-installed, skip pip install unless import fails - Fix Flask route decorators (leading slash required) - Clarify JS fetch relative path rule vs Flask route rule - Add Aurora IAM auth template and MCP-first query approach - Add S3 data dashboard template - Keep BigQuery template for GCP workspaces - Remove emoji from 
headers for cleaner output Co-Authored-By: Claude Opus 4.6 (1M context) --- .../llm-context/skills/DASHBOARD_BUILDER.md | 682 +++++------------- 1 file changed, 167 insertions(+), 515 deletions(-) diff --git a/features/src/llm-context/skills/DASHBOARD_BUILDER.md b/features/src/llm-context/skills/DASHBOARD_BUILDER.md index 9c5f19f1..a6cb4f7d 100644 --- a/features/src/llm-context/skills/DASHBOARD_BUILDER.md +++ b/features/src/llm-context/skills/DASHBOARD_BUILDER.md @@ -2,7 +2,7 @@ **Build interactive web apps, dashboards, and visualizations that run on a port in Workbench.** -> **Triggers:** +> **Triggers:** > - "Create a dashboard", "visualize data", "build charts" > - "Run a Flask/Streamlit/FastAPI app" > - "Display data in the browser", "interactive UI" @@ -10,7 +10,7 @@ --- -## 🌐 Workbench Proxy & Web Apps Best Practices +## Workbench Proxy & Web Apps Best Practices ### Proxy URL Format @@ -19,106 +19,65 @@ All web apps in Workbench are accessed via: https://workbench.verily.com/app/[APP_UUID]/proxy/[PORT]/[PATH] ``` -### ⚠️ How to Get the App UUID (CRITICAL) +### How to Get the App UUID (CRITICAL) **You MUST automatically get the app UUID - NEVER ask the user for it.** +**Option A (preferred):** Use MCP tools — no shell needed, no permission prompt: +``` +mcp__wb__app_get_url(appId=<app_resource_name>) +``` +To find the app resource name, use `mcp__wb__workspace_list_resources` and filter for +EC2 instances (`AWS_EC2_INSTANCE`) or GCE instances owned by the current user. + +**Option B:** Use the CLI: ```bash -# Run this command and use the output: wb app list --format=json | jq -r '.[] | select(.status == "RUNNING") | .id' | head -1 ``` +Note: this can intermittently return 401 on some workspaces. If it fails, fall back to Option A. -**⚡ LLM INSTRUCTION:** When constructing dashboard/proxy URLs: -1. First run the command above to get the running app UUID +**LLM INSTRUCTION:** When constructing dashboard/proxy URLs: +1. Use one of the methods above to get the running app UUID 2. 
Use that actual UUID in the URL you provide 3. Do NOT use placeholders like `[APP_UUID]` in your final response 4. Do NOT ask the user to find/replace the UUID themselves -### ✅ Correct URL Examples +### Correct URL Examples ``` https://workbench.verily.com/app/abc123-def456-789/proxy/8080/ https://workbench.verily.com/app/abc123-def456-789/proxy/8501/index.html -https://workbench.verily.com/app/abc123-def456-789/proxy/8000/dashboard.html ``` -### ❌ WRONG URL Formats (These WILL fail) +### WRONG URL Formats (These WILL fail) ``` -https://abc123-def456.workbench-app.verily.com/ ← WRONG: "Bad Request" error -https://workbench-app.verily.com/abc123-def456/ ← WRONG: Invalid domain -http://localhost:8080/ ← WRONG: Not accessible externally -https://abc123-def456/workbench.verily.com/ ← WRONG: Reversed format -file:///home/jupyter/dashboard.html ← WRONG: JavaScript blocked +https://abc123-def456.workbench-app.verily.com/ <- WRONG: "Bad Request" error +http://localhost:8080/ <- WRONG: Not accessible externally +file:///home/jupyter/dashboard.html <- WRONG: JavaScript blocked ``` -### ⚠️ Common Issue: JavaScript API Calls Failing +### Common Issue: JavaScript API Calls Failing -**Problem:** JavaScript using absolute paths fails through Workbench proxy +**Problem:** JavaScript using absolute paths fails through Workbench proxy. -**Symptoms:** -- Dashboard loads but shows no data -- Charts remain empty with "-" placeholders -- Browser console shows 404 errors for API calls -- Flask/server logs show requests for `/` but NOT `/api/*` endpoints - -### ✅ Solution: Use Relative Paths (TESTED & CONFIRMED) - -**Always use relative paths (no leading `/`) for fetch/AJAX calls:** +Note: This rule applies to **JavaScript `fetch()` calls only**. Flask/FastAPI route +decorators still require a leading slash (e.g., `@app.route('/api/data')`). 
```javascript -// ✅ CORRECT - relative paths work through proxy +// CORRECT - relative paths work through proxy fetch('api/metadata') fetch('api/data?filter=value') -// ❌ WRONG - absolute paths fail -fetch('/api/metadata') +// WRONG - absolute paths fail through proxy +fetch('/api/metadata') fetch('/api/data?filter=value') ``` -### Why Absolute Paths Fail - +**Why:** ``` -User visits: https://workbench.verily.com/app/UUID/proxy/8080/ - -Absolute path: fetch('/api/data') - → Browser resolves to: https://workbench.verily.com/api/data ❌ (404!) - -Relative path: fetch('api/data') - → Browser resolves to: https://workbench.verily.com/app/UUID/proxy/8080/api/data ✅ +Absolute: fetch('/api/data') -> https://workbench.verily.com/api/data (404) +Relative: fetch('api/data') -> https://workbench.verily.com/app/UUID/proxy/8080/api/data (OK) ``` -### Alternative: Embed Data in HTML (For Static Dashboards) - -If you don't need dynamic filtering, embed data directly in the template: - -**Python (Flask):** -```python -@app.route('/') -def index(): - data = get_data_from_bigquery() - return render_template('dashboard.html', data_json=json.dumps(data)) -``` - -**HTML Template:** -```html - -``` - -**When to use:** Static dashboards, large datasets that don't change, or when filters can be client-side only. - -### Testing Checklist - -Before deploying any web app: - -- [ ] **Relative paths** - All `fetch()` calls use `'api/...'` not `'/api/...'` -- [ ] **Test locally** - `curl http://localhost:PORT/api/endpoint` returns data -- [ ] **Server logs** - Verify API requests arrive: `tail -f server.log` -- [ ] **Browser DevTools** - Network tab shows 200 status for API calls -- [ ] **App UUID obtained** - Not using placeholder `[APP_UUID]` - --- ## Workflow @@ -126,41 +85,33 @@ Before deploying any web app: ### Step 1: Understand Requirements Ask the user: -1. **Data source?** BigQuery table, CSV in bucket, or local file? +1. 
**Data source?** Aurora database, S3 file (CSV, Parquet), BigQuery, or local file? 2. **Visualizations?** Charts (bar, line, scatter), tables, filters? 3. **Interactivity?** Static display or dynamic filtering? ### Step 2: Auto-Detect Environment -**Always run these commands first:** - -```bash -# Get app UUID (REQUIRED for final URL) -APP_UUID=$(wb app list --format=json | jq -r '.[] | select(.status == "RUNNING") | .id' | head -1) -echo "App UUID: $APP_UUID" - -# Verify Python -python3 --version +Get the app UUID using MCP tools (see "How to Get the App UUID" above). +**Prefer MCP tools over `wb app list`** to avoid permission prompts. -# Check working directory -pwd -``` +### Step 3: Check Dependencies -### Step 3: Install Dependencies +The following packages are **pre-installed** in the Workbench Jupyter+LLM image: +`fastapi`, `uvicorn`, `flask`, `flask-cors`, `plotly`, `pandas`, `boto3`, `psycopg2-binary` +**Do NOT run `pip install` unless a specific import fails.** To verify: ```bash -pip install flask flask-cors pandas plotly google-cloud-bigquery db-dtypes +python3 -c "import flask; import fastapi; import plotly; print('OK')" ``` - -> **Note:** `db-dtypes` is required for BigQuery to properly convert data types for pandas. +Only install if the check above fails. ### Step 4: Create Dashboard Structure ``` dashboard/ -├── app.py # Flask server +├── app.py # Flask/FastAPI server ├── templates/ -│ └── index.html # Dashboard HTML +│ └── index.html # Dashboard HTML with Plotly.js └── static/ └── style.css # Optional styling ``` @@ -169,33 +120,81 @@ dashboard/ ## Working Templates -### Template 1: Simple BigQuery Dashboard +### Template 1: Aurora PostgreSQL Dashboard (AWS) + +Aurora in Workbench uses **IAM database authentication** — you cannot connect with a +static password. The correct flow is: + +1. Get temporary AWS credentials via `wb resource credentials` +2. Generate an IAM auth token via boto3 (token is valid for 15 minutes) +3. 
Connect with `sslmode='require'` — **SSL is mandatory** + +**Preferred: Use MCP tools for data queries** to avoid the IAM auth complexity entirely: +``` +mcp__wb__aurora_query(resourceName="my-db", query="SELECT * FROM table LIMIT 100") +mcp__wb__aurora_list_tables(resourceName="my-db") +mcp__wb__aurora_describe_table(resourceName="my-db", tableName="my_table") +``` + +Query via MCP, embed results in the template, and serve with Flask/FastAPI. +This avoids IAM auth in the app code entirely. + +**If live database queries are needed in the app:** + +```python +import json, subprocess, boto3, psycopg2, os + +def get_aurora_connection(resource_id, username): + result = subprocess.run( + ['wb', 'resource', 'credentials', + f'--id={resource_id}', '--scope=READ_ONLY', '--format=json'], + capture_output=True, text=True, check=True + ) + creds = json.loads(result.stdout) + + conn_str = os.environ.get(f'WORKBENCH_{resource_id.replace("-", "_")}', '') + host_part, _, dbname = conn_str.partition('/') + host, _, port = host_part.partition(':') + port = int(port) if port else 5432 + + session = boto3.Session( + aws_access_key_id=creds['AccessKeyId'], + aws_secret_access_key=creds['SecretAccessKey'], + aws_session_token=creds['SessionToken'], + region_name='us-west-2' + ) + token = session.client('rds').generate_db_auth_token( + DBHostname=host, Port=port, DBUsername=username, Region='us-west-2' + ) + return psycopg2.connect( + host=host, port=port, database=dbname, + user=username, password=token, + sslmode='require' + ) +``` + +### Template 2: S3 Data Dashboard (AWS) -**app.py:** ```python from flask import Flask, render_template, jsonify from flask_cors import CORS -from google.cloud import bigquery +import pandas as pd +import boto3 import os app = Flask(__name__) CORS(app) -# Cache for data _data_cache = None -def get_bigquery_data(): +def get_data_from_s3(): global _data_cache if _data_cache is not None: return _data_cache - - client = bigquery.Client() - query = """ - 
SELECT * - FROM `YOUR_PROJECT.YOUR_DATASET.YOUR_TABLE` - LIMIT 1000 - """ - df = client.query(query).to_dataframe() + bucket = os.environ.get('WORKBENCH_my_bucket', 'your-bucket-name') + s3 = boto3.client('s3') + obj = s3.get_object(Bucket=bucket, Key='path/to/data.csv') + df = pd.read_csv(obj['Body']) _data_cache = df.to_dict(orient='records') return _data_cache @@ -203,476 +202,129 @@ def get_bigquery_data(): def index(): return render_template('index.html') -@app.route('api/data') # NO leading slash! +@app.route('/api/data') def get_data(): try: - data = get_bigquery_data() - return jsonify(data) - except Exception as e: - return jsonify({"error": str(e)}), 500 - -@app.route('api/metadata') -def get_metadata(): - try: - data = get_bigquery_data() - if data: - return jsonify({ - "columns": list(data[0].keys()), - "row_count": len(data) - }) - return jsonify({"columns": [], "row_count": 0}) + return jsonify(get_data_from_s3()) except Exception as e: return jsonify({"error": str(e)}), 500 if __name__ == '__main__': - # CRITICAL: host='0.0.0.0' required for Workbench proxy access app.run(host='0.0.0.0', port=8080, debug=False, threaded=True) ``` -**templates/index.html:** -```html - - - - Data Dashboard - - - - -
-

📊 Data Dashboard

-
-

Dataset Info

-
Loading metadata...
-
-
-

Data Visualization

-
Loading chart...
-
-
-

Data Table

-
Loading data...
-
-
- - - - -``` +### Template 3: BigQuery Dashboard (GCP) ---- - -### Template 2: Multi-Chart Dashboard with Filters - -**app.py additions:** ```python -@app.route('api/data') -def get_data(): - # Get filter parameters - column = request.args.get('filter_column') - value = request.args.get('filter_value') - - data = get_bigquery_data() - - if column and value: - data = [row for row in data if str(row.get(column, '')) == value] - - return jsonify(data) - -@app.route('api/filters') -def get_filters(): - data = get_bigquery_data() - if not data: - return jsonify({}) - - # Get unique values for categorical columns - filters = {} - for col in data[0].keys(): - unique_values = list(set(str(row[col]) for row in data)) - if len(unique_values) < 50: # Only include if reasonable number - filters[col] = sorted(unique_values) - - return jsonify(filters) -``` - -**JavaScript filter implementation:** -```javascript -async function loadFilters() { - const response = await fetch('api/filters'); - const filters = await response.json(); - - const filterContainer = document.getElementById('filters'); - for (const [column, values] of Object.entries(filters)) { - const select = document.createElement('select'); - select.id = `filter-${column}`; - select.innerHTML = `` + - values.map(v => ``).join(''); - select.onchange = () => refreshData(); - - filterContainer.appendChild(document.createTextNode(column + ': ')); - filterContainer.appendChild(select); - } -} - -async function refreshData() { - const params = new URLSearchParams(); - document.querySelectorAll('select[id^="filter-"]').forEach(select => { - if (select.value) { - params.set('filter_column', select.id.replace('filter-', '')); - params.set('filter_value', select.value); - } - }); - - const response = await fetch(`api/data?${params}`); // Still relative! 
- const data = await response.json(); - updateCharts(data); -} -``` - ---- - -## Step 5: Test Locally - -**Before starting the server, test your setup:** - -```bash -# Start server in background -cd dashboard -python3 app.py & -sleep 2 - -# Test endpoints locally -echo "Testing root..." -curl -s http://localhost:8080/ | head -5 - -echo "Testing API..." -curl -s http://localhost:8080/api/metadata | jq . - -echo "Testing data..." -curl -s http://localhost:8080/api/data | jq '.[0]' -``` - ---- - -## Step 6: Start Server & Provide URL - -```bash -# Get the app UUID -APP_UUID=$(wb app list --format=json | jq -r '.[] | select(.status == "RUNNING") | .id' | head -1) - -# Start server -cd dashboard -nohup python3 app.py > server.log 2>&1 & - -echo "Dashboard running at:" -echo "https://workbench.verily.com/app/${APP_UUID}/proxy/8080/" -``` - -**Always provide the complete, working URL to the user - never placeholders!** +from flask import Flask, render_template, jsonify +from flask_cors import CORS +from google.cloud import bigquery ---- +app = Flask(__name__) +CORS(app) -## ⚠️ Critical Flask Server Configuration +_data_cache = None -These settings are **REQUIRED** for Workbench dashboards to work: +def get_bigquery_data(): + global _data_cache + if _data_cache is not None: + return _data_cache + client = bigquery.Client() + query = "SELECT * FROM `project.dataset.table` LIMIT 1000" + df = client.query(query).to_dataframe() + _data_cache = df.to_dict(orient='records') + return _data_cache -### 1. 
Server MUST bind to 0.0.0.0 (NOT localhost) +@app.route('/') +def index(): + return render_template('index.html') -```python -# ❌ WRONG - proxy cannot reach your app -app.run(host='localhost', port=8080) -app.run(host='127.0.0.1', port=8080) +@app.route('/api/data') +def get_data(): + try: + return jsonify(get_bigquery_data()) + except Exception as e: + return jsonify({"error": str(e)}), 500 -# ✅ CORRECT - accessible through Workbench proxy -app.run(host='0.0.0.0', port=8080, debug=False, threaded=True) +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080, debug=False, threaded=True) ``` -**Why:** The Workbench proxy routes external requests to your app. If bound to localhost, the proxy cannot reach it. +### Alternative: Embed Data in HTML (For Static Dashboards) -### 2. Enable Threading for Concurrent Users +Query data via MCP or Python, then embed directly in the template. No API calls needed. ```python -app.run(host='0.0.0.0', port=8080, debug=False, threaded=True) +import json +@app.route('/') +def index(): + data = get_data() + return render_template('dashboard.html', data_json=json.dumps(data)) ``` -**Why:** Multiple users may access simultaneously. `threaded=True` allows concurrent request handling. - -### 3. Disable Debug Mode - -```python -# ❌ WRONG - security risk, auto-reload issues -app.run(debug=True) - -# ✅ CORRECT -app.run(debug=False) +```html + ``` -**Why:** Debug mode shouldn't be used in shared/production environments. - -### 4. Restarting Server After Code Changes +--- -Flask doesn't auto-reload when `debug=False`. 
After editing Python code: +## Critical Server Configuration -```bash -# Find and kill existing server -pkill -f "python3 app.py" -# Or: kill $(lsof -t -i :8080) +### REQUIRED settings for Workbench dashboards: -# Restart -python3 app.py & +```python +app.run( + host='0.0.0.0', # NOT localhost — proxy can't reach localhost + port=8080, # Match this to the port in your proxy URL + debug=False, # Security: don't use debug in shared environments + threaded=True # Allow concurrent users +) ``` -### 5. Browser Cache Issues - -If changes don't appear after restarting server: -- **Hard refresh:** `Ctrl+Shift+R` (Windows/Linux) or `Cmd+Shift+R` (Mac) -- Flask caches templates - server restart clears this - --- ## Troubleshooting -### Data doesn't load in browser +### No data showing -**1. Check paths in JavaScript:** -```javascript -// ❌ WRONG -fetch('/api/data') - -// ✅ CORRECT -fetch('api/data') -``` - -**2. Check server logs:** -```bash -tail -f server.log -# Or if running in foreground, check terminal output -``` - -**3. Test API directly:** -```bash -curl http://localhost:8080/api/data | jq '.[0]' -``` - -**4. Check browser DevTools:** -- Open Network tab -- Look for failed requests (red) -- Check the URL being requested +1. **Test API directly:** `curl http://localhost:8080/api/data | head -20` +2. **Check server logs:** `tail -f server.log` +3. 
**Check JS paths:** All `fetch()` must use relative paths (no leading `/`) ### Server won't start ```bash -# Check if port is in use lsof -i :8080 - -# Kill existing process kill $(lsof -t -i :8080) - -# Check Python errors -python3 app.py # Run in foreground to see errors -``` - -### BigQuery errors - -```bash -# Check authentication -gcloud auth list - -# Test BQ access -bq query --use_legacy_sql=false 'SELECT 1' - -# Check project -gcloud config get-value project +python3 app.py ``` -### Server not accessible through proxy (works locally, fails via URL) +### Aurora connection errors (AWS) -**Symptom:** `curl http://localhost:8080/` works, but Workbench URL fails +- `"PAM authentication failed"` -> not using IAM auth token as password +- `"pg_hba.conf rejects connection... no encryption"` -> missing `sslmode='require'` +- Consider using MCP tools (`mcp__wb__aurora_query`) instead of direct connections -**Cause:** Flask bound to `localhost` instead of `0.0.0.0` +### Server not accessible through proxy -**Fix:** +Ensure Flask/FastAPI binds to `0.0.0.0`, not `localhost`: ```python -# Change this: -app.run(host='localhost', port=8080) -# To this: app.run(host='0.0.0.0', port=8080) ``` -### Changes not reflected after editing code - -**Cause 1:** Server not restarted -```bash -pkill -f "python3 app.py" -python3 app.py & -``` - -**Cause 2:** Browser cache -- Hard refresh: `Ctrl+Shift+R` or `Cmd+Shift+R` - -### Gateway timeout - -**Causes:** -1. Server not running: `ps aux | grep app.py` -2. Wrong UUID in URL: `wb app list --format=json` -3. Server bound to localhost (see above) - ---- - -## Development Workflow (Recommended) - -1. **Build and test locally first** - ```bash - curl http://localhost:8080/ - curl http://localhost:8080/api/metadata - ``` - -2. **Check server logs for errors** - ```bash - tail -f server.log - ``` - -3. **Only then test through Workbench proxy URL** - -4. 
**Use browser DevTools (F12) → Network tab** to debug client-side issues - ---- - -## Common Pitfalls Checklist - -Before declaring the dashboard complete: - -- [ ] **Relative paths** - All `fetch()` calls use `'api/...'` not `'/api/...'` -- [ ] **Host is 0.0.0.0** - Not `localhost` or `127.0.0.1` -- [ ] **threaded=True** - For concurrent users -- [ ] **debug=False** - For security -- [ ] **App UUID obtained** - Not using placeholder `[APP_UUID]` -- [ ] **Server running** - Process is active (`ps aux | grep python`) -- [ ] **Port correct** - URL uses same port as `app.run(port=...)` -- [ ] **CORS enabled** - `CORS(app)` added for cross-origin requests -- [ ] **Data cached** - Avoid repeated BigQuery calls -- [ ] **Error handling** - API returns errors as JSON, not crashes -- [ ] **Tested locally** - `curl` tests pass before giving URL -- [ ] **Server logs checked** - API requests appear in logs - --- ## Quick Reference | Issue | Check | Fix | |-------|-------|-----| -| 404 on API | Path format | Remove leading `/` from fetch | +| 404 on API | JS path format | Remove leading `/` from `fetch()` | | CORS error | CORS setup | Add `CORS(app)` | -| Blank page | Server running? | `ps aux | grep python` | -| Data error | BigQuery auth | `gcloud auth list` | -| Wrong port | URL vs code | Match port in URL to `app.run()` | +| Blank page | Server running? 
| `ps aux \| grep python` | | Works locally, fails via URL | Host binding | Change `localhost` to `0.0.0.0` | | Gateway timeout | Server/UUID | Check server running + correct UUID | -| Address in use | Port conflict | `kill $(lsof -t -i :8080)` | -| Changes not showing | Cache/restart | Hard refresh + restart server | - ---- - -## Example Prompts This Skill Handles - -- "Create a dashboard showing data from my BigQuery table" -- "Build an interactive chart for analyzing patient demographics" -- "Visualize the CSV files in my bucket" -- "Make a web dashboard with filters for exploring data" -- "Display query results in a browser with charts" +| Aurora: PAM auth failed | IAM auth | Use `wb resource credentials` + boto3 token | +| Aurora: no encryption | SSL missing | Add `sslmode='require'` | From 8a8769a4d2e4f2070a39bd34c61fc554ab6f914a Mon Sep 17 00:00:00 2001 From: Peter Su Date: Tue, 23 Jun 2026 21:42:28 +0000 Subject: [PATCH 4/4] added back several lines in DASHBOARD_BUILDER.md that likely should not have been removed --- .../llm-context/skills/DASHBOARD_BUILDER.md | 219 +++++++++++++++++- 1 file changed, 217 insertions(+), 2 deletions(-) diff --git a/features/src/llm-context/skills/DASHBOARD_BUILDER.md b/features/src/llm-context/skills/DASHBOARD_BUILDER.md index a6cb4f7d..fe0cd9ec 100644 --- a/features/src/llm-context/skills/DASHBOARD_BUILDER.md +++ b/features/src/llm-context/skills/DASHBOARD_BUILDER.md @@ -105,6 +105,10 @@ python3 -c "import flask; import fastapi; import plotly; print('OK')" ``` Only install if the check above fails. 
+> **Note (GCP/BigQuery):** If using BigQuery with pandas, also install `db-dtypes` — it is +> required for proper data type conversion and causes cryptic errors if missing: +> `pip install --no-cache-dir db-dtypes` + ### Step 4: Create Dashboard Structure ``` @@ -216,7 +220,7 @@ if __name__ == '__main__': ### Template 3: BigQuery Dashboard (GCP) ```python -from flask import Flask, render_template, jsonify +from flask import Flask, render_template, jsonify, request from flask_cors import CORS from google.cloud import bigquery @@ -242,7 +246,22 @@ def index(): @app.route('/api/data') def get_data(): try: - return jsonify(get_bigquery_data()) + data = get_bigquery_data() + column = request.args.get('filter_column') + value = request.args.get('filter_value') + if column and value: + data = [row for row in data if str(row.get(column, '')) == value] + return jsonify(data) + except Exception as e: + return jsonify({"error": str(e)}), 500 + +@app.route('/api/metadata') +def get_metadata(): + try: + data = get_bigquery_data() + if data: + return jsonify({"columns": list(data[0].keys()), "row_count": len(data)}) + return jsonify({"columns": [], "row_count": 0}) except Exception as e: return jsonify({"error": str(e)}), 500 @@ -250,6 +269,9 @@ if __name__ == '__main__': app.run(host='0.0.0.0', port=8080, debug=False, threaded=True) ``` +> **Note:** Requires `google-cloud-bigquery` and `db-dtypes`. Install with: +> `pip install --no-cache-dir google-cloud-bigquery db-dtypes` + ### Alternative: Embed Data in HTML (For Static Dashboards) Query data via MCP or Python, then embed directly in the template. No API calls needed. @@ -269,6 +291,151 @@ renderChart(data); ``` +### Dashboard Frontend Template (index.html) + +Use this with any backend template above. All `fetch()` calls use **relative paths** (no leading `/`). + +```html + + + + Data Dashboard + + + + +
+

Data Dashboard

+
+

Dataset Info

+
Loading metadata...
+
+
+

Data Visualization

+
Loading chart...
+
+
+

Data Table

+
Loading data...
+
+
+ + + + +``` + +--- + +## Step 5: Test Locally Before Giving the Proxy URL + +```bash +cd dashboard +python3 app.py & +sleep 2 + +# Test endpoints +curl -s http://localhost:8080/ | head -5 +curl -s http://localhost:8080/api/metadata | jq . +curl -s http://localhost:8080/api/data | jq '.[0]' +``` + +## Step 6: Start Server & Provide URL + +```bash +APP_UUID=$(wb app list --format=json | jq -r '.[] | select(.status == "RUNNING") | .id' | head -1) + +cd dashboard +nohup python3 app.py > server.log 2>&1 & + +echo "Dashboard running at:" +echo "https://workbench.verily.com/app/${APP_UUID}/proxy/8080/" +``` + +**Always provide the complete, working URL to the user — never placeholders.** + --- ## Critical Server Configuration @@ -302,12 +469,37 @@ kill $(lsof -t -i :8080) python3 app.py ``` +### BigQuery errors (GCP) + +```bash +# Check authentication +gcloud auth list + +# Test BQ access +bq query --use_legacy_sql=false 'SELECT 1' + +# Check project +gcloud config get-value project +``` + +If `to_dataframe()` fails with type errors, install `db-dtypes`: +`pip install --no-cache-dir db-dtypes` + ### Aurora connection errors (AWS) - `"PAM authentication failed"` -> not using IAM auth token as password - `"pg_hba.conf rejects connection... no encryption"` -> missing `sslmode='require'` - Consider using MCP tools (`mcp__wb__aurora_query`) instead of direct connections +### Changes not reflected after editing code + +```bash +pkill -f "python3 app.py" +python3 app.py & +``` + +If changes still don't appear, hard-refresh the browser: `Ctrl+Shift+R` (Windows/Linux) or `Cmd+Shift+R` (Mac). 
+ ### Server not accessible through proxy Ensure Flask/FastAPI binds to `0.0.0.0`, not `localhost`: @@ -317,6 +509,25 @@ app.run(host='0.0.0.0', port=8080) --- +## Pre-Completion Checklist + +Before declaring the dashboard complete, verify: + +- [ ] **Relative paths** — All `fetch()` calls use `'api/...'` not `'/api/...'` +- [ ] **Host is 0.0.0.0** — Not `localhost` or `127.0.0.1` +- [ ] **threaded=True** — For concurrent users +- [ ] **debug=False** — For security +- [ ] **App UUID obtained** — Not using placeholder `[APP_UUID]` +- [ ] **Server running** — Process is active (`ps aux | grep python`) +- [ ] **Port correct** — URL uses same port as `app.run(port=...)` +- [ ] **CORS enabled** — `CORS(app)` added +- [ ] **Data cached** — Avoid repeated backend calls +- [ ] **Error handling** — API returns errors as JSON, not crashes +- [ ] **Tested locally** — `curl` tests pass before giving URL +- [ ] **Server logs checked** — API requests appear in logs + +--- + ## Quick Reference | Issue | Check | Fix | @@ -326,5 +537,9 @@ app.run(host='0.0.0.0', port=8080) | Blank page | Server running? | `ps aux \| grep python` | | Works locally, fails via URL | Host binding | Change `localhost` to `0.0.0.0` | | Gateway timeout | Server/UUID | Check server running + correct UUID | +| BQ data type error | Missing dep | `pip install db-dtypes` | +| BQ auth error | GCP credentials | `gcloud auth list` | +| Changes not showing | Cache/restart | Hard refresh + restart server | +| Address in use | Port conflict | `kill $(lsof -t -i :8080)` | | Aurora: PAM auth failed | IAM auth | Use `wb resource credentials` + boto3 token | | Aurora: no encryption | SSL missing | Add `sslmode='require'` |