Skip to content

Commit da25cd1

Browse files
feat: add search tool with RAG (#57)
- add a new MCP Server tool `firebolt_search`. This tool allows searching the Firebolt documentation using a RAG endpoint - remove full list of documentation articles from `firebolt_docs` tool - do not require docs_proof in `firebolt_connect` by default. Can be forced in config --------- Signed-off-by: Alex Kaplun <o.kaplun@firebolt.io>
1 parent ea3195c commit da25cd1

20 files changed

Lines changed: 824 additions & 385 deletions

File tree

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,14 +168,25 @@ To enable SSE, set the `--transport` CLI flag (or the `FIREBOLT_MCP_TRANSPORT` e
168168
169169
Optionally, you can specify the address the server should listen on by setting the `--transport-sse-listen-address` CLI flag (or the `FIREBOLT_MCP_TRANSPORT_SSE_LISTEN_ADDRESS` environment variable).
170170
171+
172+
#### Requiring LLMs to present docs read proof before connecting
173+
174+
To provide wider context to LLMs before connecting to Firebolt and running queries, by default the `firebolt_connect` tool
175+
requires the LLM to present a read proof of the Firebolt documentation (by querying the `firebolt_docs_overview` tool).
176+
177+
While this gives LLMs a good starting point by ensuring they have the full context of the Firebolt documentation, it may also lead to slower responses and higher token consumption.
178+
179+
To disable this requirement, set the `--skip-docs-proof` CLI bool flag (or the `FIREBOLT_MCP_SKIP_DOCS_PROOF` environment variable) to `true`.
180+
171181
## Architecture
172182
173183
Firebolt MCP Server implements the [Model Context Protocol](https://modelcontextprotocol.io/introduction), providing:
174184
175185
1. **Tools** - Task-specific capabilities provided to the LLM:
176-
- `firebolt_docs`: Access Firebolt documentation
186+
- `firebolt_docs_overview`: Access basic Firebolt documentation overview
177187
- `firebolt_connect`: Establish connections to Firebolt engines and databases
178188
- `firebolt_query`: Execute SQL queries against Firebolt
189+
- `firebolt_docs_search`: Search Firebolt documentation for any details
179190
180191
2. **Resources** - Data that can be referenced by the LLM:
181192
- Documentation articles

cmd/firebolt-mcp-server/main.go

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@ import (
2020
"github.com/firebolt-db/mcp-server/pkg/prompts"
2121
"github.com/firebolt-db/mcp-server/pkg/resources"
2222
"github.com/firebolt-db/mcp-server/pkg/server"
23-
"github.com/firebolt-db/mcp-server/pkg/tools"
23+
"github.com/firebolt-db/mcp-server/pkg/tools/tool_connect"
24+
"github.com/firebolt-db/mcp-server/pkg/tools/tool_docs"
25+
"github.com/firebolt-db/mcp-server/pkg/tools/tool_query"
26+
"github.com/firebolt-db/mcp-server/pkg/tools/tool_search"
2427
)
2528

2629
var (
@@ -94,6 +97,14 @@ func main() {
9497
Usage: "Firebolt environment to connect to",
9598
Sources: cli.EnvVars("FIREBOLT_MCP_ENVIRONMENT"),
9699
},
100+
&cli.BoolFlag{
101+
Name: "skip-docs-proof",
102+
Category: "MCP Tools Configuration",
103+
Value: false,
104+
Usage: "Skip the requirement for LLM to provide a token as a proof it has reviewed documentation overview. When enabled, LLM " +
105+
"will not be forced to gather more starting context and become smarter, but this means more tokens consumed and slower responses.",
106+
Sources: cli.EnvVars("FIREBOLT_MCP_SKIP_DOCS_PROOF"),
107+
},
97108
},
98109
Action: run,
99110
}
@@ -129,21 +140,40 @@ func run(ctx context.Context, cmd *cli.Command) error {
129140
}
130141

131142
// Initialize MCP server
132-
docsProof := generateRandomSecret()
143+
docsProofToken := generateRandomSecret()
133144
disableResources := cmd.Bool("disable-resources")
134-
resourceDocs := resources.NewDocs(fireboltdocs.FS, docsProof)
145+
resourceDocs := resources.NewDocs(fireboltdocs.FS, docsProofToken)
135146
resourceAccounts := resources.NewAccounts(discoveryClient)
136147
resourceDatabases := resources.NewDatabases(dbPool)
137148
resourceEngines := resources.NewEngines(dbPool)
149+
150+
searchCfg := tool_search.Config{
151+
BaseURL: fmt.Sprintf("https://api.%s", cmd.String("environment")),
152+
ClientID: clientID,
153+
ClientSecret: clientSecret,
154+
TokenURL: fmt.Sprintf("https://id.%s/oauth/token", cmd.String("environment")),
155+
}
156+
157+
searchTool, err := tool_search.NewSearch(ctx, searchCfg)
158+
if err != nil {
159+
return fmt.Errorf("failed to create search tool: %w", err)
160+
}
161+
162+
var docsProof *string
163+
if !cmd.Bool("skip-docs-proof") {
164+
docsProof = &docsProofToken
165+
}
166+
138167
srv := server.NewServer(
139168
logger,
140169
fullVersion(),
141170
cmd.String("transport"),
142171
cmd.String("transport-sse-listen-address"),
143172
[]server.Tool{
144-
tools.NewConnect(resourceAccounts, resourceDatabases, resourceEngines, docsProof, disableResources),
145-
tools.NewDocs(resourceDocs, disableResources),
146-
tools.NewQuery(dbPool),
173+
tool_connect.NewConnect(resourceAccounts, resourceDatabases, resourceEngines, docsProof, disableResources),
174+
tool_docs.NewDocs(resourceDocs, disableResources),
175+
tool_query.NewQuery(dbPool),
176+
searchTool,
147177
},
148178
[]server.Prompt{
149179
prompts.NewFireboltExpert(),

docs/overview.md

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,10 @@ graph TD
2020
Server["MCP Server Core"]
2121
2222
subgraph "Tools"
23-
DocsT["firebolt_docs"]
23+
DocsT["firebolt_docs_overview"]
2424
ConnectT["firebolt_connect"]
2525
QueryT["firebolt_query"]
26+
SearchT["firebolt_docs_search"]
2627
end
2728
2829
subgraph "Resources"
@@ -48,7 +49,8 @@ graph TD
4849
4950
Server --> DocsT
5051
Server --> ConnectT
51-
Server --> QueryT
52+
Server --> QueryT
53+
Server --> SearchT
5254
Server --> DocsR
5355
Server --> AccountsR
5456
Server --> DatabasesR
@@ -81,21 +83,25 @@ Key features:
8183

8284
Tools are executable capabilities exposed to LLMs through the MCP interface:
8385

84-
1. **firebolt_docs** (`pkg/tools/docs.go`)
86+
1. **firebolt_docs_overview** (`pkg/tools/tool_docs/docs.go`)
8587
- Provides access to Firebolt documentation
8688
- Returns embedded markdown content for various documentation articles
8789
- Helps LLMs understand Firebolt concepts, SQL syntax, and best practices
8890

89-
2. **firebolt_connect** (`pkg/tools/connect.go`)
91+
2. **firebolt_connect** (`pkg/tools/tool_connect/connect.go`)
9092
- Lists available Firebolt accounts, databases, and engines
9193
- Requires a "proof" from documentation to ensure the LLM has read basic Firebolt information
9294
- Enables discovery of resources before executing queries
9395

94-
3. **firebolt_query** (`pkg/tools/query.go`)
96+
3. **firebolt_query** (`pkg/tools/tool_query/query.go`)
9597
- Executes SQL queries against Firebolt databases
9698
- Manages connections to the specified account, database, and engine
9799
- Returns query results in JSON format
98100

101+
4. **firebolt_docs_search** (`pkg/tools/tool_search/search.go`)
102+
- Provides RAG search functionality for Firebolt documentation
103+
- Allows LLMs to find specific articles in the documentation using semantic search
104+
99105
### 3. Resources
100106

101107
Resources are information objects that can be accessed by the LLM:
@@ -135,9 +141,10 @@ The MCP server includes specialized clients to interact with Firebolt services:
135141
sequenceDiagram
136142
participant LLM as LLM Client
137143
participant Server as MCP Server
138-
participant Docs as firebolt_docs
144+
participant Docs as firebolt_docs_overview
139145
participant Connect as firebolt_connect
140146
participant Query as firebolt_query
147+
participant Search as firebolt_docs_search
141148
participant FireboltAPI as Firebolt API
142149
participant FireboltDB as Firebolt Database
143150
@@ -146,10 +153,18 @@ sequenceDiagram
146153
147154
Note over LLM, Server: Documentation Flow
148155
LLM->>Server: Request documentation
149-
Server->>Docs: Call firebolt_docs
156+
Server->>Docs: Call firebolt_docs_overview
150157
Docs->>Server: Return documentation resources
151158
Server->>LLM: Documentation content
152159
160+
Note over LLM, Server: Search Flow
161+
LLM->>Server: Request documentation search
162+
Server->>Search: Call firebolt_docs_search
163+
Search->>FireboltAPI: Call API to run search
164+
FireboltAPI->>Search: Return search results
165+
Search->>Server: Return search results
166+
Server->>LLM: Search results content
167+
153168
Note over LLM, Server: Connection Flow
154169
LLM->>Server: Request resources
155170
Server->>Connect: Call firebolt_connect

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ go 1.24.1
55
require (
66
github.com/JohannesKaufmann/html-to-markdown/v2 v2.5.0
77
github.com/firebolt-db/firebolt-go-sdk v1.13.0
8+
github.com/go-ozzo/ozzo-validation/v4 v4.3.0
89
github.com/gocolly/colly/v2 v2.3.0
910
github.com/modelcontextprotocol/go-sdk v1.2.0
1011
github.com/neilotoole/slogt v1.1.0
@@ -21,6 +22,7 @@ require (
2122
github.com/antchfx/htmlquery v1.3.5 // indirect
2223
github.com/antchfx/xmlquery v1.5.0 // indirect
2324
github.com/antchfx/xpath v1.3.5 // indirect
25+
github.com/asaskevich/govalidator v0.0.0-20200108200545-475eaeb16496 // indirect
2426
github.com/astaxie/beego v1.12.3 // indirect
2527
github.com/bits-and-blooms/bitset v1.24.4 // indirect
2628
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect

0 commit comments

Comments
 (0)