Skip to content

Commit a3cee75

Browse files
authored
rust(feature): mcp tools for download/uploading datasets (#586)
1 parent 5b21aee commit a3cee75

34 files changed

Lines changed: 4686 additions & 529 deletions

File tree

Cargo.toml

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ license = "MIT"
2626

2727
[workspace.dependencies]
2828
anyhow = "1.0"
29-
arrow-array = "58.1.0"
30-
arrow-schema = "58.1.0"
29+
arrow = "58.3.0"
30+
arrow-array = "58.3.0"
31+
arrow-schema = "58.3.0"
3132
async-channel = "2.2"
3233
async-trait = "^0.1"
3334
mockall = "0.14.0"
@@ -49,11 +50,12 @@ hyper = { version = "1.8", features = ["server", "http1"] }
4950
hyper-util = { version = "0.1.20", features = ["service", "server", "tokio"] }
5051
indicatif = "0.18"
5152
indoc = "2.0"
52-
parquet = "58.0"
53+
parquet = "58.3.0"
5354
prost = "^0.14"
5455
prost-types = "^0.14"
5556
pbjson = "^0.9"
5657
pbjson-types = "^0.9"
58+
polars = { version = "0.53.0", features = ["sql", "lazy", "parquet"] }
5759
pyo3 = "0.28"
5860
pyo3-async-runtimes = { version = "0.28", features = ["tokio-runtime"] }
5961
pyo3-stub-gen = "0.10"

rust/crates/sift_cli/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@ repository.workspace = true
99
keywords.workspace = true
1010
readme.workspace = true
1111
license.workspace = true
12-
description = "CLI to streamline programmatic workflows with Sift's API"
13-
changelog = "CHANGELOG.md"
12+
description = "Sift CLI"
1413

1514
[[bin]]
1615
name = "sift-cli"
@@ -31,6 +30,7 @@ parquet = { workspace = true }
3130
pbjson-types = { workspace = true }
3231
reqwest = { workspace = true }
3332
serde_json = { workspace = true }
33+
sift_mcp.workspace = true
3434
sift_pbfs = { workspace = true }
3535
tdms = { workspace = true }
3636
hdf5 = { workspace = true }

rust/crates/sift_cli/src/cli/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ pub enum Cmd {
5353
#[command(subcommand)]
5454
Import(ImportCmd),
5555

56+
/// Start the Sift MCP server
57+
Mcp,
58+
5659
/// Ping the Sift API to verify credentials and connectivity
5760
Ping,
5861
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
use std::process::ExitCode;
2+
3+
use anyhow::Result;
4+
use sift_rs::Credentials;
5+
6+
use crate::cmd::Context;
7+
8+
pub async fn run(ctx: Context) -> Result<ExitCode> {
9+
let credentials = Credentials::Config {
10+
uri: ctx.grpc_uri,
11+
apikey: ctx.api_key,
12+
};
13+
match sift_mcp::run(credentials, !ctx.disable_tls).await {
14+
Ok(_) => Ok(ExitCode::SUCCESS),
15+
Err(err) => Err(err),
16+
}
17+
}

rust/crates/sift_cli/src/cmd/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ pub mod completions;
88
pub mod config;
99
pub mod export;
1010
pub mod import;
11+
pub mod mcp;
1112
pub mod ping;
1213

1314
pub struct Context {

rust/crates/sift_cli/src/main.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ where
4040
runtime.block_on(fut)
4141
}
4242

43+
fn run_future_mt<F>(fut: F) -> Result<ExitCode>
44+
where
45+
F: Future<Output = Result<ExitCode>> + 'static,
46+
{
47+
let runtime = runtime::Builder::new_multi_thread()
48+
.enable_all()
49+
.build()
50+
.context("failed to initialize Tokio runtime")?;
51+
52+
runtime.block_on(fut)
53+
}
54+
4355
fn run(clargs: cli::Args) -> Result<ExitCode> {
4456
// These commands don't require `Context`
4557
match clargs.cmd {
@@ -56,11 +68,17 @@ fn run(clargs: cli::Args) -> Result<ExitCode> {
5668
_ => (),
5769
}
5870

71+
let ctx = Context::new(clargs.profile.clone(), clargs.disable_tls)?;
72+
73+
// Mcp Server
74+
if let Cmd::Mcp = clargs.cmd {
75+
return run_future_mt(cmd::mcp::run(ctx));
76+
}
77+
5978
let profile = clargs
6079
.profile
6180
.as_ref()
6281
.map_or_else(|| "default".to_string().cyan(), |s| s.clone().cyan());
63-
let ctx = Context::new(clargs.profile, clargs.disable_tls)?;
6482

6583
Output::new()
6684
.line(format!("{} profile '{profile}'", "Using".green()))

rust/crates/sift_mcp/CLAUDE.md

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# sift_mcp — guidance for Claude
2+
3+
## Writing tool descriptions
4+
5+
Tool descriptions in this crate are read by other agents at call time. They are the *only* documentation the calling LLM gets, so optimize for an agent making a decision under context pressure, not for a human reading the source.
6+
7+
### Structure
8+
9+
Use this section ordering. Skip a section only when it has no content; do not reorder.
10+
11+
1. **One-line purpose.** First sentence states what the tool does and where the output goes ("Retrieve X and write to Y", "List Z filtered by W"). The agent should be able to match intent from this line alone.
12+
2. **Output schema.** When the tool returns structured data or writes a file, describe the shape — column names, types, what null means, where metadata lives. The agent will consume this output; don't make it guess.
13+
3. **Parameters.** One bullet per parameter, in declaration order. Spell out:
14+
- Whether matching is exact or pattern-based.
15+
- Conditional requirements ("required when X is omitted").
16+
- Sentinel values and their meaning (e.g. `sample_ms = 0` → raw samples).
17+
- Mutually exclusive choices (e.g. `Names` vs `Regex` variants).
18+
- Side effects on the filesystem or external state (truncate mode, idempotency).
19+
4. **Errors.** Name the actual `ErrorData` variants the tool returns (`RESOURCE_NOT_FOUND`, `INVALID_PARAMS`, etc.) and the condition that triggers each. The agent can then recover with different inputs instead of treating every failure as terminal.
20+
5. **Guidance.** Performance characteristics, recommended call patterns, when to chunk, when to prefer one parameter shape over another. Keep this to load-bearing advice — the agent doesn't need general SQL/Arrow background.
21+
22+
### Style rules
23+
24+
- Write in direct voice. "Retrieve …", not "This tool retrieves …".
25+
- Use backticks for parameter names, enum variants, and field names so they survive Markdown rendering on the client.
26+
- Escape inner double quotes as `\"` — the description is a Rust string literal inside the `#[tool(...)]` attribute.
27+
- Prefer bullets over paragraphs for multi-fact sections (output schema, parameters, errors). Paragraphs hide structure.
28+
- Don't restate the obvious from the type signature. The parameter's name and type already tell the agent it's a `String` or `Option<i64>`; the description adds what isn't in the type — semantics, constraints, defaults.
29+
- No marketing or filler ("powerful", "easy to use"). Every line should change what the agent does.
30+
- Cap length around 30–40 lines. Beyond that, agents start truncating mentally; trim the guidance section first.
31+
32+
### Reference
33+
34+
`tool/data/mod.rs::get_data` is the canonical example. Mirror its layout when adding a new tool.
35+
36+
### list_router tools — sourcing from protos
37+
38+
Tools in `tool/list/` (`list_assets`, `list_runs`, `list_channels`, etc.) are thin wrappers over `sift_rs::<service>::<version>::List<Resource>Request`. Their parameters and per-parameter semantics MUST be derived from the proto comments on that message, not invented.
39+
40+
When you add or update a list-router tool:
41+
42+
1. **Open the matching proto.** Path pattern: `protos/sift/<service>/<version>/<service>.proto`. Find the `message List<Resource>Request { ... }` block. Examples:
43+
- `list_assets` → `protos/sift/assets/v1/assets.proto::ListAssetsRequest`
44+
- `list_runs` → `protos/sift/runs/v2/runs.proto::ListRunsRequest`
45+
- `list_channels` → `protos/sift/channels/v3/channels.proto::ListChannelsRequest`
46+
2. **Copy the field comments verbatim into the tool description.** The proto authors curate the filterable/orderable field lists, default sort, page-size caps, and metadata syntax. Re-stating those in your own words risks drift; quoting from the proto keeps the tool spec aligned with the API.
47+
3. **Map every wrapped field to a bullet** under the `Parameters:` section of the description, using the structure in `### Structure` above. Include:
48+
- Filter: list every filterable field named in the proto's `filter` comment. Preserve metadata-key syntax notes (`metadata.{key}`) and CEL helper notes (`duration(...)`).
49+
- Order-by: list every orderable field, the default sort if the field is empty (assets/runs default to `created_date desc`; channels default to `created_date` ascending — these differ, do not assume), and the `\"FIELD_NAME[ desc],...\"` format.
50+
- Limit: describe the `1..=1000` cap behavior of `service::common::paging` (different from the proto's raw `page_size`, which caps higher for some services).
51+
4. **Re-read the proto whenever the resource changes.** If a new filterable or orderable field is added to the proto, update the tool description in the same change. Stale descriptions are worse than missing ones because agents will trust them.
52+
53+
If the proto's comments are themselves wrong or incomplete, fix the proto first and regenerate — the tool description is downstream of it.

rust/crates/sift_mcp/Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,15 @@ tokio.workspace = true
2121
tonic.workspace = true
2222
anyhow.workspace = true
2323
clap = { workspace = true, features = ["cargo"] }
24+
pbjson-types.workspace = true
25+
prost.workspace = true
26+
arrow.workspace = true
27+
parquet.workspace = true
28+
polars = { workspace = true, features = ["lazy", "parquet", "sql"] }
29+
tokio-stream.workspace = true
2430

2531
[dev-dependencies]
2632
sift_test_util.workspace = true
2733
tokio-stream.workspace = true
34+
bytes.workspace = true
35+
tempdir.workspace = true

rust/crates/sift_mcp/src/error/mod.rs

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,16 @@
11
use rmcp::model::{CallToolResult, ErrorCode};
2-
use serde_json::json;
32
use tonic::{Code, Status};
43

54
pub type McpResult = Result<CallToolResult, rmcp::ErrorData>;
65

7-
pub fn from_grpc_status(status: Status) -> rmcp::ErrorData {
8-
let code = from_grpc_code(status.code());
9-
let message = status.message().to_string();
10-
let data = Some(json!({
11-
"grpc_code": status.code().to_string(),
12-
}));
13-
14-
rmcp::ErrorData {
15-
code,
16-
message: message.into(),
17-
data,
18-
}
19-
}
20-
216
pub fn from_anyhow(error: anyhow::Error) -> rmcp::ErrorData {
22-
let code = ErrorCode::INTERNAL_ERROR;
7+
let mut code = ErrorCode::INTERNAL_ERROR;
238
let message = format!("{error:?}");
249

10+
if let Ok(grpc_status) = error.downcast::<Status>() {
11+
code = from_grpc_code(grpc_status.code());
12+
}
13+
2514
rmcp::ErrorData {
2615
code,
2716
message: message.into(),

rust/crates/sift_mcp/src/lib.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,12 @@ use clap::{crate_name, crate_version};
33
use rmcp::{ServiceExt, transport::stdio};
44
use sift_rs::{Credentials, SiftChannelBuilder};
55

6-
pub(crate) mod server;
6+
mod server;
77
use server::SiftMcpServer;
88

9-
pub mod tool;
10-
119
mod error;
10+
mod service;
11+
mod tool;
1212

1313
pub async fn run(credentials: Credentials, use_tls: bool) -> Result<()> {
1414
let channel = SiftChannelBuilder::new(credentials)

0 commit comments

Comments
 (0)