-
Notifications
You must be signed in to change notification settings - Fork 0
feat: dot notation for indexes create and databases load #92
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -320,34 +320,31 @@ pub enum IndexesCommands { | |
| output: String, | ||
| }, | ||
|
|
||
| /// Create an index on a table or dataset | ||
| /// Create an index on a table or dataset. | ||
| /// | ||
| /// Pass either connection scope (--connection-id + --schema + --table) OR | ||
| /// dataset scope (--dataset-id), not both. | ||
| /// For connection-scoped indexes, pass the table and columns using bracket notation: | ||
| /// `connection.table[col1,col2]` or `connection.schema.table[col1,col2]` | ||
| /// (schema defaults to `public` when omitted) | ||
| /// | ||
| /// For dataset-scoped indexes, use `--dataset-id` with `--columns`. | ||
| Create { | ||
| /// Connection ID (use with --schema and --table) | ||
| #[arg(long, short = 'c', conflicts_with = "dataset_id", requires_all = ["schema", "table"])] | ||
| connection_id: Option<String>, | ||
|
|
||
| /// Schema name (requires --connection-id) | ||
| #[arg(long, requires = "connection_id")] | ||
| schema: Option<String>, | ||
|
|
||
| /// Table name (requires --connection-id) | ||
| #[arg(long, requires = "connection_id")] | ||
| table: Option<String>, | ||
| /// Table and columns to index: `connection.table[col1,col2]` | ||
| /// or `connection.schema.table[col1,col2]`. Schema defaults to `public`. | ||
| #[arg(conflicts_with = "dataset_id")] | ||
| target: Option<String>, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: README.md is now out of sync with this command — lines 252–261 still document |
||
|
|
||
| /// Dataset ID (alternative scope to --connection-id) | ||
| #[arg(long, conflicts_with_all = ["connection_id", "schema", "table"])] | ||
| /// Dataset ID (alternative scope to the positional target) | ||
| #[arg(long, conflicts_with = "target")] | ||
| dataset_id: Option<String>, | ||
|
|
||
| /// Index name | ||
| /// Columns to index (comma-separated). Required with --dataset-id; | ||
| /// for connection scope use bracket notation in the target instead. | ||
| #[arg(long)] | ||
| name: String, | ||
| columns: Option<String>, | ||
|
|
||
| /// Columns to index (comma-separated). Vector indexes accept exactly one column. | ||
| /// Index name (derived from table, columns, and type if omitted) | ||
| #[arg(long)] | ||
| columns: String, | ||
| name: Option<String>, | ||
|
|
||
| /// Index type — required (no default; choose deliberately) | ||
| #[arg(long, value_parser = ["sorted", "bm25", "vector"])] | ||
|
|
@@ -580,6 +577,25 @@ pub enum DatabasesCommands { | |
| name_or_id: String, | ||
| }, | ||
|
|
||
| /// Load a parquet file into a table using dot notation: `database.table` or `database.schema.table` | ||
| Load { | ||
| /// Table to load into: `database.table` or `database.schema.table`. | ||
| /// Schema defaults to `public` when omitted. | ||
| target: String, | ||
|
|
||
| /// Path to a local parquet file to upload and load | ||
| #[arg(long, conflicts_with_all = ["upload_id", "url"])] | ||
| file: Option<String>, | ||
|
|
||
| /// URL of a remote parquet file to download and load | ||
| #[arg(long, conflicts_with_all = ["file", "upload_id"])] | ||
| url: Option<String>, | ||
|
|
||
| /// Use a previously staged upload ID from `POST /v1/files` instead of uploading | ||
| #[arg(long, conflicts_with_all = ["file", "url"])] | ||
| upload_id: Option<String>, | ||
| }, | ||
|
|
||
| /// Manage tables inside a managed database | ||
| Tables { | ||
| #[command(subcommand)] | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -157,6 +157,26 @@ struct ListResponse { | |
| connections: Vec<Connection>, | ||
| } | ||
|
|
||
| /// Resolve a connection name or ID to a connection ID, exiting on failure. | ||
| pub fn resolve_connection_id(api: &ApiClient, name_or_id: &str) -> String { | ||
| let body: ListResponse = api.get("/connections"); | ||
| match body | ||
| .connections | ||
| .iter() | ||
| .find(|c| c.id == name_or_id || c.name == name_or_id) | ||
| { | ||
| Some(conn) => conn.id.clone(), | ||
| None => { | ||
| use crossterm::style::Stylize; | ||
| eprintln!( | ||
| "{}", | ||
| format!("error: no connection named or with id '{name_or_id}'").red() | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| } | ||
| } | ||
|
Comment on lines
+165
to
+190
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. super nit: this lists every connection in the workspace even when the caller already has a raw ID. For workspaces with many connections (or once |
||
|
|
||
| pub fn get(workspace_id: &str, connection_id: &str, format: &str) { | ||
| let api = ApiClient::new(Some(workspace_id)); | ||
| let is_table = format == "table"; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -396,6 +396,23 @@ fn main() { | |
| Some(DatabasesCommands::Delete { name_or_id }) => { | ||
| databases::delete(&workspace_id, &name_or_id) | ||
| } | ||
| Some(DatabasesCommands::Load { | ||
| target, | ||
| file, | ||
| url, | ||
| upload_id, | ||
| }) => { | ||
| let (database, schema, table) = parse_db_target(&target); | ||
| databases::tables_load( | ||
| &workspace_id, | ||
| &database, | ||
| &table, | ||
| Some(schema.as_str()), | ||
| file.as_deref(), | ||
| url.as_deref(), | ||
| upload_id.as_deref(), | ||
| ) | ||
| } | ||
| Some(DatabasesCommands::Tables { command }) => match command { | ||
| DatabaseTablesCommands::List { | ||
| database, | ||
|
|
@@ -565,12 +582,10 @@ fn main() { | |
| &output, | ||
| ), | ||
| IndexesCommands::Create { | ||
| connection_id, | ||
| schema, | ||
| table, | ||
| target, | ||
| dataset_id, | ||
| name, | ||
| columns, | ||
| name, | ||
| r#type, | ||
| metric, | ||
| r#async, | ||
|
|
@@ -579,32 +594,64 @@ fn main() { | |
| output_column, | ||
| description, | ||
| } => { | ||
| let scope = match ( | ||
| dataset_id.as_deref(), | ||
| connection_id.as_deref(), | ||
| schema.as_deref(), | ||
| table.as_deref(), | ||
| ) { | ||
| (Some(did), _, _, _) => indexes::IndexScope::Dataset { dataset_id: did }, | ||
| (None, Some(cid), Some(sch), Some(tbl)) => { | ||
| indexes::IndexScope::Connection { | ||
| connection_id: cid, | ||
| schema: sch, | ||
| table: tbl, | ||
| let api = api::ApiClient::new(Some(&workspace_id)); | ||
| let (scope, resolved_columns, auto_name) = | ||
| match (target.as_deref(), dataset_id.as_deref()) { | ||
| (Some(tgt), None) => { | ||
| let (conn_name, schema, table, cols) = | ||
| parse_index_target(tgt); | ||
| let conn_id = | ||
| connections::resolve_connection_id(&api, &conn_name); | ||
| let auto = format!( | ||
| "{table}_{cols}_{type}", | ||
| cols = cols.join("_"), | ||
| type = r#type | ||
| ); | ||
| ( | ||
| (conn_id, schema, table), | ||
| cols.join(","), | ||
| auto, | ||
| ) | ||
| } | ||
| } | ||
| _ => { | ||
| eprintln!( | ||
| "error: provide either --dataset-id or all three of --connection-id, --schema, --table" | ||
| ); | ||
| std::process::exit(1); | ||
| (None, Some(did)) => { | ||
| let cols = | ||
| columns.as_deref().unwrap_or_else(|| { | ||
| eprintln!( | ||
| "error: --columns is required with --dataset-id" | ||
| ); | ||
| std::process::exit(1); | ||
| }); | ||
| let auto = format!("dataset_{type}", type = r#type); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: auto-name for the dataset branch is |
||
| ( | ||
| (did.to_string(), String::new(), String::new()), | ||
| cols.to_string(), | ||
| auto, | ||
| ) | ||
| } | ||
| _ => { | ||
| eprintln!( | ||
| "error: provide either <target> (e.g. airbnb.listings[col1,col2]) or --dataset-id with --columns" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| }; | ||
| let index_name = name.unwrap_or(auto_name); | ||
| let is_dataset = dataset_id.is_some(); | ||
| let (conn_id, schema, table) = scope; | ||
| let resolved_scope = if is_dataset { | ||
| indexes::IndexScope::Dataset { dataset_id: &conn_id } | ||
| } else { | ||
| indexes::IndexScope::Connection { | ||
| connection_id: &conn_id, | ||
| schema: &schema, | ||
| table: &table, | ||
| } | ||
| }; | ||
| indexes::create( | ||
| &workspace_id, | ||
| scope, | ||
| &name, | ||
| &columns, | ||
| resolved_scope, | ||
| &index_name, | ||
| &resolved_columns, | ||
| &r#type, | ||
| metric.as_deref(), | ||
| r#async, | ||
|
|
@@ -911,6 +958,61 @@ fn main() { | |
| } | ||
| } | ||
|
|
||
| /// Parse a database target like `airbnb.listings` or `airbnb.public.listings` | ||
| /// into `(database, schema, table)`. Schema defaults to `public`. | ||
| fn parse_db_target(target: &str) -> (String, String, String) { | ||
| let parts: Vec<&str> = target.splitn(4, '.').collect(); | ||
| match parts.as_slice() { | ||
| [db, tbl] => (db.to_string(), "public".to_string(), tbl.to_string()), | ||
| [db, schema, tbl] => (db.to_string(), schema.to_string(), tbl.to_string()), | ||
| _ => { | ||
| eprintln!( | ||
| "error: target must be 'database.table' or 'database.schema.table'" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /// Parse an index target like `airbnb.listings[col1,col2]` or | ||
| /// `airbnb.public.listings[col1,col2]` into `(conn_name, schema, table, columns)`. | ||
| /// Schema defaults to `public` when only two dot-parts are given. | ||
| fn parse_index_target(target: &str) -> (String, String, String, Vec<String>) { | ||
| let Some(bracket_pos) = target.find('[') else { | ||
| eprintln!( | ||
| "error: target must include columns in brackets, e.g. airbnb.listings[col1,col2]" | ||
| ); | ||
| std::process::exit(1); | ||
| }; | ||
| let table_part = &target[..bracket_pos]; | ||
| let cols_raw = target[bracket_pos + 1..].trim_end_matches(']'); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: this silently accepts unclosed/garbled brackets. |
||
|
|
||
| let parts: Vec<&str> = table_part.splitn(4, '.').collect(); | ||
| let (conn, schema, table) = match parts.as_slice() { | ||
| [c, t] => (c.to_string(), "public".to_string(), t.to_string()), | ||
| [c, s, t] => (c.to_string(), s.to_string(), t.to_string()), | ||
| _ => { | ||
| eprintln!( | ||
| "error: target must be 'connection.table[cols]' or 'connection.schema.table[cols]'" | ||
| ); | ||
| std::process::exit(1); | ||
| } | ||
| }; | ||
|
|
||
| let columns: Vec<String> = cols_raw | ||
| .split(',') | ||
| .map(|s| s.trim().to_string()) | ||
| .filter(|s| !s.is_empty()) | ||
| .collect(); | ||
|
|
||
| if columns.is_empty() { | ||
| eprintln!("error: no columns specified in brackets"); | ||
| std::process::exit(1); | ||
| } | ||
|
|
||
| (conn, schema, table, columns) | ||
|
Comment on lines
+967
to
+1023
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: these two parsers ( |
||
| } | ||
|
|
||
| pub fn get_styles() -> clap::builder::Styles { | ||
| Styles::styled() | ||
| .header(AnsiColor::Yellow.on_default()) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
super nit:
[and]are glob metacharacters in zsh (and in bash withextglob/failglob), sohotdata indexes create airbnb.listings[description] --type bm25will fail withno matches foundfor many users out of the box. Worth a line in the doc comment telling users to quote the target ('airbnb.listings[description]'). Same note would help onDatabasesCommands::Loadeven though it has no brackets, since the README example uses brackets in nearby commands. (not blocking)