Skip to content

Commit 3659414

Browse files
eddietejeda and claude authored
feat(search): infer --type and --column from indexes; default schema to public (#90)
* feat(search): infer --type and --column from indexes; default schema to public

  Make --type and --column optional in hotdata search. When either is omitted, the CLI fetches the table indexes, filters to searchable types (bm25/vector), and resolves them automatically. Exits with a clear error when the result is ambiguous or no index exists.

  Accept connection.table as shorthand for connection.public.table in --table; schema defaults to public when omitted.

  Before: hotdata search query --type bm25 --table airbnb.public.listings --column description --limit 5
  After: hotdata search query --table airbnb.listings --limit 5

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(search): explicit error on empty index columns; add unit tests for inference logic

  - Extract resolve_search_params as a pure function returning Result so the matching logic is testable without API mocking
  - Replace unwrap_or_default() on columns with ok_or_else() so an index with no columns produces a clear error instead of silent empty string
  - Add 9 unit tests covering single bm25/vector, hint narrowing, no indexes, multiple indexes, hint type mismatch, and empty columns

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Eddie A Tejeda <669988+eddietejeda@users.noreply.github.com>
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 647657d commit 3659414

3 files changed

Lines changed: 241 additions & 11 deletions

File tree

src/command.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -158,24 +158,28 @@ pub enum Commands {
158158
/// Search query text — required for both --type bm25 and --type vector
159159
query: String,
160160

161-
/// Search type — required (no default; choose deliberately)
161+
/// Search type (`bm25` or `vector`). Inferred automatically when the table has exactly
162+
/// one search index — required only when multiple indexes exist.
162163
///
163164
/// `vector` runs server-side `vector_distance(col, 'text')` — the server resolves the
164165
/// embedding column, model, and metric from the index metadata.
165166
///
166167
/// `bm25` runs server-side `bm25_search(table, col, 'text')` and requires a BM25 index
167168
/// on the column.
168169
#[arg(long, value_parser = ["vector", "bm25"])]
169-
r#type: String,
170+
r#type: Option<String>,
170171

171-
/// Table to search (connection.schema.table)
172+
/// Table to search (`connection.table` or `connection.schema.table`).
173+
/// Schema defaults to `public` when omitted.
172174
#[arg(long)]
173175
table: String,
174176

175-
/// Column to search. For `--type vector`, name the source text column — the server
176-
/// resolves the embedding column from the index metadata.
177+
/// Column to search. Inferred automatically when the table has exactly one search index
178+
/// of the resolved type — required only when multiple indexed columns exist.
179+
/// For `--type vector`, name the source text column — the server resolves the embedding
180+
/// column from the index metadata.
177181
#[arg(long)]
178-
column: String,
182+
column: Option<String>,
179183

180184
/// Columns to display (comma-separated, defaults to all)
181185
#[arg(long)]

src/indexes.rs

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,104 @@ fn list_one_table_scan(
147147
}
148148
}
149149

150+
/// Pure matching logic for search inference — extracted for testability.
151+
///
152+
/// Filters `indexes` to searchable types (`bm25`, `vector`), narrows by `hint_type` /
153+
/// `hint_column` when provided, and returns `Ok((index_type, column))` on an unambiguous
154+
/// match. Returns `Err(message)` on no match, multiple matches, or an index with no columns.
155+
/// `location` is used only in error messages (e.g. `"mydb.public.listings"`).
156+
fn resolve_search_params(
157+
indexes: &[Index],
158+
hint_type: Option<&str>,
159+
hint_column: Option<&str>,
160+
location: &str,
161+
) -> Result<(String, String), String> {
162+
let matches: Vec<&Index> = indexes
163+
.iter()
164+
.filter(|i| {
165+
let t = i.index_type.as_str();
166+
(t == "bm25" || t == "vector")
167+
&& hint_type.map_or(true, |ht| ht == t)
168+
&& hint_column.map_or(true, |hc| i.columns.iter().any(|c| c == hc))
169+
})
170+
.collect();
171+
172+
match matches.as_slice() {
173+
[] => {
174+
let what = match hint_type {
175+
Some(t) => format!("{} index", t),
176+
None => "BM25 or vector index".to_string(),
177+
};
178+
Err(format!(
179+
"No {} found on {} — run 'hotdata indexes create' first.",
180+
what, location
181+
))
182+
}
183+
[one] => {
184+
let index_type = one.index_type.clone();
185+
let column = one.columns.first().cloned().ok_or_else(|| {
186+
format!("Index '{}' has no columns.", one.index_name)
187+
})?;
188+
Ok((index_type, column))
189+
}
190+
_ => {
191+
let types: Vec<&str> = matches.iter().map(|i| i.index_type.as_str()).collect();
192+
let cols: Vec<String> = matches
193+
.iter()
194+
.flat_map(|i| i.columns.iter().cloned())
195+
.collect();
196+
Err(format!(
197+
"Multiple search indexes found (types: {}, columns: {}) — specify --type and --column.",
198+
types.join(", "),
199+
cols.join(", ")
200+
))
201+
}
202+
}
203+
}
204+
205+
/// Infers `(index_type, column)` for `hotdata search` when `--type` or `--column` are omitted.
206+
///
207+
/// Fetches the indexes on `connection_name.schema.table`, filters to searchable types
208+
/// (`bm25`, `vector`), and narrows further by `hint_type` / `hint_column` when provided.
209+
/// Exits with an error when the result is ambiguous (multiple matches) or no index exists.
210+
pub fn infer_for_search(
211+
workspace_id: &str,
212+
connection_name: &str,
213+
schema: &str,
214+
table: &str,
215+
hint_type: Option<&str>,
216+
hint_column: Option<&str>,
217+
) -> (String, String) {
218+
use crossterm::style::Stylize;
219+
220+
let api = ApiClient::new(Some(workspace_id));
221+
222+
// Resolve connection name → ID
223+
let conn_map = connection_lookup(&api);
224+
let connection_id = match conn_map.get(connection_name) {
225+
Some(id) => id.clone(),
226+
None => {
227+
eprintln!(
228+
"{}",
229+
format!("Connection '{}' not found.", connection_name).red()
230+
);
231+
std::process::exit(1);
232+
}
233+
};
234+
235+
// Fetch indexes for this table
236+
let indexes = list_one_table(&api, &connection_id, schema, table);
237+
238+
let location = format!("{}.{}.{}", connection_name, schema, table);
239+
match resolve_search_params(&indexes, hint_type, hint_column, &location) {
240+
Ok(result) => result,
241+
Err(msg) => {
242+
eprintln!("{}", msg.red());
243+
std::process::exit(1);
244+
}
245+
}
246+
}
247+
150248
pub fn list(
151249
workspace_id: &str,
152250
connection_id: Option<&str>,
@@ -574,4 +672,95 @@ mod tests {
574672
mock.assert();
575673
assert!(rows.is_empty());
576674
}
675+
676+
fn make_index(name: &str, index_type: &str, columns: &[&str]) -> Index {
677+
Index {
678+
index_name: name.into(),
679+
index_type: index_type.into(),
680+
columns: columns.iter().map(|c| c.to_string()).collect(),
681+
metric: None,
682+
status: "ready".into(),
683+
created_at: "2020-01-01T00:00:00Z".into(),
684+
updated_at: "2020-01-01T00:00:00Z".into(),
685+
}
686+
}
687+
688+
/// A lone BM25 index resolves to its own type and column when no hints are given.
#[test]
fn resolve_search_params_single_bm25_returns_type_and_column() {
    let only_bm25 = [make_index("fts", "bm25", &["description"])];
    let expected = ("bm25".to_string(), "description".to_string());

    assert_eq!(
        resolve_search_params(&only_bm25, None, None, "db.public.t"),
        Ok(expected)
    );
}
694+
695+
/// A lone vector index resolves to its own type and column when no hints are given.
#[test]
fn resolve_search_params_single_vector_returns_type_and_column() {
    let only_vector = [make_index("vec", "vector", &["embedding"])];
    let expected = ("vector".to_string(), "embedding".to_string());

    assert_eq!(
        resolve_search_params(&only_vector, None, None, "db.public.t"),
        Ok(expected)
    );
}
701+
702+
/// Index types other than `bm25` / `vector` take no part in the inference.
#[test]
fn resolve_search_params_non_search_indexes_ignored() {
    let mixed = [
        make_index("sorted_idx", "sorted", &["created_at"]),
        make_index("fts", "bm25", &["body"]),
    ];
    let expected = ("bm25".to_string(), "body".to_string());

    assert_eq!(
        resolve_search_params(&mixed, None, None, "db.public.t"),
        Ok(expected)
    );
}
711+
712+
/// With one index of each searchable type, a type hint selects the matching one.
#[test]
fn resolve_search_params_hint_type_narrows_to_single() {
    let one_of_each = [
        make_index("fts", "bm25", &["description"]),
        make_index("vec", "vector", &["embedding"]),
    ];
    let expected = ("bm25".to_string(), "description".to_string());

    assert_eq!(
        resolve_search_params(&one_of_each, Some("bm25"), None, "db.public.t"),
        Ok(expected)
    );
}
721+
722+
/// With two same-typed indexes on different columns, a column hint selects one of them.
#[test]
fn resolve_search_params_hint_column_narrows_to_single() {
    let two_bm25 = [
        make_index("fts_desc", "bm25", &["description"]),
        make_index("fts_name", "bm25", &["name"]),
    ];
    let expected = ("bm25".to_string(), "name".to_string());

    assert_eq!(
        resolve_search_params(&two_bm25, None, Some("name"), "db.public.t"),
        Ok(expected)
    );
}
731+
732+
/// A table whose only index is not searchable yields the generic "no index" error.
#[test]
fn resolve_search_params_no_search_indexes_returns_error() {
    let sorted_only = [make_index("sorted_idx", "sorted", &["id"])];

    let message = resolve_search_params(&sorted_only, None, None, "db.public.t")
        .expect_err("a table without bm25/vector indexes must not resolve");

    assert!(message.contains("No BM25 or vector index found"));
}
739+
740+
/// When the type hint rules out every index, the error names the requested type.
#[test]
fn resolve_search_params_no_index_error_mentions_hint_type() {
    let bm25_only = [make_index("fts", "bm25", &["description"])];

    let message = resolve_search_params(&bm25_only, Some("vector"), None, "db.public.t")
        .expect_err("a vector hint must not match a bm25-only table");

    assert!(message.contains("vector index"));
}
747+
748+
/// Two candidate indexes and no hints is ambiguous and must be reported as such.
#[test]
fn resolve_search_params_multiple_matches_returns_error() {
    let two_bm25 = [
        make_index("fts_desc", "bm25", &["description"]),
        make_index("fts_name", "bm25", &["name"]),
    ];

    let message = resolve_search_params(&two_bm25, None, None, "db.public.t")
        .expect_err("two unhinted candidates must be ambiguous");

    assert!(message.contains("Multiple search indexes found"));
}
758+
759+
/// An index that lists no columns produces an explicit error, not an empty column name.
#[test]
fn resolve_search_params_index_with_no_columns_returns_error() {
    let columnless = [make_index("fts", "bm25", &[])];

    let message = resolve_search_params(&columnless, None, None, "db.public.t")
        .expect_err("an index without columns must not resolve");

    assert!(message.contains("has no columns"));
}
577766
}

src/main.rs

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -706,9 +706,46 @@ fn main() {
706706
output,
707707
} => {
708708
let workspace_id = resolve_workspace(workspace_id);
709+
710+
// Parse `connection.table` or `connection.schema.table`.
711+
// Schema defaults to `public` when omitted.
712+
let parts: Vec<&str> = table.splitn(4, '.').collect();
713+
let (conn_name, schema, table_name) = match parts.as_slice() {
714+
[conn, schema, tbl] => {
715+
(conn.to_string(), schema.to_string(), tbl.to_string())
716+
}
717+
[conn, tbl] => (conn.to_string(), "public".to_string(), tbl.to_string()),
718+
_ => {
719+
eprintln!(
720+
"error: --table must be 'connection.table' or 'connection.schema.table'"
721+
);
722+
std::process::exit(1);
723+
}
724+
};
725+
let normalized_table = format!("{}.{}.{}", conn_name, schema, table_name);
726+
727+
// Infer --type and --column from the table's indexes when either is omitted.
728+
let (resolved_type, resolved_column) =
729+
if r#type.is_some() && column.is_some() {
730+
(r#type.unwrap(), column.unwrap())
731+
} else {
732+
let (inferred_type, inferred_column) = indexes::infer_for_search(
733+
&workspace_id,
734+
&conn_name,
735+
&schema,
736+
&table_name,
737+
r#type.as_deref(),
738+
column.as_deref(),
739+
);
740+
(
741+
r#type.unwrap_or(inferred_type),
742+
column.unwrap_or(inferred_column),
743+
)
744+
};
745+
709746
let select_cols = select.as_deref().unwrap_or("*");
710747

711-
let sql = match r#type.as_str() {
748+
let sql = match resolved_type.as_str() {
712749
"bm25" => {
713750
let bm25_columns = match select.as_deref() {
714751
Some(cols) => format!("{}, score", cols),
@@ -717,8 +754,8 @@ fn main() {
717754
format!(
718755
"SELECT {} FROM bm25_search('{}', '{}', '{}') ORDER BY score DESC LIMIT {}",
719756
bm25_columns,
720-
table.replace('\'', "''"),
721-
column.replace('\'', "''"),
757+
normalized_table.replace('\'', "''"),
758+
resolved_column.replace('\'', "''"),
722759
query.replace('\'', "''"),
723760
limit,
724761
)
@@ -728,9 +765,9 @@ fn main() {
728765
"vector" => format!(
729766
"SELECT {}, vector_distance({}, '{}') AS dist FROM {} ORDER BY dist LIMIT {}",
730767
select_cols,
731-
column,
768+
resolved_column,
732769
query.replace('\'', "''"),
733-
table,
770+
normalized_table,
734771
limit,
735772
),
736773
_ => unreachable!(),

0 commit comments

Comments
 (0)