Skip to content

Commit 934b0ff

Browse files
committed
feat(search): add fuzzy search and "did you mean?" suggestions
1 parent 084979d commit 934b0ff

8 files changed

Lines changed: 207 additions & 27 deletions

File tree

Cargo.lock

Lines changed: 11 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ libsqlite3-sys = { version = ">=0.30.1,<0.36.0", features = [ "bundled" ]}
4343
miette = { version = "7.6.0", features = ["fancy"] }
4444
minisign-verify = "0.2.4"
4545
nix = { version = "0.30.1", features = ["fs", "ioctl", "term", "user"] }
46+
nucleo-matcher = "0.3.1"
4647
nu-ansi-term = "0.50.3"
4748
once_cell = "1.21"
4849
percent-encoding = "2.3.2"

crates/soar-cli/src/inspect.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use soar_db::repository::{
1111
metadata::MetadataRepository,
1212
};
1313
use soar_dl::http_client::SHARED_AGENT;
14+
use soar_operations::search;
1415
use soar_utils::bytes::format_bytes;
1516
use tracing::{error, info};
1617
use ureq::http::header::CONTENT_LENGTH;
@@ -115,6 +116,11 @@ pub async fn inspect_log(package: &str, inspect_type: InspectType) -> SoarResult
115116

116117
if packages.is_empty() {
117118
error!("Package {} not found", package);
119+
if let Ok(suggestions) = search::suggest_similar(&ctx, package, 3).await {
120+
if !suggestions.is_empty() {
121+
info!("Did you mean: {}?", suggestions.join(", "));
122+
}
123+
}
118124
} else {
119125
let selected_pkg = if packages.len() > 1 {
120126
&select_package_interactively(packages, &query.name.unwrap_or(package.to_string()))?

crates/soar-cli/src/install.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
use nu_ansi_term::Color::{Blue, Cyan, Green, Magenta, Red, Yellow};
22
use soar_core::{package::install::InstallTarget, SoarResult};
3-
use soar_operations::{install, InstallOptions, InstallReport, ResolveResult, SoarContext};
3+
use soar_operations::{install, search, InstallOptions, InstallReport, ResolveResult, SoarContext};
44
use tabled::{
55
builder::Builder,
66
settings::{themes::BorderCorrection, Panel, Style},
@@ -89,6 +89,11 @@ pub async fn install_packages(
8989
}
9090
ResolveResult::NotFound(name) => {
9191
error!("Package {} not found", name);
92+
if let Ok(suggestions) = search::suggest_similar(ctx, &name, 3).await {
93+
if !suggestions.is_empty() {
94+
info!("Did you mean: {}?", suggestions.join(", "));
95+
}
96+
}
9297
}
9398
ResolveResult::AlreadyInstalled {
9499
pkg_name,
@@ -177,6 +182,11 @@ async fn install_with_show(
177182
}
178183
ResolveResult::NotFound(name) => {
179184
error!("Package {} not found", name);
185+
if let Ok(suggestions) = search::suggest_similar(ctx, &name, 3).await {
186+
if !suggestions.is_empty() {
187+
info!("Did you mean: {}?", suggestions.join(", "));
188+
}
189+
}
180190
}
181191
ResolveResult::AlreadyInstalled {
182192
pkg_name,
@@ -250,7 +260,13 @@ async fn install_with_show(
250260
};
251261

252262
if repo_pkgs.is_empty() {
253-
error!("Package {} not found", query.name.as_ref().unwrap());
263+
let name = query.name.as_ref().unwrap();
264+
error!("Package {} not found", name);
265+
if let Ok(suggestions) = search::suggest_similar(ctx, name, 3).await {
266+
if !suggestions.is_empty() {
267+
info!("Did you mean: {}?", suggestions.join(", "));
268+
}
269+
}
254270
continue;
255271
}
256272

crates/soar-db/src/models/metadata.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ pub struct PackageListing {
137137
pub version: String,
138138
}
139139

140+
/// Minimal package data for fuzzy search candidate matching.
141+
/// Only fetches fields needed for matching, avoiding expensive JSON deserialization.
142+
#[derive(Debug, Clone, Queryable, Selectable)]
143+
#[diesel(table_name = packages)]
144+
#[diesel(check_for_backend(diesel::sqlite::Sqlite))]
145+
pub struct FuzzyCandidate {
146+
pub id: i32,
147+
pub pkg_name: String,
148+
pub pkg_id: String,
149+
pub description: Option<String>,
150+
}
151+
140152
/// Package with repository name attached.
141153
/// This is used when querying across multiple repositories.
142154
#[derive(Debug, Clone)]

crates/soar-db/src/repository/metadata.rs

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ use super::core::SortDirection;
1515
use crate::{
1616
models::{
1717
metadata::{
18-
Maintainer, NewMaintainer, NewPackage, NewPackageMaintainer, NewRepository, Package,
19-
PackageListing,
18+
FuzzyCandidate, Maintainer, NewMaintainer, NewPackage, NewPackageMaintainer,
19+
NewRepository, Package, PackageListing,
2020
},
2121
types::PackageProvide,
2222
},
@@ -61,6 +61,26 @@ impl MetadataRepository {
6161
result
6262
}
6363

64+
/// Loads lightweight package data for fuzzy matching.
65+
/// Returns only (id, pkg_name, pkg_id, description) to minimize memory and deserialization.
66+
pub fn load_fuzzy_candidates(conn: &mut SqliteConnection) -> QueryResult<Vec<FuzzyCandidate>> {
67+
trace!("loading fuzzy candidates");
68+
packages::table
69+
.select(FuzzyCandidate::as_select())
70+
.load(conn)
71+
}
72+
73+
/// Fetches full package details for a set of package IDs.
74+
pub fn find_by_ids(conn: &mut SqliteConnection, ids: &[i32]) -> QueryResult<Vec<Package>> {
75+
if ids.is_empty() {
76+
return Ok(Vec::new());
77+
}
78+
packages::table
79+
.filter(packages::id.eq_any(ids))
80+
.select(Package::as_select())
81+
.load(conn)
82+
}
83+
6484
/// Lists packages with pagination and sorting using Diesel DSL.
6585
pub fn list_paginated(
6686
conn: &mut SqliteConnection,

crates/soar-operations/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ categories.workspace = true
1212
[dependencies]
1313
fast-glob = { workspace = true }
1414
minisign-verify = { workspace = true }
15+
nucleo-matcher = { workspace = true }
1516
once_cell = { workspace = true }
1617
rayon = { workspace = true }
1718
soar-config = { workspace = true }

crates/soar-operations/src/search.rs

Lines changed: 136 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,26 @@
11
use std::collections::HashMap;
22

3+
use nucleo_matcher::{
4+
pattern::{CaseMatching, Normalization, Pattern},
5+
Config, Matcher, Utf32String,
6+
};
37
use rayon::iter::{IntoParallelIterator, ParallelIterator};
48
use soar_config::config::get_config;
59
use soar_core::{database::models::Package, package::query::PackageQuery, SoarResult};
6-
use soar_db::repository::{
7-
core::{CoreRepository, SortDirection},
8-
metadata::MetadataRepository,
10+
use soar_db::{
11+
models::metadata::FuzzyCandidate,
12+
repository::{
13+
core::{CoreRepository, SortDirection},
14+
metadata::MetadataRepository,
15+
},
916
};
1017
use tracing::{debug, trace};
1118

1219
use crate::{SearchEntry, SearchResult, SoarContext};
1320

1421
/// Search for packages across all repositories.
22+
///
23+
/// Uses fuzzy matching by default. Falls back to SQL LIKE for case-sensitive searches.
1524
pub async fn search_packages(
1625
ctx: &SoarContext,
1726
query: &str,
@@ -24,27 +33,27 @@ pub async fn search_packages(
2433
limit = ?limit,
2534
"searching packages"
2635
);
36+
2737
let metadata_mgr = ctx.metadata_manager().await?;
2838
let diesel_db = ctx.diesel_core_db()?;
39+
let search_limit = limit.or(get_config().search_limit).unwrap_or(20);
2940

30-
let search_limit = limit.or(get_config().search_limit).unwrap_or(20) as i64;
31-
trace!(search_limit = search_limit, "using search limit");
32-
33-
let packages: Vec<Package> = metadata_mgr.query_all_flat(|repo_name, conn| {
34-
let pkgs = if case_sensitive {
35-
MetadataRepository::search_case_sensitive(conn, query, Some(search_limit))?
36-
} else {
37-
MetadataRepository::search(conn, query, Some(search_limit))?
38-
};
39-
Ok(pkgs
40-
.into_iter()
41-
.map(|p| {
42-
let mut pkg: Package = p.into();
43-
pkg.repo_name = repo_name.to_string();
44-
pkg
45-
})
46-
.collect())
47-
})?;
41+
let packages: Vec<Package> = if case_sensitive {
42+
let sql_limit = search_limit as i64;
43+
metadata_mgr.query_all_flat(|repo_name, conn| {
44+
let pkgs = MetadataRepository::search_case_sensitive(conn, query, Some(sql_limit))?;
45+
Ok(pkgs
46+
.into_iter()
47+
.map(|p| {
48+
let mut pkg: Package = p.into();
49+
pkg.repo_name = repo_name.to_string();
50+
pkg
51+
})
52+
.collect())
53+
})?
54+
} else {
55+
fuzzy_search(ctx, query, search_limit).await?
56+
};
4857

4958
let installed_pkgs: HashMap<(String, String, String), bool> = diesel_db
5059
.with_conn(|conn| {
@@ -58,15 +67,14 @@ pub async fn search_packages(
5867

5968
let entries: Vec<SearchEntry> = packages
6069
.into_iter()
61-
.take(search_limit as usize)
70+
.take(search_limit)
6271
.map(|package| {
6372
let key = (
6473
package.repo_name.clone(),
6574
package.pkg_id.clone(),
6675
package.pkg_name.clone(),
6776
);
6877
let installed = installed_pkgs.get(&key).copied().unwrap_or(false);
69-
7078
SearchEntry {
7179
package,
7280
installed,
@@ -80,6 +88,111 @@ pub async fn search_packages(
8088
})
8189
}
8290

91+
/// Returns top fuzzy-matched packages across all repositories.
92+
async fn fuzzy_search(ctx: &SoarContext, query: &str, limit: usize) -> SoarResult<Vec<Package>> {
93+
let metadata_mgr = ctx.metadata_manager().await?;
94+
95+
let candidates: Vec<(String, FuzzyCandidate)> =
96+
metadata_mgr.query_all_flat(|repo_name, conn| {
97+
let items = MetadataRepository::load_fuzzy_candidates(conn)?;
98+
Ok(items
99+
.into_iter()
100+
.map(|c| (repo_name.to_string(), c))
101+
.collect())
102+
})?;
103+
104+
let scored = score_candidates(query, &candidates);
105+
let top: Vec<_> = scored.into_iter().take(limit).collect();
106+
107+
let mut repo_ids: HashMap<&str, Vec<i32>> = HashMap::new();
108+
for &(_, idx) in &top {
109+
let (repo_name, candidate) = &candidates[idx];
110+
repo_ids
111+
.entry(repo_name.as_str())
112+
.or_default()
113+
.push(candidate.id);
114+
}
115+
116+
let mut full_packages: HashMap<(String, i32), Package> = HashMap::new();
117+
for (repo_name, ids) in &repo_ids {
118+
if let Some(pkgs) =
119+
metadata_mgr.query_repo(repo_name, |conn| MetadataRepository::find_by_ids(conn, ids))?
120+
{
121+
for p in pkgs {
122+
let db_id = p.id;
123+
let mut pkg: Package = p.into();
124+
pkg.repo_name = repo_name.to_string();
125+
full_packages.insert((repo_name.to_string(), db_id), pkg);
126+
}
127+
}
128+
}
129+
130+
let packages: Vec<Package> = top
131+
.into_iter()
132+
.filter_map(|(_, idx)| {
133+
let (repo_name, candidate) = &candidates[idx];
134+
full_packages.remove(&(repo_name.clone(), candidate.id))
135+
})
136+
.collect();
137+
138+
Ok(packages)
139+
}
140+
141+
/// Suggest similar package names for "did you mean?" messages.
142+
pub async fn suggest_similar(
143+
ctx: &SoarContext,
144+
query: &str,
145+
max: usize,
146+
) -> SoarResult<Vec<String>> {
147+
let metadata_mgr = ctx.metadata_manager().await?;
148+
149+
let candidates: Vec<(String, FuzzyCandidate)> =
150+
metadata_mgr.query_all_flat(|repo_name, conn| {
151+
let items = MetadataRepository::load_fuzzy_candidates(conn)?;
152+
Ok(items
153+
.into_iter()
154+
.map(|c| (repo_name.to_string(), c))
155+
.collect())
156+
})?;
157+
158+
let scored = score_candidates(query, &candidates);
159+
160+
let suggestions: Vec<String> = scored
161+
.into_iter()
162+
.take(max)
163+
.map(|(_, idx)| {
164+
let (_, candidate) = &candidates[idx];
165+
candidate.pkg_name.clone()
166+
})
167+
.collect();
168+
169+
Ok(suggestions)
170+
}
171+
172+
fn score_candidates(query: &str, candidates: &[(String, FuzzyCandidate)]) -> Vec<(u32, usize)> {
173+
let mut matcher = Matcher::new(Config::DEFAULT);
174+
let pattern = Pattern::parse(query, CaseMatching::Ignore, Normalization::Smart);
175+
176+
let mut scored: Vec<(u32, usize)> = Vec::new();
177+
178+
for (idx, (_repo_name, candidate)) in candidates.iter().enumerate() {
179+
let name_buf = Utf32String::from(candidate.pkg_name.as_str());
180+
let name_score = pattern.score(name_buf.slice(..), &mut matcher);
181+
182+
let id_buf = Utf32String::from(candidate.pkg_id.as_str());
183+
let id_score = pattern.score(id_buf.slice(..), &mut matcher);
184+
185+
let best_score = [name_score, id_score].into_iter().flatten().max();
186+
187+
if let Some(score) = best_score {
188+
scored.push((score, idx));
189+
}
190+
}
191+
192+
scored.sort_by(|a, b| b.0.cmp(&a.0));
193+
scored
194+
}
195+
83196
/// Query detailed package information.
84197
///
85198
/// Accepts query strings in the format `name#pkg_id@version:repo`.

0 commit comments

Comments
 (0)