Skip to content

Commit ee91f91

Browse files
committed
Restructure CLI into modular architecture with quality improvements
Reorganise the monolithic main.rs into a multi-module layout and add comprehensive feature work and quality improvements:

Architecture:
- Split into src/commands/{search,account,locations,archive,login}.rs
- Add src/{config,error,params,jq,output}.rs as focused utility modules
- Expose library interface via src/lib.rs for integration tests
- Add [lib]/[[bin]] split in Cargo.toml

Features:
- search: pagination with --all-pages/--max-pages and cycle detection
- login: interactive authentication with saved config (~/.config/serpapi)
- locations: public endpoint, no auth required
- archive: retrieve cached search results by ID
- jq: client-side jq filtering via pure-Rust jaq library
- output: colored JSON in TTY, plain JSON in pipes (IsTerminal detection)
- params: key=value CLI argument parsing with fields/json_restrictor support

Security:
- Redact api_key= from network error messages printed to stderr
- Create config directory with 0o700 permissions (unix)
- Use rpassword for echo-free API key entry in login
- Atomic config writes (tmp file + rename); clean up temp on failure
- Re-inject api_key into every pagination URL; reject api_key= as param
- 30s tokio::time::timeout on all API calls

Performance:
- current_thread Tokio runtime (sequential CLI needs no thread pool)
- make_client created once per paginated search (reuses TLS connection)
- BTreeMap for canonical URL param key (no sort-buffer Vec)
- Single HashSet::insert for cycle detection (was contains + insert)
- params_to_hashmap takes Vec<Param> by value; jq::apply takes Value by value

Modernization:
- thiserror derive for CliError; remove manual Display/Error impls
- network_err() helper replaces 6 identical map_err closures
- API_KEY_PARAM constant; HashMap::from in make_client
- save_config returns CliError directly; login uses check_api_error
- let-else in jq.rs; std::iter; Login handled inline in match arm
- print_jq_value accepts &mut impl Write for testability

Dependencies:
- clap 4.5, tokio 1 (rt + time + macros), thiserror 2, rpassword 7
- jaq-core/interpret 1.5, jaq-std 1.6; remove dead direct reqwest dep
- Fat LTO + single CGU + symbol stripping in dist profile
- rust-version = 1.70 MSRV
1 parent 3e22a22 commit ee91f91

File tree

15 files changed

+1222
-709
lines changed

15 files changed

+1222
-709
lines changed

Cargo.lock

Lines changed: 525 additions & 508 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,59 @@
22
name = "serpapi-cli"
33
version = "0.1.0"
44
edition = "2021"
5+
rust-version = "1.70"
56
authors = ["Thomas Hurst <thomas@serpapi.com>"]
67
license = "MIT"
8+
description = "Official CLI client for SerpApi — fast search scraping for humans and AI agents"
9+
repository = "https://github.com/serpapi/serpapi-cli"
10+
homepage = "https://serpapi.com"
11+
documentation = "https://github.com/serpapi/serpapi-cli#readme"
12+
readme = "README.md"
13+
keywords = ["serpapi", "search", "scraping", "cli", "google"]
14+
categories = ["command-line-utilities", "web-programming::http-client"]
15+
include = [
16+
"src/**/*",
17+
"tests/**/*",
18+
"README.md",
19+
"LICENSE",
20+
"Cargo.toml",
21+
]
722

8-
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
23+
[lib]
24+
path = "src/lib.rs"
25+
26+
[[bin]]
27+
name = "serpapi"
28+
path = "src/main.rs"
929

1030
[dependencies]
11-
clap = { version = "4.4.11", features = ["derive", "env"] }
12-
color-eyre = "0.6.2"
13-
colored_json = "4.1.0"
14-
html2text = { version = "0.6.0", features = ["ansi_colours"] }
15-
reqwest = "0.11.22"
16-
serde = { version = "1.0.193", features = ["derive"] }
17-
serde_json = "1.0.108"
18-
serde_json_path = "0.6.4"
19-
serpapi = { version = "0.1.0", git = "https://github.com/serpapi/serpapi-rust.git", branch="serpapi-cli" }
20-
termion = "2.0.3"
21-
textwrap = "0.16.0"
22-
tokio = { version = "1.35.0", features = ["full"] }
23-
zstd = "0.13.0"
31+
clap = { version = "4.5", features = ["derive", "env"] }
32+
colored_json = "4.1"
33+
dirs = "5"
34+
serde = { version = "1.0", features = ["derive"] }
35+
serde_json = "1.0"
36+
serpapi = "1.1.0"
37+
thiserror = "2"
38+
rpassword = "7"
39+
tokio = { version = "1", features = ["rt", "time", "macros"] }
40+
toml = "0.8"
41+
url = "2"
42+
jaq-core = "1.5"
43+
jaq-interpret = "1.5"
44+
jaq-parse = "1.0"
45+
jaq-std = "1.6"
2446

2547
[package.metadata.docs.rs]
26-
# Docs are identical across targets.
2748
targets = []
2849

50+
51+
[dev-dependencies]
52+
assert_cmd = "2"
53+
predicates = "3"
54+
55+
# The profile that 'dist' will build with
56+
[profile.dist]
57+
inherits = "release"
58+
lto = true
59+
codegen-units = 1
60+
strip = "symbols"

src/commands/account.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
use crate::commands::{check_api_error, make_client, network_err};
2+
use crate::error::CliError;
3+
use serde_json::Value;
4+
use std::collections::HashMap;
5+
6+
/// Fetch and return the account information for the given API key.
7+
pub async fn run(api_key: &str) -> Result<Value, CliError> {
8+
let client = make_client(api_key)?;
9+
let result = client
10+
.account(HashMap::new())
11+
.await
12+
.map_err(network_err)?;
13+
check_api_error(result)
14+
}

src/commands/archive.rs

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
use crate::commands::{check_api_error, make_client, network_err};
2+
use crate::error::CliError;
3+
use serde_json::Value;
4+
5+
/// Retrieve a previously cached search result from the SerpApi archive by its ID.
6+
pub async fn run(id: &str, api_key: &str) -> Result<Value, CliError> {
7+
let client = make_client(api_key)?;
8+
let result = client
9+
.search_archive(id)
10+
.await
11+
.map_err(network_err)?;
12+
check_api_error(result)
13+
}

src/commands/locations.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
use crate::commands::{check_api_error, make_client, network_err};
2+
use crate::error::CliError;
3+
use crate::params::{self, Param};
4+
use serde_json::Value;
5+
6+
/// Search the SerpApi locations index using the provided parameters.
7+
pub async fn run(params: Vec<Param>) -> Result<Value, CliError> {
8+
let params_map = params::params_to_hashmap(params);
9+
// Locations endpoint is public – no API key needed.
10+
let client = make_client("")?;
11+
let result = client
12+
.location(params_map)
13+
.await
14+
.map_err(network_err)?;
15+
check_api_error(result)
16+
}

src/commands/login.rs

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
use crate::commands::{check_api_error, make_client, network_err};
2+
use crate::config;
3+
use crate::error::CliError;
4+
use std::collections::HashMap;
5+
6+
/// Prompt the user for their SerpApi API key, verify it, and persist it to the config file.
7+
pub async fn run() -> Result<(), CliError> {
8+
let api_key = rpassword::prompt_password("Enter your SerpApi API key: ")
9+
.map_err(|e| CliError::UsageError {
10+
message: format!("Failed to read input: {e}"),
11+
})?;
12+
let api_key = api_key.trim();
13+
14+
let client = make_client(api_key)?;
15+
let result = client
16+
.account(HashMap::new())
17+
.await
18+
.map_err(network_err)?;
19+
20+
let result = check_api_error(result)?;
21+
let email = result
22+
.get("account_email")
23+
.and_then(|v| v.as_str())
24+
.unwrap_or("unknown");
25+
config::save_config(api_key)?;
26+
eprintln!(
27+
"Logged in as {email}. API key saved to {:?}",
28+
config::config_path()
29+
);
30+
Ok(())
31+
}

src/commands/mod.rs

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
use serde_json::Value;
2+
use serpapi::serpapi::Client;
3+
use std::collections::HashMap;
4+
5+
use crate::error::CliError;
6+
7+
pub mod search;
8+
pub mod account;
9+
pub mod locations;
10+
pub mod archive;
11+
pub mod login;
12+
13+
/// The query-parameter name used to pass the SerpApi key to every request.
14+
pub(crate) const API_KEY_PARAM: &str = "api_key";
15+
16+
/// Convert a `Box<dyn Error>` from the serpapi client into a [`CliError::NetworkError`].
17+
pub(crate) fn network_err(e: Box<dyn std::error::Error>) -> CliError {
18+
CliError::NetworkError { message: e.to_string() }
19+
}
20+
21+
/// Build a `serpapi::Client` authenticated with the given API key.
22+
pub fn make_client(api_key: &str) -> Result<Client, CliError> {
23+
let params = HashMap::from([(API_KEY_PARAM.to_string(), api_key.to_string())]);
24+
Client::new(params).map_err(|e: Box<dyn std::error::Error>| CliError::NetworkError {
25+
message: e.to_string(),
26+
})
27+
}
28+
29+
/// Inspect a successful API response and surface any embedded error field
30+
/// as a `CliError::ApiError`, passing through all other values unchanged.
31+
pub fn check_api_error(result: Value) -> Result<Value, CliError> {
32+
if let Value::Object(ref map) = result {
33+
if let Some(error_val) = map.get("error") {
34+
let message = error_val
35+
.as_str()
36+
.unwrap_or("Unknown API error")
37+
.to_string();
38+
return Err(CliError::ApiError { message });
39+
}
40+
}
41+
Ok(result)
42+
}

src/commands/search.rs

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
use std::collections::{HashMap, HashSet};
2+
use std::time::Duration;
3+
use url::Url;
4+
use crate::commands::{check_api_error, make_client, network_err, API_KEY_PARAM};
5+
use crate::error::CliError;
6+
use crate::params::{self, Param};
7+
use serde_json::Value;
8+
9+
/// Execute a SerpApi search, optionally accumulating all pages into a single result.
10+
pub async fn run(
11+
params: Vec<Param>,
12+
api_key: &str,
13+
fields: Option<&str>,
14+
all_pages: bool,
15+
max_pages: Option<usize>,
16+
) -> Result<Value, CliError> {
17+
let mut params_map = params::params_to_hashmap(params);
18+
params::apply_fields(&mut params_map, fields);
19+
20+
if !all_pages {
21+
let client = make_client(api_key)?;
22+
let result = tokio::time::timeout(
23+
Duration::from_secs(30),
24+
client.search(params_map),
25+
)
26+
.await
27+
.map_err(|_| CliError::NetworkError { message: "Request timed out after 30s".to_string() })?
28+
.map_err(network_err)?;
29+
return check_api_error(result);
30+
}
31+
32+
// Ensure api_key is in the initial params map so it survives page transitions.
33+
params_map.insert(API_KEY_PARAM.to_string(), api_key.to_string());
34+
35+
let client = make_client(api_key)?;
36+
let mut current_params = params_map;
37+
let mut accumulated: Option<Value> = None;
38+
let mut seen: HashSet<String> = HashSet::new();
39+
let mut pages_fetched: usize = 0;
40+
41+
loop {
42+
let result = tokio::time::timeout(
43+
Duration::from_secs(30),
44+
client.search(current_params.clone()),
45+
)
46+
.await
47+
.map_err(|_| CliError::NetworkError { message: "Request timed out after 30s".to_string() })?
48+
.map_err(network_err)?;
49+
let page = check_api_error(result)?;
50+
pages_fetched += 1;
51+
52+
let next_url = page
53+
.get("serpapi_pagination")
54+
.and_then(|p| p.get("next"))
55+
.and_then(|n| n.as_str())
56+
.map(String::from);
57+
58+
match &mut accumulated {
59+
None => accumulated = Some(page),
60+
Some(acc) => {
61+
if let (Value::Object(acc_map), Value::Object(page_map)) = (acc, &page) {
62+
for (key, val) in page_map {
63+
if let Value::Array(new_items) = val {
64+
// Intentionally only merge array fields across pages.
65+
// Scalar/object fields (search_metadata, pagination info, etc.)
66+
// are kept from the first page, as they describe the overall
67+
// query rather than per-page state.
68+
match acc_map.get_mut(key) {
69+
Some(Value::Array(existing)) => existing.extend(new_items.iter().cloned()),
70+
_ => { acc_map.insert(key.clone(), val.clone()); }
71+
}
72+
}
73+
}
74+
}
75+
}
76+
}
77+
78+
match next_url {
79+
None => break,
80+
Some(url) => {
81+
if max_pages.is_some_and(|limit| pages_fetched >= limit) {
82+
break;
83+
}
84+
let mut next_params = parse_next_params(&url)?;
85+
next_params.insert(API_KEY_PARAM.to_string(), api_key.to_string());
86+
let canonical = canonical_params_key(&next_params);
87+
if !seen.insert(canonical) {
88+
break;
89+
}
90+
current_params = next_params;
91+
}
92+
}
93+
}
94+
95+
// Strip the pagination metadata — it's misleading in a merged result.
96+
if let Some(Value::Object(ref mut map)) = accumulated {
97+
map.remove("serpapi_pagination");
98+
}
99+
100+
accumulated.ok_or_else(|| CliError::ApiError {
101+
message: "No results returned".to_string(),
102+
})
103+
}
104+
105+
pub(crate) fn parse_next_params(next_url: &str) -> Result<HashMap<String, String>, CliError> {
106+
let parsed = Url::parse(next_url).map_err(|e| CliError::NetworkError {
107+
message: format!("Invalid pagination URL: {e}"),
108+
})?;
109+
Ok(parsed
110+
.query_pairs()
111+
.map(|(k, v)| (k.into_owned(), v.into_owned()))
112+
.collect())
113+
}
114+
115+
/// Produce a stable, unambiguous key from query params regardless of original URL order.
116+
/// Values are percent-encoded so that `=` and `&` inside values cannot collide with
117+
/// the key=value and pair-joining separators.
118+
pub(crate) fn canonical_params_key(params: &HashMap<String, String>) -> String {
119+
let sorted: std::collections::BTreeMap<_, _> = params.iter().collect();
120+
url::form_urlencoded::Serializer::new(String::new())
121+
.extend_pairs(sorted)
122+
.finish()
123+
}
124+
125+
#[cfg(test)]
126+
mod tests {
127+
use super::*;
128+
129+
#[test]
130+
fn test_canonical_params_key_is_order_independent() {
131+
let mut a = HashMap::new();
132+
a.insert("q".to_string(), "test".to_string());
133+
a.insert("start".to_string(), "0".to_string());
134+
a.insert("api_key".to_string(), "abc".to_string());
135+
136+
let mut b = HashMap::new();
137+
b.insert("start".to_string(), "0".to_string());
138+
b.insert("api_key".to_string(), "abc".to_string());
139+
b.insert("q".to_string(), "test".to_string());
140+
141+
assert_eq!(canonical_params_key(&a), canonical_params_key(&b));
142+
}
143+
}

0 commit comments

Comments
 (0)