Skip to content

Commit d05950e

Browse files
committed
chore: regresql-stub
1 parent 2fe33db commit d05950e

4 files changed

Lines changed: 113 additions & 13 deletions

File tree

crates/qshape-cli/src/attribute.rs

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,22 @@
22
//! on each cluster's canonical and walking the plan tree
33
44
use std::collections::HashMap;
5-
use std::fs;
6-
use std::io::{self, Read, Write};
5+
use std::io::{self, Write};
76
use std::sync::OnceLock;
87

98
use anyhow::{Context, Result, anyhow};
109
use postgres::{Client, NoTls, SimpleQueryMessage};
1110
use qshape_core::{
12-
CURRENT_SCHEMA_VERSION, ParamAttribution, cast_func_param_refs, load_clusters_doc,
13-
max_param_number, normalize,
11+
CURRENT_SCHEMA_VERSION, ParamAttribution, cast_func_param_refs, max_param_number, normalize,
1412
};
1513
use regex::Regex;
1614
use serde::Deserialize;
1715

16+
use crate::loader::load_clusters;
1817
use crate::typecast::TypecastCache;
1918

2019
pub fn run(in_path: Option<&str>, conn_str: &str, top: usize) -> Result<()> {
21-
let bytes = match in_path {
22-
Some(p) => fs::read(p).with_context(|| format!("read {p}"))?,
23-
None => {
24-
let mut buf = Vec::new();
25-
io::stdin().read_to_end(&mut buf).context("read stdin")?;
26-
buf
27-
}
28-
};
29-
let mut doc = load_clusters_doc(&bytes).context("decode clusters.json")?;
20+
let mut doc = load_clusters(in_path)?;
3021

3122
let mut client = Client::connect(conn_str, NoTls).context("connect")?;
3223
let mut cache = TypecastCache::new(&mut client);

crates/qshape-cli/src/loader.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
//! Shared CLI helpers
2+
3+
use std::fs;
4+
use std::io::{self, Read};
5+
6+
use anyhow::{Context, Result};
7+
use qshape_core::{ClustersDoc, load_clusters_doc};
8+
9+
/// Load and validate clusters.json from `--in <path>` or stdin
10+
pub fn load_clusters(in_path: Option<&str>) -> Result<ClustersDoc> {
11+
let bytes = match in_path {
12+
Some(p) => fs::read(p).with_context(|| format!("read {p}"))?,
13+
None => {
14+
let mut buf = Vec::new();
15+
io::stdin().read_to_end(&mut buf).context("read stdin")?;
16+
buf
17+
}
18+
};
19+
load_clusters_doc(&bytes).context("decode clusters.json")
20+
}

crates/qshape-cli/src/main.rs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ use clap::{Parser, Subcommand};
55

66
mod attribute;
77
mod capture;
8+
mod loader;
9+
mod regresql_stub;
810
mod typecast;
911

1012
#[derive(Debug, Parser)]
@@ -53,6 +55,26 @@ enum Command {
5355
top: usize,
5456
},
5557

58+
/// Generate regresql sql/ skeletons from clusters.json
59+
///
60+
/// For each of the top N clusters, writes <out>/sql/<slug>.sql with
61+
/// the canonical SQL ($N rewritten to :paramN) and a header carrying
62+
/// fingerprint + call count
63+
RegresqlStub {
64+
/// Input clusters.json (default: stdin)
65+
#[arg(long)]
66+
r#in: Option<String>,
67+
/// Output directory
68+
#[arg(long, default_value = "regresql-stubs")]
69+
out: String,
70+
/// Number of top clusters to emit
71+
#[arg(long, default_value_t = 10)]
72+
top: usize,
73+
/// Skip clusters with total_calls <= this
74+
#[arg(long, default_value_t = 0)]
75+
min_calls: i64,
76+
},
77+
5678
/// Fetch pg_stat_statements (with timing) from a live PG and cluster it
5779
Capture {
5880
/// libpq connection string.
@@ -98,6 +120,9 @@ fn main() -> Result<()> {
98120
Command::Attribute { conn, r#in, top } => {
99121
attribute::run(r#in.as_deref(), &conn, top)?;
100122
}
123+
Command::RegresqlStub { r#in, out, top, min_calls } => {
124+
regresql_stub::run(r#in.as_deref(), &out, top, min_calls)?;
125+
}
101126
}
102127

103128
Ok(())
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
//! Generate regresql `sql/` stubs for the top-N clusters
2+
3+
use std::fs;
4+
use std::path::Path;
5+
use std::sync::OnceLock;
6+
7+
use anyhow::{Context, Result};
8+
use qshape_core::Cluster;
9+
use regex::Regex;
10+
11+
use crate::loader::load_clusters;
12+
13+
pub fn run(in_path: Option<&str>, out_dir: &str, top: usize, min_calls: i64) -> Result<()> {
14+
let doc = load_clusters(in_path)?;
15+
16+
let sql_dir = Path::new(out_dir).join("sql");
17+
fs::create_dir_all(&sql_dir).with_context(|| format!("create {}", sql_dir.display()))?;
18+
19+
let mut emitted = 0usize;
20+
for c in &doc.clusters {
21+
if emitted >= top {
22+
break;
23+
}
24+
if c.fingerprint.is_empty() || c.total_calls <= min_calls {
25+
continue;
26+
}
27+
emitted += 1;
28+
let slug = stub_slug(emitted, &c.fingerprint);
29+
let sql = rewrite_params(&c.canonical);
30+
let path = sql_dir.join(format!("{slug}.sql"));
31+
write_sql_stub(&path, &slug, c, &sql)?;
32+
}
33+
34+
eprintln!("wrote {emitted} stubs to {out_dir}");
35+
Ok(())
36+
}
37+
38+
/// Build the file-name slug for the stub at 1-based position `rank`.
///
/// The slug is `q<rank>-<prefix>`: `rank` is zero-padded to at least two
/// digits and `<prefix>` is up to eight characters of the fingerprint with any
/// leading `sha1:` removed.
///
/// The slug ends up in a file name, so only ASCII alphanumerics from the
/// fingerprint are kept. This stops a fingerprint containing `/`, `\`, `.` or
/// `:` from yielding a path that leaves the output directory or that the
/// filesystem rejects. Hex fingerprints are unaffected.
fn stub_slug(rank: usize, fp: &str) -> String {
    let body = fp.strip_prefix("sha1:").unwrap_or(fp);
    let prefix: String = body
        .chars()
        .filter(char::is_ascii_alphanumeric)
        .take(8)
        .collect();
    format!("q{rank:02}-{prefix}")
}
43+
44+
// $N to:paramN
45+
fn rewrite_params(sql: &str) -> String {
46+
static R: OnceLock<Regex> = OnceLock::new();
47+
let re = R.get_or_init(|| Regex::new(r"\$(\d+)").expect("static regex"));
48+
re.replace_all(sql, ":param$1").into_owned()
49+
}
50+
51+
fn write_sql_stub(path: &Path, slug: &str, c: &Cluster, sql: &str) -> Result<()> {
52+
let trailing = if sql.ends_with('\n') { "" } else { "\n" };
53+
let content = format!(
54+
"-- name: {slug}\n\
55+
-- Generated from qshape cluster {fp}\n\
56+
-- Total calls (prod): {tc} across {n} member variants\n\
57+
-- TODO: rename this slug, review canonical SQL, replace :paramN with meaningful names\n\
58+
{sql}{trailing}",
59+
fp = c.fingerprint,
60+
tc = c.total_calls,
61+
n = c.members.len(),
62+
);
63+
fs::write(path, content).with_context(|| format!("write {}", path.display()))
64+
}

0 commit comments

Comments
 (0)