diff --git a/Cargo.toml b/Cargo.toml index a2d04c3..02bfa54 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,6 +29,11 @@ exclude = [ name = "sql-splitter" path = "src/main.rs" +[features] +default = ["duckdb-query", "compression"] +duckdb-query = ["dep:duckdb", "compression"] +compression = ["dep:flate2", "dep:bzip2", "dep:xz2", "dep:zstd"] + [dependencies] clap = { version = "4", features = ["derive"] } clap_complete = "4" @@ -38,10 +43,10 @@ memchr = "2" once_cell = "1" ahash = "0.8" anyhow = "1" -flate2 = "1" -bzip2 = "0.6" -xz2 = "0.1" -zstd = "0.13" +flate2 = { version = "1", optional = true } +bzip2 = { version = "0.6", optional = true } +xz2 = { version = "0.1", optional = true } +zstd = { version = "0.13", optional = true } indicatif = "0.18" rand = "0.10" smallvec = { version = "1.13", features = ["union"] } @@ -53,7 +58,7 @@ serde_yaml_ng = "0.10" fake = { version = "5", features = ["derive"] } sha2 = "0.11" hex = "0.4" -duckdb = { version = "1.10502", features = ["bundled"] } +duckdb = { version = "1.10502", features = ["bundled"], optional = true } dirs = "6" rustyline = "17" schemars = "1" diff --git a/README.md b/README.md index 1ce4b4d..895fa1e 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,19 @@ Split large SQL dump files into individual table files. Fast, memory-efficient, cargo install sql-splitter ``` +### Cargo features (library consumers) + +`sql-splitter` enables all features by default. 
To reduce the dependency footprint, disable the default features: + +```toml +sql-splitter = { version = "1", default-features = false } +``` + +Optional features: + +- `compression` (gzip/bzip2/xz/zstd support) +- `duckdb-query` (enables the `query` command and DuckDB integration) + ### From source ```bash diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs index ae7a388..86e16a9 100644 --- a/src/cmd/mod.rs +++ b/src/cmd/mod.rs @@ -5,6 +5,7 @@ mod glob_util; pub(crate) mod graph; pub(crate) mod merge; mod order; +#[cfg(feature = "duckdb-query")] mod query; pub(crate) mod redact; pub(crate) mod sample; @@ -700,6 +701,7 @@ pub enum Commands { }, /// Query SQL dumps using DuckDB's analytical engine + #[cfg(feature = "duckdb-query")] #[command(visible_alias = "qy")] #[command(after_help = "\x1b[1mExamples:\x1b[0m sql-splitter query dump.sql \"SELECT COUNT(*) FROM users\" @@ -1055,6 +1057,7 @@ pub fn run(cli: Cli) -> anyhow::Result<()> { dry_run, reverse, } => order::run(file, output, dialect, check, dry_run, reverse), + #[cfg(feature = "duckdb-query")] Commands::Query(args) => query::run(args), Commands::Schema { output, diff --git a/src/duckdb/mod.rs b/src/duckdb/mod.rs index 1c1e09f..c222724 100644 --- a/src/duckdb/mod.rs +++ b/src/duckdb/mod.rs @@ -5,7 +5,7 @@ //! //! # Features //! -//! - **Zero dependencies**: DuckDB is bundled and compiled into sql-splitter +//! - **Bundled engine**: DuckDB is embedded via the optional `duckdb-query` feature //! - **Multi-dialect support**: MySQL, PostgreSQL, and SQLite dumps //! - **Memory management**: Auto-switches to disk mode for large dumps //! 
- **Caching**: Optional persistent cache for repeated queries diff --git a/src/lib.rs b/src/lib.rs index 015bee1..73c5c6c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod analyzer; pub mod cmd; pub mod convert; pub mod differ; +#[cfg(feature = "duckdb-query")] pub mod duckdb; pub mod graph; pub mod json_schema; diff --git a/src/main.rs b/src/main.rs index 9edc912..7cf9ffc 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ mod analyzer; mod cmd; mod convert; mod differ; +#[cfg(feature = "duckdb-query")] mod duckdb; mod graph; mod json_schema; diff --git a/src/splitter/mod.rs b/src/splitter/mod.rs index 68c1e97..80dc809 100644 --- a/src/splitter/mod.rs +++ b/src/splitter/mod.rs @@ -70,10 +70,21 @@ impl Compression { ) -> std::io::Result> { Ok(match self { Compression::None => reader, + #[cfg(feature = "compression")] Compression::Gzip => Box::new(flate2::read::GzDecoder::new(reader)), + #[cfg(feature = "compression")] Compression::Bzip2 => Box::new(bzip2::read::BzDecoder::new(reader)), + #[cfg(feature = "compression")] Compression::Xz => Box::new(xz2::read::XzDecoder::new(reader)), + #[cfg(feature = "compression")] Compression::Zstd => Box::new(zstd::stream::read::Decoder::new(reader)?), + #[cfg(not(feature = "compression"))] + _ => { + return Err(std::io::Error::new( + std::io::ErrorKind::Unsupported, + "compressed input requires the `compression` feature", + )) + } }) } } diff --git a/tests/splitter_unit_test.rs b/tests/splitter_unit_test.rs index 6fd41ca..e73f25d 100644 --- a/tests/splitter_unit_test.rs +++ b/tests/splitter_unit_test.rs @@ -110,6 +110,7 @@ fn test_splitter_data_only() { } #[test] +#[cfg(feature = "compression")] fn test_splitter_gzip_compressed() { use flate2::write::GzEncoder; use flate2::Compression as GzCompression; @@ -135,6 +136,29 @@ fn test_splitter_gzip_compressed() { assert!(output_dir.join("users.sql").exists()); } +#[test] +#[cfg(not(feature = "compression"))] +fn test_splitter_compressed_input_requires_feature() 
{ + let temp_dir = TempDir::new().unwrap(); + let input_file = temp_dir.path().join("input.sql.gz"); + let output_dir = temp_dir.path().join("output"); + + std::fs::write( + &input_file, + b"\x1f\x8b\x08\x00\\\x06\x03j\x02\xffs\x0eru\x0cqU\x08qt\xf2qU(-N-*V\xd0\xc8LQ\xf0\xf4\x0b\xd1\xb4\xe6\x02\x00^\xb7Dc\x1d\x00\x00\x00", + ) + .unwrap(); + + let splitter = Splitter::new(input_file, output_dir); + let err = match splitter.split() { + Ok(_) => panic!("expected compressed input to fail without `compression` feature"), + Err(err) => err, + }; + assert!(err + .chain() + .any(|cause| cause.to_string() == "compressed input requires the `compression` feature")); +} + #[test] fn test_compression_detection() { assert_eq!(