From 1ec9705f196328fc2e4beb2b56934a9806e82cae Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 24 Apr 2025 11:52:42 +1000 Subject: [PATCH 01/50] chore: generate.rs module for creating EQL test data --- .../cipherstash-proxy-integration/Cargo.toml | 10 +- .../src/generate.rs | 265 ++++++++++++++++++ .../cipherstash-proxy-integration/src/lib.rs | 1 + packages/cipherstash-proxy/Cargo.toml | 8 +- 4 files changed, 281 insertions(+), 3 deletions(-) create mode 100644 packages/cipherstash-proxy-integration/src/generate.rs diff --git a/packages/cipherstash-proxy-integration/Cargo.toml b/packages/cipherstash-proxy-integration/Cargo.toml index 546e1ff8..974f72ef 100644 --- a/packages/cipherstash-proxy-integration/Cargo.toml +++ b/packages/cipherstash-proxy-integration/Cargo.toml @@ -24,5 +24,13 @@ tracing-subscriber = { workspace = true } webpki-roots = "0.26.7" [dev-dependencies] +# cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } +cipherstash-client = { path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-client", features = [ + "tokio", +] } +# cipherstash-config = "0.2.3" +cipherstash-config = { path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-config" } clap = "4.5.32" -fake = { version = "4", features = ["derive"] } +fake = { version = "4", features = ["chrono", "derive"] } +hex = "0.4.3" +uuid = { version = "1.11.0", features = ["serde", "v4"] } diff --git a/packages/cipherstash-proxy-integration/src/generate.rs b/packages/cipherstash-proxy-integration/src/generate.rs new file mode 100644 index 00000000..146b539f --- /dev/null +++ b/packages/cipherstash-proxy-integration/src/generate.rs @@ -0,0 +1,265 @@ +#[cfg(test)] +mod tests { + use crate::common::{clear, connect_with_tls, id, trace, PROXY}; + use cipherstash_client::config::EnvSource; + use cipherstash_client::credentials::auto_refresh::AutoRefresh; + use cipherstash_client::ejsonpath::Selector; + use cipherstash_client::encryption::{ + Encrypted, 
EncryptedEntry, EncryptedSteVecTerm, JsonIndexer, JsonIndexerOptions, OreTerm, + Plaintext, PlaintextTarget, QueryBuilder, ReferencedPendingPipeline, + }; + use cipherstash_client::{ + encryption::{ScopedCipher, SteVec}, + zerokms::{encrypted_record, EncryptedRecord}, + }; + use cipherstash_client::{ConsoleConfig, CtsConfig, ZeroKMSConfig}; + use cipherstash_config::column::{Index, IndexType}; + use cipherstash_config::{ColumnConfig, ColumnMode, ColumnType}; + use cipherstash_proxy::Identifier; + use rustls::unbuffered::EncodeError; + use serde::{Deserialize, Serialize}; + use std::sync::Arc; + use tracing::info; + use uuid::Uuid; + + pub mod option_mp_base85 { + use cipherstash_client::zerokms::encrypted_record::formats::mp_base85; + use cipherstash_client::zerokms::EncryptedRecord; + use serde::{Deserialize, Deserializer, Serialize, Serializer}; + + pub fn serialize( + value: &Option, + serializer: S, + ) -> Result + where + S: Serializer, + { + match value { + Some(record) => mp_base85::serialize(record, serializer), + None => serializer.serialize_none(), + } + } + + pub fn deserialize<'de, D>(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + { + let result = Option::::deserialize(deserializer)?; + Ok(result) + } + } + + #[derive(Debug, Deserialize, Serialize)] + + pub struct EqlEncrypted { + #[serde(rename = "c", with = "option_mp_base85")] + ciphertext: Option, + #[serde(rename = "i")] + identifier: Identifier, + #[serde(rename = "v")] + version: u16, + + #[serde(rename = "o")] + ore_index: Option>, + #[serde(rename = "m")] + match_index: Option>, + #[serde(rename = "u")] + unique_index: Option, + + #[serde(rename = "s")] + selector: Option, + + #[serde(rename = "b")] + blake3_index: Option, + + #[serde(rename = "ocf")] + ore_cclw_fixed_index: Option, + #[serde(rename = "ocv")] + ore_cclw_var_index: Option, + + #[serde(rename = "sv")] + ste_vec_index: Option>, + } + + #[derive(Debug, Deserialize, Serialize)] + pub struct 
EqlSteVecEncrypted { + #[serde(rename = "c", with = "option_mp_base85")] + ciphertext: Option, + + #[serde(rename = "s")] + selector: Option, + #[serde(rename = "b")] + blake3_index: Option, + #[serde(rename = "ocf")] + ore_cclw_fixed_index: Option, + #[serde(rename = "ocv")] + ore_cclw_var_index: Option, + } + + impl EqlEncrypted { + pub fn ste_vec(ste_vec_index: Vec) -> Self { + Self { + ste_vec_index: Some(ste_vec_index), + ciphertext: None, + identifier: Identifier { + table: "blah".to_string(), + column: "vtha".to_string(), + }, + version: 1, + ore_index: None, + match_index: None, + unique_index: None, + selector: None, + ore_cclw_fixed_index: None, + ore_cclw_var_index: None, + blake3_index: None, + } + } + } + impl EqlSteVecEncrypted { + pub fn ste_vec_element(selector: String, record: EncryptedRecord) -> Self { + Self { + ciphertext: Some(record), + selector: Some(selector), + ore_cclw_fixed_index: None, + ore_cclw_var_index: None, + blake3_index: None, + } + } + } + + #[tokio::test] + async fn generate_ste_vec() { + trace(); + + // clear().await; + // let client = connect_with_tls(PROXY).await; + + let console_config = ConsoleConfig::builder().with_env().build().unwrap(); + let cts_config = CtsConfig::builder().with_env().build().unwrap(); + let zerokms_config = ZeroKMSConfig::builder() + .add_source(EnvSource::default()) + .console_config(&console_config) + .cts_config(&cts_config) + .build_with_client_key() + .unwrap(); + let zerokms_client = zerokms_config + .create_client_with_credentials(AutoRefresh::new(zerokms_config.credentials())); + + let dataset_id = Uuid::parse_str("295504329cb045c398dc464c52a287a1").unwrap(); + + let cipher = Arc::new( + ScopedCipher::init(Arc::new(zerokms_client), Some(dataset_id)) + .await + .unwrap(), + ); + + let prefix = "prefix".to_string(); + + let column_config = ColumnConfig::build("column_name".to_string()) + .casts_as(ColumnType::JsonB) + .add_index(Index::new(IndexType::SteVec { + prefix: prefix.to_owned(), + })); 
+ + // let mut value = + // serde_json::from_str::("{\"hello\": \"one\", \"n\": 10}").unwrap(); + + // let mut value = + // serde_json::from_str::("{\"hello\": \"two\", \"n\": 20}").unwrap(); + + let mut value = + serde_json::from_str::("{\"hello\": \"two\", \"n\": 30}").unwrap(); + + // let mut value = + // serde_json::from_str::("{\"hello\": \"world\", \"n\": 42}").unwrap(); + + // let mut value = + // serde_json::from_str::("{\"hello\": \"world\", \"n\": 42}").unwrap(); + + // let mut value = + // serde_json::from_str::("{\"blah\": { \"vtha\": 42 }}").unwrap(); + + let plaintext = Plaintext::JsonB(Some(value)); + + let idx = 0; + + let mut pipeline = ReferencedPendingPipeline::new(cipher.clone()); + let encryptable = PlaintextTarget::new(plaintext, column_config); + pipeline + .add_with_ref::(encryptable, idx) + .unwrap(); + + let mut encrypteds = vec![]; + + let mut result = pipeline.encrypt(None).await.unwrap(); + if let Some(Encrypted::SteVec(ste_vec)) = result.remove(idx) { + for entry in ste_vec { + let selector = hex::encode(entry.0 .0); + let term = entry.1; + let record = entry.2; + + let mut e = EqlSteVecEncrypted::ste_vec_element(selector, record); + + match term { + EncryptedSteVecTerm::Mac(items) => { + e.blake3_index = Some(hex::encode(&items)); + } + EncryptedSteVecTerm::OreFixed(o) => { + e.ore_cclw_fixed_index = Some(hex::encode(o.bytes)); + } + EncryptedSteVecTerm::OreVariable(o) => { + e.ore_cclw_var_index = Some(hex::encode(o.bytes)); + } + } + + encrypteds.push(e); + } + // info!("{:?}" = encrypteds); + } + + info!("---------------------------------------------"); + + let e = EqlEncrypted::ste_vec(encrypteds); + info!("{:?}" = ?e); + + let json = serde_json::to_value(e).unwrap(); + info!("{}", json); + + let indexer = JsonIndexer::new(JsonIndexerOptions { prefix }); + + info!("---------------------------------------------"); + + // Path + // let path: String = "$.blah.vtha".to_string(); + // let selector = Selector::parse(&path).unwrap(); + 
// let selector = indexer.generate_selector(selector, cipher.index_key()); + // let selector = hex::encode(selector.0); + // info!("{}", selector); + + // Comparison + let n = 30; + let term = OreTerm::Number(n); + + let term = indexer.generate_term(term, cipher.index_key()).unwrap(); + + match term { + EncryptedSteVecTerm::Mac(items) => todo!(), + EncryptedSteVecTerm::OreFixed(ore_cllw8_v1) => { + let term = hex::encode(ore_cllw8_v1.bytes); + info!("{n}: {term}"); + } + EncryptedSteVecTerm::OreVariable(ore_cllw8_variable_v1) => todo!(), + } + + // if let Some(ste_vec_index) = e.ste_vec_index { + // for e in ste_vec_index { + // info!("{}", e); + // if let Some(ct) = e.ciphertext { + // let decrypted = cipher.decrypt(encrypted).await?; + // info!("{}", decrypted); + // } + // } + // } + } +} diff --git a/packages/cipherstash-proxy-integration/src/lib.rs b/packages/cipherstash-proxy-integration/src/lib.rs index dcff8072..db8e84f8 100644 --- a/packages/cipherstash-proxy-integration/src/lib.rs +++ b/packages/cipherstash-proxy-integration/src/lib.rs @@ -1,6 +1,7 @@ mod common; mod empty_result; mod extended_protocol_error_messages; +mod generate; mod map_concat; mod map_literals; mod map_match_index; diff --git a/packages/cipherstash-proxy/Cargo.toml b/packages/cipherstash-proxy/Cargo.toml index d63a2513..a5c47ea9 100644 --- a/packages/cipherstash-proxy/Cargo.toml +++ b/packages/cipherstash-proxy/Cargo.toml @@ -8,8 +8,12 @@ bigdecimal = { version = "0.4.6", features = ["serde-json"] } arc-swap = "1.7.1" bytes = { version = "1.9", default-features = false } chrono = { version = "0.4.39", features = ["clock"] } -cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } -cipherstash-config = "0.2.3" +# cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } +cipherstash-client = { path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-client", features = [ + "tokio", +] } +# cipherstash-config = "0.2.3" +cipherstash-config = { 
path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-config" } clap = { version = "4.5.31", features = ["derive", "env"] } config = { version = "0.15", features = [ "async", From a9b6a5c04b73f3b11096a94d46a42021b53454b6 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Wed, 30 Apr 2025 14:08:50 +1000 Subject: [PATCH 02/50] chore: formatting --- packages/cipherstash-proxy-integration/src/generate.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/cipherstash-proxy-integration/src/generate.rs b/packages/cipherstash-proxy-integration/src/generate.rs index 146b539f..49e4bf06 100644 --- a/packages/cipherstash-proxy-integration/src/generate.rs +++ b/packages/cipherstash-proxy-integration/src/generate.rs @@ -50,7 +50,6 @@ mod tests { } #[derive(Debug, Deserialize, Serialize)] - pub struct EqlEncrypted { #[serde(rename = "c", with = "option_mp_base85")] ciphertext: Option, From 0156d2d6d84222c71bd19186219644c03b026e39 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Wed, 30 Apr 2025 15:18:34 +1000 Subject: [PATCH 03/50] feat: encrypted JSON should use the new EQL schema --- Cargo.lock | 212 ++++++++++++++++-- .../cipherstash-proxy-integration/Cargo.toml | 4 +- packages/cipherstash-proxy/Cargo.toml | 4 +- packages/cipherstash-proxy/src/encrypt/mod.rs | 154 ++++++++----- packages/cipherstash-proxy/src/eql/mod.rs | 132 +++++------ packages/cipherstash-proxy/src/lib.rs | 2 +- .../src/postgresql/backend.rs | 4 +- .../src/postgresql/frontend.rs | 8 +- .../src/postgresql/messages/bind.rs | 2 +- .../src/postgresql/messages/data_row.rs | 4 +- .../src/inference/infer_type_impls/expr.rs | 2 +- packages/eql-mapper/src/lib.rs | 82 +++++++ packages/eql-mapper/src/model/type_system.rs | 7 + packages/eql-mapper/src/test_helpers.rs | 36 ++- .../eql-mapper/src/type_checked_statement.rs | 47 +++- 15 files changed, 541 insertions(+), 159 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 70247c27..e6a2dec8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -181,6 +181,7 
@@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" dependencies = [ + "serde", "zeroize", ] @@ -287,6 +288,61 @@ dependencies = [ "fs_extra", ] +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + "async-trait", + "axum-core", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tower", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http", + "http-body", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "backtrace" version = "0.3.74" @@ -317,6 +373,12 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" +[[package]] +name = "base32" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "022dfe9eb35f19ebbcb51e0b40a5ab759f46ad60cadf7297e0bd085afb50e076" + [[package]] name = "base64" version = "0.22.1" @@ -588,9 +650,7 @@ dependencies = [ [[package]] name = "cipherstash-client" -version = "0.18.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"f099b1db6cf37b0ca36e9c8e0c2dade20f2035804e225f52475d44e750dd5dd5" +version = "0.20.0" dependencies = [ "aes-gcm-siv", "anyhow", @@ -605,6 +665,7 @@ dependencies = [ "cipherstash-config", "cipherstash-core", "cllw-ore", + "cts-common", "derive_more", "dirs", "futures", @@ -620,7 +681,7 @@ dependencies = [ "percent-encoding", "rand 0.8.5", "rand_chacha 0.3.1", - "recipher", + "recipher 0.1.3", "reqwest", "reqwest-middleware", "reqwest-retry", @@ -648,8 +709,6 @@ dependencies = [ [[package]] name = "cipherstash-config" version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30104045751da6e528e83804f4b22d0cddcb27aacce0e1c79604872ddb076bbf" dependencies = [ "serde", "thiserror 1.0.69", @@ -658,8 +717,6 @@ dependencies = [ [[package]] name = "cipherstash-core" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd56dfac0a35146968ef6696fb822b22f70a664a8739874385876d5452844b7a" dependencies = [ "hmac", "lazy_static", @@ -694,7 +751,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.0", - "recipher", + "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex", "ring", "rust_decimal", @@ -724,11 +781,14 @@ name = "cipherstash-proxy-integration" version = "0.1.0" dependencies = [ "chrono", + "cipherstash-client", + "cipherstash-config", "cipherstash-proxy", "clap", "fake 4.2.0", + "hex", "rand 0.9.0", - "recipher", + "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "rustls", "serde", "serde_json", @@ -739,6 +799,7 @@ dependencies = [ "tokio-rustls", "tracing", "tracing-subscriber", + "uuid", "webpki-roots", ] @@ -796,8 +857,6 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cllw-ore" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1b01c26e11101044f85802e31d842483ef983a890c03472d9489f6969cf865a" dependencies = [ "bit-vec", 
"bitvec", @@ -953,6 +1012,25 @@ dependencies = [ "cipher 0.4.4", ] +[[package]] +name = "cts-common" +version = "0.2.0" +dependencies = [ + "arrayvec", + "axum", + "base32", + "diesel", + "fake 3.1.0", + "http", + "miette", + "rand 0.8.5", + "regex", + "serde", + "thiserror 1.0.69", + "url", + "vitaminc", +] + [[package]] name = "darling" version = "0.20.10" @@ -973,6 +1051,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", + "strsim", "syn 2.0.100", ] @@ -1075,6 +1154,39 @@ version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc55fe0d1f6c107595572ec8b107c0999bb1a2e0b75e37429a4fb0d6474a0e7d" +[[package]] +name = "diesel" +version = "2.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff3e1edb1f37b4953dd5176916347289ed43d7119cc2e6c7c3f7849ff44ea506" +dependencies = [ + "chrono", + "diesel_derives", + "uuid", +] + +[[package]] +name = "diesel_derives" +version = "2.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68d4216021b3ea446fd2047f5c8f8fe6e98af34508a254a01e4d6bc1e844f84d" +dependencies = [ + "diesel_table_macro_syntax", + "dsl_auto_type", + "proc-macro2", + "quote", + "syn 2.0.100", +] + +[[package]] +name = "diesel_table_macro_syntax" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25" +dependencies = [ + "syn 2.0.100", +] + [[package]] name = "diff" version = "0.1.13" @@ -1123,6 +1235,20 @@ dependencies = [ "syn 2.0.100", ] +[[package]] +name = "dsl_auto_type" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139ae9aca7527f85f26dd76483eb38533fd84bd571065da1739656ef71c5ff5b" +dependencies = [ + "darling", + "either", + "heck", + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "dummy" version = "0.8.0" @@ -1135,6 +1261,18 @@ dependencies = [ "syn 2.0.100", ] 
+[[package]] +name = "dummy" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abcba80bdf851db5616e27ff869399468e2d339d7c6480f5887681e6bdfc2186" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn 2.0.100", +] + [[package]] name = "dummy" version = "0.11.0" @@ -1223,12 +1361,24 @@ dependencies = [ "uuid", ] +[[package]] +name = "fake" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef603df4ba9adbca6a332db7da6f614f21eafefbaf8e087844e452fdec152d0" +dependencies = [ + "deunicode", + "dummy 0.9.2", + "rand 0.8.5", +] + [[package]] name = "fake" version = "4.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b591050272097cc85b2f3c1cc4817ba4560057d10fcae6f7339f1cf622da0a0f" dependencies = [ + "chrono", "deunicode", "dummy 0.11.0", "rand 0.9.0", @@ -2034,6 +2184,12 @@ dependencies = [ "regex-automata 0.1.10", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "md-5" version = "0.10.6" @@ -2788,6 +2944,25 @@ dependencies = [ "bitflags 2.9.0", ] +[[package]] +name = "recipher" +version = "0.1.3" +dependencies = [ + "aes", + "async-trait", + "cmac", + "hex", + "hex-literal", + "opaque-debug", + "rand 0.8.5", + "rand_chacha 0.3.1", + "serde", + "serde_cbor", + "sha2", + "thiserror 1.0.69", + "zeroize", +] + [[package]] name = "recipher" version = "0.1.3" @@ -3328,6 +3503,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fab13f937fa393d08645bf3a84bdfe86e296747b506ada67bb15f10f218b2a" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -3905,6 +4090,7 @@ dependencies = [ "tokio", "tower-layer", 
"tower-service", + "tracing", ] [[package]] @@ -4848,8 +5034,6 @@ dependencies = [ [[package]] name = "zerokms-protocol" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9d0d8103cfa862b451f2c35144301df25a233f7fae041666b890a1578c3b1" dependencies = [ "async-trait", "base64", diff --git a/packages/cipherstash-proxy-integration/Cargo.toml b/packages/cipherstash-proxy-integration/Cargo.toml index 974f72ef..aa79a2e9 100644 --- a/packages/cipherstash-proxy-integration/Cargo.toml +++ b/packages/cipherstash-proxy-integration/Cargo.toml @@ -25,11 +25,11 @@ webpki-roots = "0.26.7" [dev-dependencies] # cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } -cipherstash-client = { path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-client", features = [ +cipherstash-client = { path = "../../../cipherstash-suite/packages/cipherstash-client", features = [ "tokio", ] } # cipherstash-config = "0.2.3" -cipherstash-config = { path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-config" } +cipherstash-config = { path = "../../../cipherstash-suite/packages/cipherstash-config" } clap = "4.5.32" fake = { version = "4", features = ["chrono", "derive"] } hex = "0.4.3" diff --git a/packages/cipherstash-proxy/Cargo.toml b/packages/cipherstash-proxy/Cargo.toml index a5c47ea9..09bc0399 100644 --- a/packages/cipherstash-proxy/Cargo.toml +++ b/packages/cipherstash-proxy/Cargo.toml @@ -9,11 +9,11 @@ arc-swap = "1.7.1" bytes = { version = "1.9", default-features = false } chrono = { version = "0.4.39", features = ["clock"] } # cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } -cipherstash-client = { path = "/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-client", features = [ +cipherstash-client = { path = "../../../cipherstash-suite/packages/cipherstash-client", features = [ "tokio", ] } # cipherstash-config = "0.2.3" -cipherstash-config = { path = 
"/Users/tobyhede/src/cipherstash-suite/packages/cipherstash-config" } +cipherstash-config = { path = "../../../cipherstash-suite/packages/cipherstash-config" } clap = { version = "4.5.31", features = ["derive", "env"] } config = { version = "0.15", features = [ "async", diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index faac044e..30bf52c9 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -3,7 +3,8 @@ mod schema; use crate::{ config::TandemConfig, - connect, eql, + connect, + eql::{self, EqlEncryptedBody, EqlEncryptedIndexes}, error::{EncryptError, Error}, log::ENCRYPT, postgresql::Column, @@ -13,10 +14,9 @@ use cipherstash_client::{ config::EnvSource, credentials::{auto_refresh::AutoRefresh, ServiceCredentials}, encryption::{ - self, Encrypted, EncryptionError, IndexTerm, Plaintext, PlaintextTarget, - ReferencedPendingPipeline, + self, Encrypted, EncryptedEntry, EncryptedSteVecTerm, IndexTerm, Plaintext, + PlaintextTarget, ReferencedPendingPipeline, }, - zerokms::EncryptedRecord, ConsoleConfig, CtsConfig, ZeroKMSConfig, }; use cipherstash_config::ColumnConfig; @@ -88,7 +88,7 @@ impl Encrypt { &self, plaintexts: Vec>, columns: &[Option], - ) -> Result>, Error> { + ) -> Result>, Error> { let mut pipeline = ReferencedPendingPipeline::new(self.cipher.clone()); for (idx, item) in plaintexts.into_iter().zip(columns.iter()).enumerate() { @@ -141,22 +141,17 @@ impl Encrypt { /// pub async fn decrypt( &self, - ciphertexts: Vec>, + ciphertexts: Vec>, ) -> Result>, Error> { // Create a mutable vector to hold the decrypted results let mut results = vec![None; ciphertexts.len()]; // Collect the index and ciphertext details for every Some(ciphertext) - let (indices, encrypted) = ciphertexts + let (indices, encrypted): (Vec<_>, Vec<_>) = ciphertexts .into_iter() .enumerate() - .filter_map(|(idx, opt)| { - opt.map(|ct| { - 
eql_encrypted_to_encrypted_record(ct) - .map(|encrypted_record| (idx, encrypted_record)) - }) - }) - .collect::, Vec<_>), _>>()?; + .filter_map(|(idx, eql)| Some((idx, eql?.body.ciphertext))) + .collect::<_>(); // Decrypt the ciphertexts let decrypted = self.cipher.decrypt(encrypted).await?; @@ -236,56 +231,120 @@ async fn init_cipher(config: &TandemConfig) -> Result { fn to_eql_encrypted( encrypted: Encrypted, identifier: &Identifier, -) -> Result { +) -> Result { debug!(target: ENCRYPT, msg = "Encrypted to EQL", ?identifier); match encrypted { Encrypted::Record(ciphertext, terms) => { - struct Indexes { - match_index: Option>, - ore_index: Option>, - unique_index: Option, - } - - let mut indexes = Indexes { - match_index: None, - ore_index: None, - unique_index: None, - }; + let mut match_index: Option> = None; + let mut ore_index: Option> = None; + let mut unique_index: Option = None; + let mut blake3_index: Option = None; + let mut ore_cclw_fixed_index: Option = None; + let mut ore_cclw_var_index: Option = None; + let mut selector: Option = None; for index_term in terms { match index_term { IndexTerm::Binary(bytes) => { - indexes.unique_index = Some(format_index_term_binary(&bytes)) + unique_index = Some(format_index_term_binary(&bytes)) } - IndexTerm::BitMap(inner) => indexes.match_index = Some(inner), - IndexTerm::OreArray(vec_of_bytes) => { - indexes.ore_index = Some(format_index_term_ore_array(&vec_of_bytes)); + IndexTerm::BitMap(inner) => match_index = Some(inner), + IndexTerm::OreArray(bytes) => { + ore_index = Some(format_index_term_ore_array(&bytes)); } IndexTerm::OreFull(bytes) => { - indexes.ore_index = Some(format_index_term_ore(&bytes)); + ore_index = Some(format_index_term_ore(&bytes)); } IndexTerm::OreLeft(bytes) => { - indexes.ore_index = Some(format_index_term_ore(&bytes)); + ore_index = Some(format_index_term_ore(&bytes)); } + IndexTerm::BinaryVec(_) => todo!(), + IndexTerm::SteVecSelector(s) => { + selector = Some(hex::encode(s.0)); + } + 
IndexTerm::SteVecTerm(ste_vec_term) => match ste_vec_term { + EncryptedSteVecTerm::Mac(bytes) => blake3_index = Some(hex::encode(bytes)), + EncryptedSteVecTerm::OreFixed(ore) => { + ore_cclw_fixed_index = Some(hex::encode(ore.bytes)) + } + EncryptedSteVecTerm::OreVariable(ore) => { + ore_cclw_var_index = Some(hex::encode(ore.bytes)) + } + }, + IndexTerm::SteQueryVec(query) => {} // TODO: what do we do here? IndexTerm::Null => {} - _ => return Err(EncryptError::UnknownIndexTerm(identifier.to_owned()).into()), }; } - Ok(eql::Encrypted::Ciphertext { - ciphertext, + Ok(eql::EqlEncrypted { identifier: identifier.to_owned(), - match_index: indexes.match_index, - ore_index: indexes.ore_index, - unique_index: indexes.unique_index, version: 1, + body: EqlEncryptedBody { + ciphertext, + indexes: EqlEncryptedIndexes { + match_index, + ore_index, + unique_index, + blake3_index, + ore_cclw_fixed_index, + ore_cclw_var_index, + selector, + ste_vec_index: None, + }, + }, + }) + } + Encrypted::SteVec(ste_vec) => { + let ciphertext = ste_vec.root_ciphertext()?.clone(); + + let ste_vec_index: Vec = ste_vec + .into_iter() + .map(|EncryptedEntry(selector, term, ciphertext)| { + let indexes = match term { + EncryptedSteVecTerm::Mac(bytes) => EqlEncryptedIndexes { + selector: Some(hex::encode(selector.0)), + blake3_index: Some(hex::encode(bytes)), + ..Default::default() + }, + EncryptedSteVecTerm::OreFixed(ore) => EqlEncryptedIndexes { + selector: Some(hex::encode(selector.0)), + ore_cclw_fixed_index: Some(hex::encode(ore.bytes)), + ..Default::default() + }, + EncryptedSteVecTerm::OreVariable(ore) => EqlEncryptedIndexes { + selector: Some(hex::encode(selector.0)), + ore_cclw_var_index: Some(hex::encode(ore.bytes)), + ..Default::default() + }, + }; + + eql::EqlEncryptedBody { + ciphertext, + indexes, + } + }) + .collect(); + + // FIXME: I'm unsure if I've handled the root ciphertext correctly + // The way it's implemented right now is that it will be repeated one in the ste_vec_index. 
+ Ok(eql::EqlEncrypted { + identifier: identifier.to_owned(), + version: 1, + body: EqlEncryptedBody { + ciphertext: ciphertext.clone(), + indexes: EqlEncryptedIndexes { + match_index: None, + ore_index: None, + unique_index: None, + blake3_index: None, + ore_cclw_fixed_index: None, + ore_cclw_var_index: None, + selector: None, + ste_vec_index: Some(ste_vec_index), + }, + }, }) } - Encrypted::SteVec(ste_vec_index) => Ok(eql::Encrypted::SteVec { - identifier: identifier.to_owned(), - ste_vec_index, - version: 1, - }), } } @@ -314,15 +373,6 @@ fn format_index_term_ore(bytes: &Vec) -> Vec { vec![format_index_term_ore_bytea(bytes)] } -fn eql_encrypted_to_encrypted_record( - eql_encrypted: eql::Encrypted, -) -> Result { - match eql_encrypted { - eql::Encrypted::Ciphertext { ciphertext, .. } => Ok(ciphertext), - eql::Encrypted::SteVec { ste_vec_index, .. } => ste_vec_index.into_root_ciphertext(), - } -} - fn plaintext_type_name(pt: Plaintext) -> String { match pt { Plaintext::BigInt(_) => "BigInt".to_string(), diff --git a/packages/cipherstash-proxy/src/eql/mod.rs b/packages/cipherstash-proxy/src/eql/mod.rs index 69266310..03ec0141 100644 --- a/packages/cipherstash-proxy/src/eql/mod.rs +++ b/packages/cipherstash-proxy/src/eql/mod.rs @@ -1,7 +1,4 @@ -use cipherstash_client::{ - encryption::SteVec, - zerokms::{encrypted_record, EncryptedRecord}, -}; +use cipherstash_client::zerokms::{encrypted_record, EncryptedRecord}; use serde::{Deserialize, Serialize}; use sqltk::parser::ast::Ident; @@ -16,6 +13,7 @@ pub struct Plaintext { #[serde(rename = "q")] pub for_query: Option, } + #[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)] pub struct Identifier { #[serde(rename = "t")] @@ -65,54 +63,57 @@ pub enum ForQuery { } #[derive(Debug, Deserialize, Serialize)] -#[serde(tag = "k")] -pub enum Encrypted { - #[serde(rename = "ct")] - Ciphertext { - #[serde(rename = "c", with = "encrypted_record::formats::mp_base85")] - ciphertext: EncryptedRecord, - #[serde(rename = 
"o")] - ore_index: Option>, - #[serde(rename = "m")] - match_index: Option>, - #[serde(rename = "u")] - unique_index: Option, - #[serde(rename = "i")] - identifier: Identifier, - #[serde(rename = "v")] - version: u16, - }, - #[serde(rename = "sv")] - SteVec { - #[serde(rename = "sv")] - ste_vec_index: SteVec<16>, - #[serde(rename = "i")] - identifier: Identifier, - #[serde(rename = "v")] - version: u16, - }, +pub struct EqlEncrypted { + #[serde(rename = "i")] + pub(crate) identifier: Identifier, + #[serde(rename = "v")] + pub(crate) version: u16, + + #[serde(flatten)] + pub(crate) body: EqlEncryptedBody, } -// fn ident_de<'de, D>(deserializer: D) -> Result -// where -// D: serde::Deserializer<'de>, -// { -// let s = String::deserialize(deserializer)?; -// Ok(Ident::with_quote('"', s)) -// } - -// fn ident_se(ident: &Ident, serializer: S) -> Result -// where -// S: Serializer, -// { -// let s = ident.to_string(); -// serializer.serialize_str(&s) -// } +#[derive(Debug, Deserialize, Serialize)] +pub struct EqlEncryptedBody { + #[serde(rename = "c", with = "encrypted_record::formats::mp_base85")] + pub(crate) ciphertext: EncryptedRecord, + + #[serde(flatten)] + pub(crate) indexes: EqlEncryptedIndexes, +} + +#[derive(Debug, Deserialize, Serialize, Default)] +pub struct EqlEncryptedIndexes { + #[serde(rename = "o")] + pub(crate) ore_index: Option>, + #[serde(rename = "m")] + pub(crate) match_index: Option>, + #[serde(rename = "u")] + pub(crate) unique_index: Option, + + #[serde(rename = "s")] + pub(crate) selector: Option, + + #[serde(rename = "b")] + pub(crate) blake3_index: Option, + + #[serde(rename = "ocf")] + pub(crate) ore_cclw_fixed_index: Option, + #[serde(rename = "ocv")] + pub(crate) ore_cclw_var_index: Option, + + #[serde(rename = "sv")] + pub(crate) ste_vec_index: Option>, +} #[cfg(test)] mod tests { + use crate::{ + eql::{EqlEncryptedBody, EqlEncryptedIndexes}, + EqlEncrypted, + }; + use super::{Identifier, Plaintext}; - use crate::Encrypted; use 
cipherstash_client::zerokms::EncryptedRecord; use recipher::key::Iv; use uuid::Uuid; @@ -141,20 +142,28 @@ mod tests { pub fn ciphertext_json() { let expected = Identifier::new("table", "column"); - let ct = Encrypted::Ciphertext { + let ct = EqlEncrypted { identifier: expected.clone(), version: 1, - ciphertext: EncryptedRecord { - iv: Iv::default(), - ciphertext: vec![1; 32], - tag: vec![1; 16], - descriptor: "ciphertext".to_string(), - dataset_id: Some(Uuid::new_v4()), + body: EqlEncryptedBody { + ciphertext: EncryptedRecord { + iv: Iv::default(), + ciphertext: vec![1; 32], + tag: vec![1; 16], + descriptor: "ciphertext".to_string(), + dataset_id: Some(Uuid::new_v4()), + }, + indexes: EqlEncryptedIndexes { + ore_index: None, + match_index: None, + unique_index: None, + blake3_index: None, + selector: None, + ore_cclw_fixed_index: None, + ore_cclw_var_index: None, + ste_vec_index: None, + }, }, - - ore_index: None, - match_index: None, - unique_index: None, }; let value = serde_json::to_value(&ct).unwrap(); @@ -163,12 +172,7 @@ mod tests { let t = &i["t"]; assert_eq!(t, "table"); - let result: Encrypted = serde_json::from_value(value).unwrap(); - - if let Encrypted::Ciphertext { identifier, .. 
} = result { - assert_eq!(expected, identifier); - } else { - panic!("Expected Encrypted::Ciphertext"); - } + let result: EqlEncrypted = serde_json::from_value(value).unwrap(); + assert_eq!(expected, result.identifier); } } diff --git a/packages/cipherstash-proxy/src/lib.rs b/packages/cipherstash-proxy/src/lib.rs index 202a07e8..2e81ca9e 100644 --- a/packages/cipherstash-proxy/src/lib.rs +++ b/packages/cipherstash-proxy/src/lib.rs @@ -15,7 +15,7 @@ pub use crate::cli::Args; pub use crate::cli::Migrate; pub use crate::config::{DatabaseConfig, ServerConfig, TandemConfig, TlsConfig}; pub use crate::encrypt::Encrypt; -pub use crate::eql::{Encrypted, ForQuery, Identifier, Plaintext}; +pub use crate::eql::{EqlEncrypted, ForQuery, Identifier, Plaintext}; pub use crate::log::init; use std::mem; diff --git a/packages/cipherstash-proxy/src/postgresql/backend.rs b/packages/cipherstash-proxy/src/postgresql/backend.rs index 76b2e74b..950f5a3d 100644 --- a/packages/cipherstash-proxy/src/postgresql/backend.rs +++ b/packages/cipherstash-proxy/src/postgresql/backend.rs @@ -6,7 +6,7 @@ use super::messages::row_description::RowDescription; use super::messages::BackendCode; use crate::connect::Sender; use crate::encrypt::Encrypt; -use crate::eql::Encrypted; +use crate::eql::EqlEncrypted; use crate::error::Error; use crate::log::{DEVELOPMENT, MAPPER, PROTOCOL}; use crate::postgresql::context::Portal; @@ -262,7 +262,7 @@ where let result_column_format_codes = portal.format_codes(result_column_count); // Each row is converted into Vec> - let ciphertexts: Vec> = rows + let ciphertexts: Vec> = rows .iter() .map(|row| row.to_ciphertext()) .flatten_ok() diff --git a/packages/cipherstash-proxy/src/postgresql/frontend.rs b/packages/cipherstash-proxy/src/postgresql/frontend.rs index 681b7f50..a3e87a3d 100644 --- a/packages/cipherstash-proxy/src/postgresql/frontend.rs +++ b/packages/cipherstash-proxy/src/postgresql/frontend.rs @@ -23,7 +23,7 @@ use crate::prometheus::{ 
STATEMENTS_ENCRYPTED_TOTAL, STATEMENTS_PASSTHROUGH_TOTAL, STATEMENTS_TOTAL, STATEMENTS_UNMAPPABLE_TOTAL, }; -use crate::Encrypted; +use crate::EqlEncrypted; use bytes::BytesMut; use cipherstash_client::encryption::Plaintext; use eql_mapper::{self, EqlMapperError, EqlValue, TableColumn, TypeCheckedStatement}; @@ -359,7 +359,7 @@ where &mut self, typed_statement: &TypeCheckedStatement<'_>, literal_columns: &Vec>, - ) -> Result>, Error> { + ) -> Result>, Error> { let literal_values = typed_statement.literal_values(); if literal_values.is_empty() { debug!(target: MAPPER, @@ -404,7 +404,7 @@ where async fn transform_statement( &mut self, typed_statement: &TypeCheckedStatement<'_>, - encrypted_literals: &Vec>, + encrypted_literals: &Vec>, ) -> Result, Error> { // Convert literals to ast Expr let mut encrypted_expressions = vec![]; @@ -704,7 +704,7 @@ where &mut self, bind: &Bind, statement: &Statement, - ) -> Result>, Error> { + ) -> Result>, Error> { let plaintexts = bind.to_plaintext(&statement.param_columns, &statement.postgres_param_types)?; diff --git a/packages/cipherstash-proxy/src/postgresql/messages/bind.rs b/packages/cipherstash-proxy/src/postgresql/messages/bind.rs index e259b3b6..a76a02f6 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/bind.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/bind.rs @@ -76,7 +76,7 @@ impl Bind { Ok(plaintexts) } - pub fn rewrite(&mut self, encrypted: Vec>) -> Result<(), Error> { + pub fn rewrite(&mut self, encrypted: Vec>) -> Result<(), Error> { for (idx, ct) in encrypted.iter().enumerate() { if let Some(ct) = ct { let json = serde_json::to_value(ct)?; diff --git a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs index e12274fe..f9e4112e 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs @@ -19,7 +19,7 @@ pub struct DataColumn { } 
impl DataRow { - pub fn to_ciphertext(&self) -> Result>, Error> { + pub fn to_ciphertext(&self) -> Result>, Error> { Ok(self.columns.iter().map(|col| col.into()).collect()) } @@ -159,7 +159,7 @@ impl TryFrom for BytesMut { } } -impl From<&DataColumn> for Option { +impl From<&DataColumn> for Option { fn from(col: &DataColumn) -> Self { debug!(target: MAPPER, data_column = ?col); match col.json_bytes() { diff --git a/packages/eql-mapper/src/inference/infer_type_impls/expr.rs b/packages/eql-mapper/src/inference/infer_type_impls/expr.rs index 667d4dc7..9f888483 100644 --- a/packages/eql-mapper/src/inference/infer_type_impls/expr.rs +++ b/packages/eql-mapper/src/inference/infer_type_impls/expr.rs @@ -158,7 +158,7 @@ impl<'ast> InferType<'ast, Expr> for TypeInferencer<'ast> { | BinaryOperator::HashArrow | BinaryOperator::HashLongArrow | BinaryOperator::AtAt - | BinaryOperator::HashMinus + | BinaryOperator::HashMinus // TODO do not support for EQL | BinaryOperator::AtQuestion | BinaryOperator::Question | BinaryOperator::QuestionAnd diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index 061ea74d..4f38742b 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -33,6 +33,7 @@ mod test { use super::type_check; use crate::col; use crate::projection; + use crate::test_helpers; use crate::Param; use crate::Schema; use crate::TableResolver; @@ -1363,4 +1364,85 @@ mod test { type_check(schema, &statement).expect("named arrays should be supported"); } + + #[test] + fn jsonb_operator_arrow() { + test_jsonb_operator("->"); + } + + #[test] + fn jsonb_operator_long_arrow() { + test_jsonb_operator("->>"); + } + + #[test] + fn jsonb_operator_hash_arrow() { + test_jsonb_operator("#>"); + } + + #[test] + fn jsonb_operator_hash_long_arrow() { + test_jsonb_operator("#>>"); + } + + #[test] + fn jsonb_operator_hash_at_at() { + test_jsonb_operator("@@"); + } + + #[test] + fn jsonb_operator_at_question() { + test_jsonb_operator("@?"); + } 
+ + #[test] + fn jsonb_operator_question() { + test_jsonb_operator("?"); + } + + #[test] + fn jsonb_operator_question_and() { + test_jsonb_operator("?&"); + } + + #[test] + fn jsonb_operator_question_pipe() { + test_jsonb_operator("?|"); + } + + #[test] + fn jsonb_operator_at_arrow() { + test_jsonb_operator("@>"); + } + + #[test] + fn jsonb_operator_arrow_at() { + test_jsonb_operator("<@"); + } + + fn test_jsonb_operator(op: &'static str) { + let schema = resolver(schema! { + tables: { + patients: { + id (PK), + notes (EQL), + } + } + }); + + let statement = parse(&format!("SELECT id, notes {} 'medications' AS meds FROM patients", op)); + + match type_check(schema, &statement) { + Ok(typed) => { + match typed.transform(test_helpers::dummy_encrypted_json_selector(&typed, "medications")) { + Ok(statement) => assert_eq!( + statement.to_string(), + format!("SELECT id, notes {} '' AS meds FROM patients", op) + ), + Err(err) => panic!("transformation failed: {err}"), + } + } + Err(err) => panic!("type check failed: {err}"), + } + } } diff --git a/packages/eql-mapper/src/model/type_system.rs b/packages/eql-mapper/src/model/type_system.rs index 7ea05ce9..a4ef7fe4 100644 --- a/packages/eql-mapper/src/model/type_system.rs +++ b/packages/eql-mapper/src/model/type_system.rs @@ -61,6 +61,13 @@ impl Projection { Projection::WithColumns(columns) } } + + pub fn type_at_col_index(&self, index: usize) -> Option<&Value> { + match self { + Projection::WithColumns(cols) => cols.get(index).map(|col| &col.ty), + Projection::Empty => None, + } + } } /// A column from a projection which has a type and an optional alias. 
diff --git a/packages/eql-mapper/src/test_helpers.rs b/packages/eql-mapper/src/test_helpers.rs index cf36ba46..4457bdbe 100644 --- a/packages/eql-mapper/src/test_helpers.rs +++ b/packages/eql-mapper/src/test_helpers.rs @@ -1,16 +1,19 @@ -use std::fmt::Debug; +use std::{collections::HashMap, fmt::Debug}; -use sqltk::parser::{ - ast::{self as ast, Statement}, - dialect::PostgreSqlDialect, - parser::Parser, +use sqltk::{ + parser::{ + ast::{self as ast, Statement}, + dialect::PostgreSqlDialect, + parser::Parser, + }, + NodeKey, }; use tracing_subscriber::fmt::format; use tracing_subscriber::fmt::format::FmtSpan; use std::sync::Once; -use crate::{Projection, ProjectionColumn}; +use crate::{Projection, ProjectionColumn, TypeCheckedStatement}; #[allow(unused)] pub(crate) fn init_tracing() { @@ -27,7 +30,7 @@ pub(crate) fn init_tracing() { }); } -pub(crate) fn parse(statement: &'static str) -> Statement { +pub(crate) fn parse(statement: &str) -> Statement { Parser::parse_sql(&PostgreSqlDialect {}, statement).unwrap()[0].clone() } @@ -35,6 +38,25 @@ pub(crate) fn id(ident: &str) -> ast::Ident { ast::Ident::from(ident) } +pub(crate) fn get_node_key_of_json_selector<'ast>( + typed: &TypeCheckedStatement<'ast>, + selector: &'static str, +) -> NodeKey<'ast> { + typed + .find_nodekey_for_value_node(ast::Value::SingleQuotedString(selector.into())) + .expect("could not find selector Value node") +} + +pub(crate) fn dummy_encrypted_json_selector<'ast>( + typed: &TypeCheckedStatement<'ast>, + selector: &'static str, +) -> HashMap, ast::Value> { + HashMap::from_iter(vec![( + get_node_key_of_json_selector(typed, selector), + ast::Value::SingleQuotedString(format!("", selector)), + )]) +} + #[macro_export] macro_rules! 
col { ((NATIVE)) => { diff --git a/packages/eql-mapper/src/type_checked_statement.rs b/packages/eql-mapper/src/type_checked_statement.rs index bb66aef8..81758446 100644 --- a/packages/eql-mapper/src/type_checked_statement.rs +++ b/packages/eql-mapper/src/type_checked_statement.rs @@ -1,7 +1,10 @@ +use std::any::TypeId; +use std::convert::Infallible; +use std::ops::ControlFlow; use std::{collections::HashMap, sync::Arc}; -use sqltk::parser::ast::{self, Statement}; -use sqltk::{AsNodeKey, NodeKey, Transformable}; +use sqltk::parser::ast::{self, Query, SetExpr, Statement}; +use sqltk::{AsNodeKey, Break, NodeKey, Transformable, Visitable, Visitor}; use crate::{ DryRunnable, EqlMapperError, EqlValue, FailOnPlaceholderChange, GroupByEqlCol, Param, @@ -81,6 +84,34 @@ impl<'ast> TypeCheckedStatement<'ast> { self.statement.apply_transform(&mut transformer) } + /// Utility for finding the [`NodeKey`] of a [`Value`] node in `statement` by providing a `matching` equal node to search for. + #[cfg(test)] + pub(crate) fn find_nodekey_for_value_node(&self, matching: ast::Value) -> Option> { + struct FindNode<'ast> { + needle: ast::Value, + found: Option>, + } + + impl<'a> Visitor<'a> for FindNode<'a> { + type Error = Infallible; + + fn enter(&mut self, node: &'a N) -> ControlFlow> { + if let Some(haystack) = node.downcast_ref::() { + if haystack == &self.needle { + self.found = Some(haystack.as_node_key()); + return ControlFlow::Break(Break::Finished) + } + } + ControlFlow::Continue(()) + } + } + + let mut visitor = FindNode{ needle: matching, found: None }; + self.statement.accept(&mut visitor); + + visitor.found + } + pub fn literal_values(&self) -> Vec<&sqltk::parser::ast::Value> { self.literals .iter() @@ -113,11 +144,7 @@ impl<'ast> TypeCheckedStatement<'ast> { } for (key, _) in encrypted_literals.iter() { - if !self - .literals - .iter() - .any(|(_, node)| &node.as_node_key() == key) - { + if !self.literal_exists_for_node_key(*key) { return 
Err(EqlMapperError::Transform(String::from( "encrypted literals refers to a literal node which is not present in the SQL statement" ))); @@ -126,6 +153,12 @@ impl<'ast> TypeCheckedStatement<'ast> { Ok(()) } + fn literal_exists_for_node_key(&self, key: NodeKey<'ast>) -> bool { + self.literals + .iter() + .any(|(_, node)| node.as_node_key() == key) + } + fn count_not_null_literals(&self) -> usize { self.literals .iter() From b243a2bd76d4e6c6d2927c8b0a7bc11ab59408ba Mon Sep 17 00:00:00 2001 From: James Sadler Date: Thu, 1 May 2025 23:27:16 +1000 Subject: [PATCH 04/50] chore(mapper): get test infra in place for JSONB functions --- Cargo.lock | 7 + .../src/generate.rs | 23 +- packages/cipherstash-proxy/src/encrypt/mod.rs | 2 +- packages/eql-mapper/Cargo.toml | 1 + .../inference/infer_type_impls/function.rs | 125 ++-------- .../infer_type_impls/function_arg_expr.rs | 24 ++ .../src/inference/infer_type_impls/mod.rs | 1 + packages/eql-mapper/src/inference/mod.rs | 7 +- .../eql-mapper/src/inference/sql_fn_macros.rs | 30 +++ .../eql-mapper/src/inference/sql_functions.rs | 223 ++++++++++++++++++ packages/eql-mapper/src/lib.rs | 86 ++++++- packages/eql-mapper/src/test_helpers.rs | 64 +++-- .../eql-mapper/src/type_checked_statement.rs | 35 +-- 13 files changed, 451 insertions(+), 177 deletions(-) create mode 100644 packages/eql-mapper/src/inference/infer_type_impls/function_arg_expr.rs create mode 100644 packages/eql-mapper/src/inference/sql_fn_macros.rs create mode 100644 packages/eql-mapper/src/inference/sql_functions.rs diff --git a/Cargo.lock b/Cargo.lock index e6a2dec8..75282176 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1310,6 +1310,7 @@ dependencies = [ "thiserror 2.0.12", "tracing", "tracing-subscriber", + "vec1", ] [[package]] @@ -4320,6 +4321,12 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" +[[package]] +name = "vec1" +version = "1.12.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eab68b56840f69efb0fefbe3ab6661499217ffdc58e2eef7c3f6f69835386322" + [[package]] name = "version_check" version = "0.9.5" diff --git a/packages/cipherstash-proxy-integration/src/generate.rs b/packages/cipherstash-proxy-integration/src/generate.rs index 49e4bf06..57a4cec3 100644 --- a/packages/cipherstash-proxy-integration/src/generate.rs +++ b/packages/cipherstash-proxy-integration/src/generate.rs @@ -1,22 +1,17 @@ #[cfg(test)] mod tests { - use crate::common::{clear, connect_with_tls, id, trace, PROXY}; + use crate::common::trace; use cipherstash_client::config::EnvSource; use cipherstash_client::credentials::auto_refresh::AutoRefresh; - use cipherstash_client::ejsonpath::Selector; use cipherstash_client::encryption::{ - Encrypted, EncryptedEntry, EncryptedSteVecTerm, JsonIndexer, JsonIndexerOptions, OreTerm, - Plaintext, PlaintextTarget, QueryBuilder, ReferencedPendingPipeline, - }; - use cipherstash_client::{ - encryption::{ScopedCipher, SteVec}, - zerokms::{encrypted_record, EncryptedRecord}, + Encrypted, EncryptedSteVecTerm, JsonIndexer, JsonIndexerOptions, OreTerm, Plaintext, + PlaintextTarget, ReferencedPendingPipeline, }; + use cipherstash_client::{encryption::ScopedCipher, zerokms::EncryptedRecord}; use cipherstash_client::{ConsoleConfig, CtsConfig, ZeroKMSConfig}; use cipherstash_config::column::{Index, IndexType}; - use cipherstash_config::{ColumnConfig, ColumnMode, ColumnType}; + use cipherstash_config::{ColumnConfig, ColumnType}; use cipherstash_proxy::Identifier; - use rustls::unbuffered::EncodeError; use serde::{Deserialize, Serialize}; use std::sync::Arc; use tracing::info; @@ -25,7 +20,7 @@ mod tests { pub mod option_mp_base85 { use cipherstash_client::zerokms::encrypted_record::formats::mp_base85; use cipherstash_client::zerokms::EncryptedRecord; - use serde::{Deserialize, Deserializer, Serialize, Serializer}; + use serde::{Deserialize, Deserializer, Serializer}; pub fn serialize( 
value: &Option, @@ -167,7 +162,7 @@ mod tests { // let mut value = // serde_json::from_str::("{\"hello\": \"two\", \"n\": 20}").unwrap(); - let mut value = + let value = serde_json::from_str::("{\"hello\": \"two\", \"n\": 30}").unwrap(); // let mut value = @@ -243,12 +238,12 @@ mod tests { let term = indexer.generate_term(term, cipher.index_key()).unwrap(); match term { - EncryptedSteVecTerm::Mac(items) => todo!(), + EncryptedSteVecTerm::Mac(_) => todo!(), EncryptedSteVecTerm::OreFixed(ore_cllw8_v1) => { let term = hex::encode(ore_cllw8_v1.bytes); info!("{n}: {term}"); } - EncryptedSteVecTerm::OreVariable(ore_cllw8_variable_v1) => todo!(), + EncryptedSteVecTerm::OreVariable(_) => todo!(), } // if let Some(ste_vec_index) = e.ste_vec_index { diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 30bf52c9..24bf76d4 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -271,7 +271,7 @@ fn to_eql_encrypted( ore_cclw_var_index = Some(hex::encode(ore.bytes)) } }, - IndexTerm::SteQueryVec(query) => {} // TODO: what do we do here? + IndexTerm::SteQueryVec(_query) => {} // TODO: what do we do here? 
IndexTerm::Null => {} }; } diff --git a/packages/eql-mapper/Cargo.toml b/packages/eql-mapper/Cargo.toml index febb293e..3c63989e 100644 --- a/packages/eql-mapper/Cargo.toml +++ b/packages/eql-mapper/Cargo.toml @@ -19,6 +19,7 @@ sqltk = { workspace = true } thiserror = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } +vec1 = "1.12.1" [dev-dependencies] pretty_assertions = "^1.0" diff --git a/packages/eql-mapper/src/inference/infer_type_impls/function.rs b/packages/eql-mapper/src/inference/infer_type_impls/function.rs index 88347df6..585d6b7e 100644 --- a/packages/eql-mapper/src/inference/infer_type_impls/function.rs +++ b/packages/eql-mapper/src/inference/infer_type_impls/function.rs @@ -1,12 +1,16 @@ use eql_mapper_macros::trace_infer; -use sqltk::parser::ast::{Function, FunctionArg, FunctionArgExpr, FunctionArguments, Ident}; +use sqltk::parser::ast::{Function, FunctionArguments}; use crate::{ - inference::{type_error::TypeError, InferType}, - unifier::Type, - SqlIdent, TypeInferencer, + get_type_signature_for_special_cased_sql_function, inference::infer_type::InferType, + CompoundIdent, FunctionSig, TypeError, TypeInferencer, }; +/// Looks up the function signature. +/// +/// If a signature is found it means that function is handled as an EQL special case and is type checked accordingly. +/// +/// If a signature is not found then all function args and its return type are unified as native. #[trace_infer] impl<'ast> InferType<'ast, Function> for TypeInferencer<'ast> { fn infer_exit(&mut self, function: &'ast Function) -> Result<(), TypeError> { @@ -17,115 +21,14 @@ impl<'ast> InferType<'ast, Function> for TypeInferencer<'ast> { } let Function { name, args, .. } = function; + let fn_name = CompoundIdent::from(&name.0); - let fn_name: Vec<_> = name.0.iter().map(SqlIdent).collect(); - - if fn_name == [SqlIdent(&Ident::new("min"))] || fn_name == [SqlIdent(&Ident::new("max"))] { - // 1. 
There MUST be one unnamed argument (it CAN come from a subquery) - // 2. The return type is the same as the argument type - - match args { - FunctionArguments::None => { - return Err(TypeError::FunctionCall(format!( - "{} should be called with 1 argument, got 0", - fn_name.last().unwrap() - ))) - } - - FunctionArguments::Subquery(query) => { - // The query must return a single column projection which has the same type as the result of the - // call to min/max. - self.unify_node_with_type( - &**query, - Type::projection(&[(self.get_node_type(function), None)]), - )?; - } - - FunctionArguments::List(args_list) => { - if args_list.args.len() == 1 { - match &args_list.args[0] { - FunctionArg::Named { .. } | FunctionArg::ExprNamed { .. } => { - return Err(TypeError::FunctionCall(format!( - "{} cannot be called with named arguments", - fn_name.last().unwrap(), - ))) - } - - FunctionArg::Unnamed(function_arg_expr) => match function_arg_expr { - FunctionArgExpr::Expr(expr) => { - self.unify_nodes(function, expr)?; - } - - FunctionArgExpr::QualifiedWildcard(_) - | FunctionArgExpr::Wildcard => { - return Err(TypeError::FunctionCall(format!( - "{} cannot be called with wildcard arguments", - fn_name.last().unwrap(), - ))) - } - }, - } - } else { - return Err(TypeError::FunctionCall(format!( - "{} should be called with 1 argument, got {}", - fn_name.last().unwrap(), - args_list.args.len() - ))); - } - } + match get_type_signature_for_special_cased_sql_function(&fn_name, args) { + Some(sig) => { + sig.instantiate(&*self).apply_constraints(self, function)?; } - } else { - // All other functions: resolve to native - // EQL values will be rejected in function calls - self.unify_node_with_type(function, Type::any_native())?; - - match args { - // Function called without any arguments. 
- // Used for functions like `CURRENT_TIMESTAMP` that do not require parentheses () - // This is not the same as a function that has zero arguments (which would be an empty arg list) - FunctionArguments::None => {} - - FunctionArguments::Subquery(query) => { - // The query must return a single column projection which has the same type as the result of the function - self.unify_node_with_type( - &**query, - Type::projection(&[(self.get_node_type(function), None)]), - )?; - } - - FunctionArguments::List(args_list) => { - self.unify_node_with_type(function, Type::any_native())?; - for arg in &args_list.args { - match arg { - FunctionArg::ExprNamed { - name, - arg, - operator: _, - } => { - self.unify_node_with_type(name, Type::any_native())?; - match arg { - FunctionArgExpr::Expr(expr) => { - self.unify_node_with_type(expr, Type::any_native())?; - } - // Aggregate functions like COUNT(table.*) - FunctionArgExpr::QualifiedWildcard(_) => {} - // Aggregate functions like COUNT(*) - FunctionArgExpr::Wildcard => {} - } - } - FunctionArg::Named { arg, .. 
} | FunctionArg::Unnamed(arg) => match arg - { - FunctionArgExpr::Expr(expr) => { - self.unify_node_with_type(expr, Type::any_native())?; - } - // Aggregate functions like COUNT(table.*) - FunctionArgExpr::QualifiedWildcard(_) => {} - // Aggregate functions like COUNT(*) - FunctionArgExpr::Wildcard => {} - }, - } - } - } + None => { + FunctionSig::instantiate_native(function).apply_constraints(self, function)?; } } diff --git a/packages/eql-mapper/src/inference/infer_type_impls/function_arg_expr.rs b/packages/eql-mapper/src/inference/infer_type_impls/function_arg_expr.rs new file mode 100644 index 00000000..89e5d5df --- /dev/null +++ b/packages/eql-mapper/src/inference/infer_type_impls/function_arg_expr.rs @@ -0,0 +1,24 @@ +use eql_mapper_macros::trace_infer; +use sqltk::parser::ast::FunctionArgExpr; + +use crate::{inference::infer_type::InferType, TypeError, TypeInferencer}; + +#[trace_infer] +impl<'ast> InferType<'ast, FunctionArgExpr> for TypeInferencer<'ast> { + fn infer_exit(&mut self, farg_expr: &'ast FunctionArgExpr) -> Result<(), TypeError> { + let farg_expr_ty = self.get_node_type(farg_expr); + match farg_expr { + FunctionArgExpr::Expr(expr) => { + self.unify(farg_expr_ty, self.get_node_type(expr))?; + } + FunctionArgExpr::QualifiedWildcard(qualified) => { + self.unify(farg_expr_ty, self.resolve_qualified_wildcard(&qualified.0)?)?; + } + FunctionArgExpr::Wildcard => { + self.unify(farg_expr_ty, self.resolve_wildcard()?)?; + } + }; + + Ok(()) + } +} diff --git a/packages/eql-mapper/src/inference/infer_type_impls/mod.rs b/packages/eql-mapper/src/inference/infer_type_impls/mod.rs index 1f266dee..103a8cfd 100644 --- a/packages/eql-mapper/src/inference/infer_type_impls/mod.rs +++ b/packages/eql-mapper/src/inference/infer_type_impls/mod.rs @@ -1,6 +1,7 @@ // General AST nodes mod expr; mod function; +mod function_arg_expr; mod select; mod select_item; mod select_items; diff --git a/packages/eql-mapper/src/inference/mod.rs 
b/packages/eql-mapper/src/inference/mod.rs index 2e364944..b5a96a15 100644 --- a/packages/eql-mapper/src/inference/mod.rs +++ b/packages/eql-mapper/src/inference/mod.rs @@ -2,6 +2,8 @@ mod infer_type; mod infer_type_impls; mod registry; mod sequence; +mod sql_fn_macros; +mod sql_functions; mod type_error; pub mod unifier; @@ -12,7 +14,8 @@ use std::{cell::RefCell, fmt::Debug, marker::PhantomData, ops::ControlFlow, rc:: use infer_type::InferType; use sqltk::parser::ast::{ - Delete, Expr, Function, Ident, Insert, Query, Select, SelectItem, SetExpr, Statement, Values, + Delete, Expr, Function, FunctionArgExpr, Ident, Insert, Query, Select, SelectItem, SetExpr, + Statement, Values, }; use sqltk::{into_control_flow, AsNodeKey, Break, Visitable, Visitor}; @@ -20,6 +23,7 @@ use crate::{ScopeError, ScopeTracker, TableResolver}; pub(crate) use registry::*; pub(crate) use sequence::*; +pub(crate) use sql_functions::*; pub(crate) use type_error::*; /// [`Visitor`] implementation that performs type inference on AST nodes. @@ -187,6 +191,7 @@ macro_rules! dispatch_all { dispatch!($self, $method, $node, Vec); dispatch!($self, $method, $node, SelectItem); dispatch!($self, $method, $node, Function); + dispatch!($self, $method, $node, FunctionArgExpr); dispatch!($self, $method, $node, Values); dispatch!($self, $method, $node, sqltk::parser::ast::Value); }; diff --git a/packages/eql-mapper/src/inference/sql_fn_macros.rs b/packages/eql-mapper/src/inference/sql_fn_macros.rs new file mode 100644 index 00000000..8b09734b --- /dev/null +++ b/packages/eql-mapper/src/inference/sql_fn_macros.rs @@ -0,0 +1,30 @@ +#[macro_export] +macro_rules! to_kind { + (NATIVE) => { + crate::Kind::Native + }; + ($generic:ident) => { + crate::Kind::Generic(stringify!($generic)) + }; +} + +#[macro_export] +macro_rules! 
sql_fn_args { + (()) => { vec![] }; + + (($arg:ident)) => { vec![crate::to_kind!($arg)] }; + + (($arg:ident $(,$rest:ident)*)) => { + vec![crate::to_kind!($arg) $(,crate::to_kind!($rest))*] + }; +} + +#[macro_export] +macro_rules! sql_fn { + ($name:ident $args:tt -> $return_kind:ident) => { + crate::SqlFunction::new( + stringify!($name), + FunctionSig::new(crate::sql_fn_args!($args), crate::to_kind!($return_kind)), + ) + }; +} \ No newline at end of file diff --git a/packages/eql-mapper/src/inference/sql_functions.rs b/packages/eql-mapper/src/inference/sql_functions.rs new file mode 100644 index 00000000..95b4277a --- /dev/null +++ b/packages/eql-mapper/src/inference/sql_functions.rs @@ -0,0 +1,223 @@ +use std::{ + collections::{HashMap, HashSet}, + sync::{Arc, LazyLock}, +}; + +use derive_more::derive::Display; +use sqltk::parser::ast::{Function, FunctionArg, FunctionArgExpr, FunctionArguments, Ident}; + +use itertools::Itertools; +use vec1::{vec1, Vec1}; + +use crate::{sql_fn, unifier::Type, SqlIdent, TypeInferencer}; + +use super::TypeError; + +#[derive(Debug)] +pub(crate) struct SqlFunction(CompoundIdent, FunctionSig); + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub(crate) enum Kind { + Native, + Generic(&'static str), +} + +#[derive(Debug, Clone)] +pub(crate) struct FunctionSig { + args: Vec, + return_type: Kind, + generics: HashSet<&'static str>, +} + +#[derive(Debug, Clone)] +pub(crate) struct InstantiatedSig { + args: Vec>, + return_type: Arc, +} + +impl FunctionSig { + fn new(args: Vec, return_type: Kind) -> Self { + let mut generics: HashSet<&'static str> = HashSet::new(); + + for arg in &args { + if let Kind::Generic(generic) = arg { + generics.insert(*generic); + } + } + + if let Kind::Generic(generic) = return_type { + generics.insert(generic); + } + + Self { + args, + return_type, + generics, + } + } + + pub(crate) fn is_applicable_to_args(&self, fn_args_syntax: &FunctionArguments) -> bool { + match fn_args_syntax { + FunctionArguments::None => 
self.args.is_empty(), + FunctionArguments::Subquery(_) => self.args.len() == 1, + FunctionArguments::List(fn_args) => self.args.len() == fn_args.args.len(), + } + } + + pub(crate) fn instantiate(&self, inferencer: &TypeInferencer<'_>) -> InstantiatedSig { + let mut generics: HashMap<&'static str, Arc> = HashMap::new(); + + for generic in self.generics.iter() { + generics.insert(generic, inferencer.fresh_tvar()); + } + + InstantiatedSig { + args: self + .args + .iter() + .map(|kind| match kind { + Kind::Native => Arc::new(Type::any_native()), + Kind::Generic(generic) => generics[generic].clone(), + }) + .collect(), + + return_type: match self.return_type { + Kind::Native => Arc::new(Type::any_native()), + Kind::Generic(generic) => generics[generic].clone(), + }, + } + } + + pub(crate) fn instantiate_native(function: &Function) -> InstantiatedSig { + let arg_count = match &function.args { + FunctionArguments::None => 0, + FunctionArguments::Subquery(_) => 1, + FunctionArguments::List(args) => args.args.len(), + }; + + let args: Vec> = (0..arg_count) + .into_iter() + .map(|_| Arc::new(Type::any_native())) + .collect(); + + InstantiatedSig { + args, + return_type: Arc::new(Type::any_native()), + } + } +} + +impl InstantiatedSig { + pub(crate) fn apply_constraints<'ast>( + &self, + inferencer: &mut TypeInferencer<'ast>, + function: &'ast Function, + ) -> Result<(), TypeError> { + let fn_name = CompoundIdent::from(&function.name.0); + + // let function_ty = inferencer.get_node_type(function); + + inferencer.unify_node_with_type(function, self.return_type.clone())?; + + match &function.args { + FunctionArguments::None => { + if self.args.len() == 0 { + Ok(()) + } else { + Err(TypeError::Conflict(format!( + "expected {} args to function {}; got 0", + self.args.len(), + fn_name + ))) + } + } + + FunctionArguments::Subquery(query) => { + if self.args.len() == 1 { + inferencer.unify_node_with_type(&**query, self.args[0].clone())?; + Ok(()) + } else { + 
Err(TypeError::Conflict(format!( + "expected {} args to function {}; got 0", + self.args.len(), + fn_name + ))) + } + } + + FunctionArguments::List(args) => { + for (sig_arg, fn_arg) in self.args.iter().zip(args.args.iter()) { + let farg_expr = get_function_arg_expr(fn_arg); + inferencer.unify_node_with_type(farg_expr, sig_arg.clone())?; + } + + Ok(()) + } + } + } +} + +fn get_function_arg_expr(fn_arg: &FunctionArg) -> &FunctionArgExpr { + match fn_arg { + FunctionArg::Named { arg, .. } => arg, + FunctionArg::ExprNamed { arg, .. } => arg, + FunctionArg::Unnamed(arg) => arg, + } +} + +impl SqlFunction { + fn new(ident: &str, sig: FunctionSig) -> Self { + Self(CompoundIdent::from(ident), sig) + } +} + +#[derive(Debug, Eq, PartialEq, PartialOrd, Ord, Hash, Clone, Display)] +#[display("{}", _0.iter().map(SqlIdent::to_string).collect::>().join("."))] +pub(crate) struct CompoundIdent(Vec1>); + +impl From<&str> for CompoundIdent { + fn from(value: &str) -> Self { + CompoundIdent(vec1![SqlIdent(Ident::new(value))]) + } +} + +impl From<&Vec> for CompoundIdent { + fn from(value: &Vec) -> Self { + let mut idents = Vec1::>::new(SqlIdent(value[0].clone())); + idents.extend(value[1..].into_iter().cloned().map(SqlIdent)); + CompoundIdent(idents) + } +} + +static SQL_FUNCTION_SIGNATURES: LazyLock>> = LazyLock::new(|| { + // Notation: a single uppercase letter denotes an unknown type. Matching letters in a signature will be assigned + // *the same type variable* and thus must resolve to the same type. (🙏 Haskell) + // + // Eventually we should type check EQL types against their configured indexes instead of leaving that to the EQL + // extension in the database. 
I can imagine supporting type bounds in signatures here, such as: `T: Eq` + let sql_fns = vec![ + sql_fn!(count(T) -> NATIVE), + sql_fn!(min(T) -> T), + sql_fn!(max(T) -> T), + sql_fn!(jsonb_path_query(T, T) -> T), + ]; + + let mut sql_fns_by_name: HashMap> = HashMap::new(); + + for (key, chunk) in &sql_fns.into_iter().chunk_by(|sql_fn| sql_fn.0.clone()) { + sql_fns_by_name.insert( + key.clone(), + chunk.into_iter().map(|sql_fn| sql_fn.1).collect(), + ); + } + + sql_fns_by_name +}); + +pub(crate) fn get_type_signature_for_special_cased_sql_function( + fn_name: &CompoundIdent, + args: &FunctionArguments, +) -> Option<&'static FunctionSig> { + let sigs = SQL_FUNCTION_SIGNATURES.get(fn_name)?; + sigs.iter().find(|sig| sig.is_applicable_to_args(args)) +} diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index 4f38742b..43b6c65e 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -42,9 +42,11 @@ mod test { Value, }; use pretty_assertions::assert_eq; + use sqltk::parser::ast::Ident; use sqltk::parser::ast::Statement; use sqltk::parser::ast::{self as ast}; use sqltk::AsNodeKey; + use sqltk::NodeKey; use std::collections::HashMap; use std::sync::Arc; use tracing::error; @@ -1420,7 +1422,82 @@ mod test { test_jsonb_operator("<@"); } - fn test_jsonb_operator(op: &'static str) { + #[test] + fn jsonb_function_jsonb_path_query() { + test_jsonb_function( + "jsonb_path_query", + vec![ + ast::Expr::Identifier(Ident::new("notes")), + ast::Expr::Value(ast::Value::SingleQuotedString("$.medications".to_owned())), + ], + ); + } + + // TODO: do we need to check that the RHS of JSON operators MUST be a Value node + // and not an arbitrary expression? + + fn test_jsonb_function(fn_name: &str, args: Vec) { + let schema = resolver(schema! 
{ + tables: { + patients: { + id (PK), + notes (EQL), + } + } + }); + + let args_in = args + .iter() + .map(|expr| expr.to_string()) + .collect::>() + .join(", "); + + let statement = parse(&format!( + "SELECT id, {}({}) AS meds FROM patients", + fn_name, args_in + )); + + let args_encrypted = args + .iter() + .map(|expr| match expr { + ast::Expr::Identifier(ident) => ident.to_string(), + ast::Expr::Value(ast::Value::SingleQuotedString(s)) => { + format!("''", s) + } + _ => panic!("unsupported expr type in test util"), + }) + .collect::>() + .join(", "); + + let mut encrypted_literals: HashMap, ast::Value> = HashMap::new(); + + for arg in args.iter() { + if let ast::Expr::Value(value) = arg { + encrypted_literals.extend(test_helpers::dummy_encrypted_json_selector( + &statement, + value.clone(), + )); + } + } + + match type_check(schema, &statement) { + Ok(typed) => match typed.transform(encrypted_literals) { + Ok(statement) => { + assert_eq!( + statement.to_string(), + format!( + "SELECT id, {}({}) AS meds FROM patients", + fn_name, args_encrypted + ) + ) + } + Err(err) => panic!("transformation failed: {err}"), + }, + Err(err) => panic!("type check failed: {err}"), + } + } + + fn test_jsonb_operator(op: &str) { let schema = resolver(schema! 
{ tables: { patients: { @@ -1430,11 +1507,14 @@ mod test { } }); - let statement = parse(&format!("SELECT id, notes {} 'medications' AS meds FROM patients", op)); + let statement = parse(&format!( + "SELECT id, notes {} 'medications' AS meds FROM patients", + op + )); match type_check(schema, &statement) { Ok(typed) => { - match typed.transform(test_helpers::dummy_encrypted_json_selector(&typed, "medications")) { + match typed.transform(test_helpers::dummy_encrypted_json_selector(&statement, ast::Value::SingleQuotedString("medications".to_owned()))) { Ok(statement) => assert_eq!( statement.to_string(), format!("SELECT id, notes {} '' AS meds FROM patients", op) diff --git a/packages/eql-mapper/src/test_helpers.rs b/packages/eql-mapper/src/test_helpers.rs index 4457bdbe..5af2fde7 100644 --- a/packages/eql-mapper/src/test_helpers.rs +++ b/packages/eql-mapper/src/test_helpers.rs @@ -1,19 +1,19 @@ -use std::{collections::HashMap, fmt::Debug}; +use std::{collections::HashMap, convert::Infallible, fmt::Debug, ops::ControlFlow}; use sqltk::{ parser::{ - ast::{self as ast, Statement}, + ast::{self as ast, Statement, Value}, dialect::PostgreSqlDialect, parser::Parser, }, - NodeKey, + AsNodeKey, Break, NodeKey, Visitable, Visitor, }; use tracing_subscriber::fmt::format; use tracing_subscriber::fmt::format::FmtSpan; use std::sync::Once; -use crate::{Projection, ProjectionColumn, TypeCheckedStatement}; +use crate::{Projection, ProjectionColumn}; #[allow(unused)] pub(crate) fn init_tracing() { @@ -39,24 +39,60 @@ pub(crate) fn id(ident: &str) -> ast::Ident { } pub(crate) fn get_node_key_of_json_selector<'ast>( - typed: &TypeCheckedStatement<'ast>, - selector: &'static str, + statement: &'ast Statement, + selector: &Value, ) -> NodeKey<'ast> { - typed - .find_nodekey_for_value_node(ast::Value::SingleQuotedString(selector.into())) + find_nodekey_for_value_node(statement, selector.clone()) .expect("could not find selector Value node") } pub(crate) fn 
dummy_encrypted_json_selector<'ast>( - typed: &TypeCheckedStatement<'ast>, - selector: &'static str, + statement: &'ast Statement, + selector: Value, ) -> HashMap, ast::Value> { - HashMap::from_iter(vec![( - get_node_key_of_json_selector(typed, selector), - ast::Value::SingleQuotedString(format!("", selector)), - )]) + if let Value::SingleQuotedString(s) = &selector { + return HashMap::from_iter(vec![( + get_node_key_of_json_selector(statement, &selector), + ast::Value::SingleQuotedString(format!("", s)), + )]) + } else { + panic!("dummy_encrypted_json_selector only works on Value::SingleQuotedString") + } } +/// Utility for finding the [`NodeKey`] of a [`Value`] node in `statement` by providing a `matching` equal node to search for. +pub(crate) fn find_nodekey_for_value_node<'ast>( + statement: &'ast Statement, + matching: ast::Value, +) -> Option> { + struct FindNode<'ast> { + needle: ast::Value, + found: Option>, + } + + impl<'a> Visitor<'a> for FindNode<'a> { + type Error = Infallible; + + fn enter(&mut self, node: &'a N) -> ControlFlow> { + if let Some(haystack) = node.downcast_ref::() { + if haystack == &self.needle { + self.found = Some(haystack.as_node_key()); + return ControlFlow::Break(Break::Finished); + } + } + ControlFlow::Continue(()) + } + } + + let mut visitor = FindNode { + needle: matching, + found: None, + }; + + statement.accept(&mut visitor); + + visitor.found +} #[macro_export] macro_rules! 
col { ((NATIVE)) => { diff --git a/packages/eql-mapper/src/type_checked_statement.rs b/packages/eql-mapper/src/type_checked_statement.rs index 81758446..43012b7d 100644 --- a/packages/eql-mapper/src/type_checked_statement.rs +++ b/packages/eql-mapper/src/type_checked_statement.rs @@ -1,10 +1,7 @@ -use std::any::TypeId; -use std::convert::Infallible; -use std::ops::ControlFlow; use std::{collections::HashMap, sync::Arc}; -use sqltk::parser::ast::{self, Query, SetExpr, Statement}; -use sqltk::{AsNodeKey, Break, NodeKey, Transformable, Visitable, Visitor}; +use sqltk::parser::ast::{self, Statement}; +use sqltk::{AsNodeKey, NodeKey, Transformable}; use crate::{ DryRunnable, EqlMapperError, EqlValue, FailOnPlaceholderChange, GroupByEqlCol, Param, @@ -84,34 +81,6 @@ impl<'ast> TypeCheckedStatement<'ast> { self.statement.apply_transform(&mut transformer) } - /// Utility for finding the [`NodeKey`] of a [`Value`] node in `statement` by providing a `matching` equal node to search for. - #[cfg(test)] - pub(crate) fn find_nodekey_for_value_node(&self, matching: ast::Value) -> Option> { - struct FindNode<'ast> { - needle: ast::Value, - found: Option>, - } - - impl<'a> Visitor<'a> for FindNode<'a> { - type Error = Infallible; - - fn enter(&mut self, node: &'a N) -> ControlFlow> { - if let Some(haystack) = node.downcast_ref::() { - if haystack == &self.needle { - self.found = Some(haystack.as_node_key()); - return ControlFlow::Break(Break::Finished) - } - } - ControlFlow::Continue(()) - } - } - - let mut visitor = FindNode{ needle: matching, found: None }; - self.statement.accept(&mut visitor); - - visitor.found - } - pub fn literal_values(&self) -> Vec<&sqltk::parser::ast::Value> { self.literals .iter() From c1dd044bbe799eb3f88747c488cf89b606288bc8 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 5 May 2025 09:38:55 +1000 Subject: [PATCH 05/50] fix(mapper): hash function for SqlIdent must take quote style into account --- packages/eql-mapper/src/lib.rs | 4 ++-- 
packages/eql-mapper/src/model/sql_ident.rs | 28 ++++++++++++++++------ 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index 43b6c65e..5bb923e7 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -1339,14 +1339,14 @@ mod test { }); let statement = - parse("SELECT MIN(salary), MAX(salary), department FROM employees GROUP BY department"); + parse("SELECT min(salary), max(salary), department FROM employees GROUP BY department"); match type_check(schema, &statement) { Ok(typed) => { match typed.transform(HashMap::new()) { Ok(statement) => assert_eq!( statement.to_string(), - "SELECT CS_MIN_V1(salary) AS MIN, CS_MAX_V1(salary) AS MAX, department FROM employees GROUP BY department".to_string() + "SELECT CS_MIN_V1(salary) AS min, CS_MAX_V1(salary) AS max, department FROM employees GROUP BY department".to_string() ), Err(err) => panic!("transformation failed: {err}"), } diff --git a/packages/eql-mapper/src/model/sql_ident.rs b/packages/eql-mapper/src/model/sql_ident.rs index acaa91da..63bdc9eb 100644 --- a/packages/eql-mapper/src/model/sql_ident.rs +++ b/packages/eql-mapper/src/model/sql_ident.rs @@ -102,14 +102,28 @@ impl SqlIdent { } } -// This manual Hash implementation is required to prevent a clippy error: -// "error: you are deriving `Hash` but have implemented `PartialEq` explicitly" -impl Hash for SqlIdent -where - T: Hash, -{ +// This Hash implementation (and the following) one is required in order to be consistent with PartialEq. 
+impl Hash for SqlIdent<&Ident> { + fn hash(&self, state: &mut H) { + match self.0.quote_style { + Some(ch) => { + state.write_u8(1); + state.write_u32(ch as u32); + state.write(self.0.value.as_bytes()); + }, + None => { + state.write_u8(0); + for ch in self.0.value.chars().map(|ch| ch.to_lowercase()).flatten() { + state.write_u32(ch as u32); + } + }, + } + } +} + +impl Hash for SqlIdent { fn hash(&self, state: &mut H) { - self.0.hash(state) + SqlIdent(&self.0).hash(state) } } From b455c974f0cb9806dd9d37771b599c81fbaf3a39 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 5 May 2025 10:36:18 +1000 Subject: [PATCH 06/50] docs: rustdoc FunctionSig etc --- .../eql-mapper/src/inference/sql_functions.rs | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/packages/eql-mapper/src/inference/sql_functions.rs b/packages/eql-mapper/src/inference/sql_functions.rs index 95b4277a..ec8e0d4d 100644 --- a/packages/eql-mapper/src/inference/sql_functions.rs +++ b/packages/eql-mapper/src/inference/sql_functions.rs @@ -13,15 +13,23 @@ use crate::{sql_fn, unifier::Type, SqlIdent, TypeInferencer}; use super::TypeError; +/// The identifier and type signature of a SQL function. +/// +/// See [`SQL_FUNCTION_SIGNATURES`]. #[derive(Debug)] pub(crate) struct SqlFunction(CompoundIdent, FunctionSig); +/// A representation of the type of an argument or return type in a SQL function. #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub(crate) enum Kind { + /// A type that mjust be a native type Native, + + /// A type that can be a native or EQL type. The `str` is the generic variable name. Generic(&'static str), } +/// The type signature of a SQL functon (excluding its name). #[derive(Debug, Clone)] pub(crate) struct FunctionSig { args: Vec, @@ -29,6 +37,8 @@ pub(crate) struct FunctionSig { generics: HashSet<&'static str>, } +/// A function signature but filled in with fresh type variables that correspond with the [`Kind`] or each argument and +/// return type. 
#[derive(Debug, Clone)] pub(crate) struct InstantiatedSig { args: Vec>, @@ -56,6 +66,7 @@ impl FunctionSig { } } + /// Checks if `self` is applicable to a particular piece of SQL function invocation syntax. pub(crate) fn is_applicable_to_args(&self, fn_args_syntax: &FunctionArguments) -> bool { match fn_args_syntax { FunctionArguments::None => self.args.is_empty(), @@ -64,6 +75,7 @@ impl FunctionSig { } } + /// Creates an [`InstantiatedSig`] from `self`, filling in the [`Kind`]s with fresh type variables. pub(crate) fn instantiate(&self, inferencer: &TypeInferencer<'_>) -> InstantiatedSig { let mut generics: HashMap<&'static str, Arc> = HashMap::new(); @@ -88,6 +100,8 @@ impl FunctionSig { } } + /// For functions that do not have special case handling we synthesise an [`InstatiatedSig`] from the SQL function + /// invocation synta where all arguments and the return types are native. pub(crate) fn instantiate_native(function: &Function) -> InstantiatedSig { let arg_count = match &function.args { FunctionArguments::None => 0, @@ -108,6 +122,7 @@ impl FunctionSig { } impl InstantiatedSig { + /// Applies the type constraints of the function to to the AST. pub(crate) fn apply_constraints<'ast>( &self, inferencer: &mut TypeInferencer<'ast>, @@ -115,8 +130,6 @@ impl InstantiatedSig { ) -> Result<(), TypeError> { let fn_name = CompoundIdent::from(&function.name.0); - // let function_ty = inferencer.get_node_type(function); - inferencer.unify_node_with_type(function, self.return_type.clone())?; match &function.args { @@ -189,6 +202,7 @@ impl From<&Vec> for CompoundIdent { } } +/// SQL functions that are handled with special case type checking rules. static SQL_FUNCTION_SIGNATURES: LazyLock>> = LazyLock::new(|| { // Notation: a single uppercase letter denotes an unknown type. Matching letters in a signature will be assigned // *the same type variable* and thus must resolve to the same type. 
(🙏 Haskell) @@ -200,6 +214,8 @@ static SQL_FUNCTION_SIGNATURES: LazyLock sql_fn!(min(T) -> T), sql_fn!(max(T) -> T), sql_fn!(jsonb_path_query(T, T) -> T), + sql_fn!(jsonb_path_query_first(T, T) -> T), + sql_fn!(jsonb_path_exists(T, T) -> T), ]; let mut sql_fns_by_name: HashMap> = HashMap::new(); From 198867c12d26e7f9f84c9a23852c69559e909e85 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 5 May 2025 11:58:26 +1000 Subject: [PATCH 07/50] chore: fmt & clippy --- .../eql-mapper/src/inference/sql_fn_macros.rs | 14 ++--- .../eql-mapper/src/inference/sql_functions.rs | 56 +++++++++---------- packages/eql-mapper/src/model/sql_ident.rs | 8 +-- packages/eql-mapper/src/test_helpers.rs | 14 ++--- 4 files changed, 46 insertions(+), 46 deletions(-) diff --git a/packages/eql-mapper/src/inference/sql_fn_macros.rs b/packages/eql-mapper/src/inference/sql_fn_macros.rs index 8b09734b..4fcd9bc7 100644 --- a/packages/eql-mapper/src/inference/sql_fn_macros.rs +++ b/packages/eql-mapper/src/inference/sql_fn_macros.rs @@ -1,10 +1,10 @@ #[macro_export] macro_rules! to_kind { (NATIVE) => { - crate::Kind::Native + $crate::Kind::Native }; ($generic:ident) => { - crate::Kind::Generic(stringify!($generic)) + $crate::Kind::Generic(stringify!($generic)) }; } @@ -12,19 +12,19 @@ macro_rules! to_kind { macro_rules! sql_fn_args { (()) => { vec![] }; - (($arg:ident)) => { vec![crate::to_kind!($arg)] }; + (($arg:ident)) => { vec![$crate::to_kind!($arg)] }; (($arg:ident $(,$rest:ident)*)) => { - vec![crate::to_kind!($arg) $(,crate::to_kind!($rest))*] + vec![$crate::to_kind!($arg) $(, $crate::to_kind!($rest))*] }; } #[macro_export] macro_rules! 
sql_fn { ($name:ident $args:tt -> $return_kind:ident) => { - crate::SqlFunction::new( + $crate::SqlFunction::new( stringify!($name), - FunctionSig::new(crate::sql_fn_args!($args), crate::to_kind!($return_kind)), + FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), ) }; -} \ No newline at end of file +} diff --git a/packages/eql-mapper/src/inference/sql_functions.rs b/packages/eql-mapper/src/inference/sql_functions.rs index ec8e0d4d..7a2d42d4 100644 --- a/packages/eql-mapper/src/inference/sql_functions.rs +++ b/packages/eql-mapper/src/inference/sql_functions.rs @@ -110,7 +110,6 @@ impl FunctionSig { }; let args: Vec> = (0..arg_count) - .into_iter() .map(|_| Arc::new(Type::any_native())) .collect(); @@ -134,7 +133,7 @@ impl InstantiatedSig { match &function.args { FunctionArguments::None => { - if self.args.len() == 0 { + if self.args.is_empty() { Ok(()) } else { Err(TypeError::Conflict(format!( @@ -197,38 +196,39 @@ impl From<&str> for CompoundIdent { impl From<&Vec> for CompoundIdent { fn from(value: &Vec) -> Self { let mut idents = Vec1::>::new(SqlIdent(value[0].clone())); - idents.extend(value[1..].into_iter().cloned().map(SqlIdent)); + idents.extend(value[1..].iter().cloned().map(SqlIdent)); CompoundIdent(idents) } } /// SQL functions that are handled with special case type checking rules. -static SQL_FUNCTION_SIGNATURES: LazyLock>> = LazyLock::new(|| { - // Notation: a single uppercase letter denotes an unknown type. Matching letters in a signature will be assigned - // *the same type variable* and thus must resolve to the same type. (🙏 Haskell) - // - // Eventually we should type check EQL types against their configured indexes instead of leaving that to the EQL - // extension in the database. 
I can imagine supporting type bounds in signatures here, such as: `T: Eq` - let sql_fns = vec![ - sql_fn!(count(T) -> NATIVE), - sql_fn!(min(T) -> T), - sql_fn!(max(T) -> T), - sql_fn!(jsonb_path_query(T, T) -> T), - sql_fn!(jsonb_path_query_first(T, T) -> T), - sql_fn!(jsonb_path_exists(T, T) -> T), - ]; - - let mut sql_fns_by_name: HashMap> = HashMap::new(); - - for (key, chunk) in &sql_fns.into_iter().chunk_by(|sql_fn| sql_fn.0.clone()) { - sql_fns_by_name.insert( - key.clone(), - chunk.into_iter().map(|sql_fn| sql_fn.1).collect(), - ); - } +static SQL_FUNCTION_SIGNATURES: LazyLock>> = + LazyLock::new(|| { + // Notation: a single uppercase letter denotes an unknown type. Matching letters in a signature will be assigned + // *the same type variable* and thus must resolve to the same type. (🙏 Haskell) + // + // Eventually we should type check EQL types against their configured indexes instead of leaving that to the EQL + // extension in the database. I can imagine supporting type bounds in signatures here, such as: `T: Eq` + let sql_fns = vec![ + sql_fn!(count(T) -> NATIVE), + sql_fn!(min(T) -> T), + sql_fn!(max(T) -> T), + sql_fn!(jsonb_path_query(T, T) -> T), + sql_fn!(jsonb_path_query_first(T, T) -> T), + sql_fn!(jsonb_path_exists(T, T) -> T), + ]; + + let mut sql_fns_by_name: HashMap> = HashMap::new(); + + for (key, chunk) in &sql_fns.into_iter().chunk_by(|sql_fn| sql_fn.0.clone()) { + sql_fns_by_name.insert( + key.clone(), + chunk.into_iter().map(|sql_fn| sql_fn.1).collect(), + ); + } - sql_fns_by_name -}); + sql_fns_by_name + }); pub(crate) fn get_type_signature_for_special_cased_sql_function( fn_name: &CompoundIdent, diff --git a/packages/eql-mapper/src/model/sql_ident.rs b/packages/eql-mapper/src/model/sql_ident.rs index 63bdc9eb..aa5a0a3b 100644 --- a/packages/eql-mapper/src/model/sql_ident.rs +++ b/packages/eql-mapper/src/model/sql_ident.rs @@ -110,20 +110,20 @@ impl Hash for SqlIdent<&Ident> { state.write_u8(1); state.write_u32(ch as u32); 
state.write(self.0.value.as_bytes()); - }, + } None => { state.write_u8(0); - for ch in self.0.value.chars().map(|ch| ch.to_lowercase()).flatten() { + for ch in self.0.value.chars().flat_map(|ch| ch.to_lowercase()) { state.write_u32(ch as u32); } - }, + } } } } impl Hash for SqlIdent { fn hash(&self, state: &mut H) { - SqlIdent(&self.0).hash(state) + SqlIdent(&self.0).hash(state) } } diff --git a/packages/eql-mapper/src/test_helpers.rs b/packages/eql-mapper/src/test_helpers.rs index 5af2fde7..1d9285b2 100644 --- a/packages/eql-mapper/src/test_helpers.rs +++ b/packages/eql-mapper/src/test_helpers.rs @@ -46,12 +46,12 @@ pub(crate) fn get_node_key_of_json_selector<'ast>( .expect("could not find selector Value node") } -pub(crate) fn dummy_encrypted_json_selector<'ast>( - statement: &'ast Statement, +pub(crate) fn dummy_encrypted_json_selector( + statement: &Statement, selector: Value, -) -> HashMap, ast::Value> { +) -> HashMap, ast::Value> { if let Value::SingleQuotedString(s) = &selector { - return HashMap::from_iter(vec![( + HashMap::from_iter(vec![( get_node_key_of_json_selector(statement, &selector), ast::Value::SingleQuotedString(format!("", s)), )]) @@ -61,10 +61,10 @@ pub(crate) fn dummy_encrypted_json_selector<'ast>( } /// Utility for finding the [`NodeKey`] of a [`Value`] node in `statement` by providing a `matching` equal node to search for. 
-pub(crate) fn find_nodekey_for_value_node<'ast>( - statement: &'ast Statement, +pub(crate) fn find_nodekey_for_value_node( + statement: &Statement, matching: ast::Value, -) -> Option> { +) -> Option> { struct FindNode<'ast> { needle: ast::Value, found: Option>, From a4a3dfdbfa15a7d7bf0b2dbd807a8ddcb341a931 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 5 May 2025 16:37:42 +1000 Subject: [PATCH 08/50] chore: fixup cipherstash-client usages --- Cargo.lock | 33 +++++++------------ .../cipherstash-proxy-integration/Cargo.toml | 8 ++--- .../src/generate.rs | 6 ++-- .../cipherstash-proxy-integration/src/lib.rs | 1 - packages/cipherstash-proxy/Cargo.toml | 8 ++--- packages/cipherstash-proxy/src/encrypt/mod.rs | 16 ++++----- 6 files changed, 26 insertions(+), 46 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 75282176..f22af274 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -681,7 +681,7 @@ dependencies = [ "percent-encoding", "rand 0.8.5", "rand_chacha 0.3.1", - "recipher 0.1.3", + "recipher", "reqwest", "reqwest-middleware", "reqwest-retry", @@ -709,6 +709,8 @@ dependencies = [ [[package]] name = "cipherstash-config" version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30104045751da6e528e83804f4b22d0cddcb27aacce0e1c79604872ddb076bbf" dependencies = [ "serde", "thiserror 1.0.69", @@ -717,6 +719,8 @@ dependencies = [ [[package]] name = "cipherstash-core" version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd56dfac0a35146968ef6696fb822b22f70a664a8739874385876d5452844b7a" dependencies = [ "hmac", "lazy_static", @@ -751,7 +755,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.0", - "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "recipher", "regex", "ring", "rust_decimal", @@ -788,7 +792,7 @@ dependencies = [ "fake 4.2.0", "hex", "rand 0.9.0", - "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + 
"recipher", "rustls", "serde", "serde_json", @@ -857,6 +861,8 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cllw-ore" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1b01c26e11101044f85802e31d842483ef983a890c03472d9489f6969cf865a" dependencies = [ "bit-vec", "bitvec", @@ -2945,25 +2951,6 @@ dependencies = [ "bitflags 2.9.0", ] -[[package]] -name = "recipher" -version = "0.1.3" -dependencies = [ - "aes", - "async-trait", - "cmac", - "hex", - "hex-literal", - "opaque-debug", - "rand 0.8.5", - "rand_chacha 0.3.1", - "serde", - "serde_cbor", - "sha2", - "thiserror 1.0.69", - "zeroize", -] - [[package]] name = "recipher" version = "0.1.3" @@ -5041,6 +5028,8 @@ dependencies = [ [[package]] name = "zerokms-protocol" version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a9d0d8103cfa862b451f2c35144301df25a233f7fae041666b890a1578c3b1" dependencies = [ "async-trait", "base64", diff --git a/packages/cipherstash-proxy-integration/Cargo.toml b/packages/cipherstash-proxy-integration/Cargo.toml index aa79a2e9..74ea4334 100644 --- a/packages/cipherstash-proxy-integration/Cargo.toml +++ b/packages/cipherstash-proxy-integration/Cargo.toml @@ -24,12 +24,8 @@ tracing-subscriber = { workspace = true } webpki-roots = "0.26.7" [dev-dependencies] -# cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } -cipherstash-client = { path = "../../../cipherstash-suite/packages/cipherstash-client", features = [ - "tokio", -] } -# cipherstash-config = "0.2.3" -cipherstash-config = { path = "../../../cipherstash-suite/packages/cipherstash-config" } +cipherstash-client = { version = "0.20.0", features = ["tokio"] } +cipherstash-config = "0.2.3" clap = "4.5.32" fake = { version = "4", features = ["chrono", "derive"] } hex = "0.4.3" diff --git a/packages/cipherstash-proxy-integration/src/generate.rs 
b/packages/cipherstash-proxy-integration/src/generate.rs index 57a4cec3..13d11820 100644 --- a/packages/cipherstash-proxy-integration/src/generate.rs +++ b/packages/cipherstash-proxy-integration/src/generate.rs @@ -189,7 +189,7 @@ mod tests { let mut result = pipeline.encrypt(None).await.unwrap(); if let Some(Encrypted::SteVec(ste_vec)) = result.remove(idx) { for entry in ste_vec { - let selector = hex::encode(entry.0 .0); + let selector = hex::encode(entry.0.as_bytes()); let term = entry.1; let record = entry.2; @@ -200,10 +200,10 @@ mod tests { e.blake3_index = Some(hex::encode(&items)); } EncryptedSteVecTerm::OreFixed(o) => { - e.ore_cclw_fixed_index = Some(hex::encode(o.bytes)); + e.ore_cclw_fixed_index = Some(hex::encode(&o)); } EncryptedSteVecTerm::OreVariable(o) => { - e.ore_cclw_var_index = Some(hex::encode(o.bytes)); + e.ore_cclw_var_index = Some(hex::encode(&o)); } } diff --git a/packages/cipherstash-proxy-integration/src/lib.rs b/packages/cipherstash-proxy-integration/src/lib.rs index db8e84f8..dcff8072 100644 --- a/packages/cipherstash-proxy-integration/src/lib.rs +++ b/packages/cipherstash-proxy-integration/src/lib.rs @@ -1,7 +1,6 @@ mod common; mod empty_result; mod extended_protocol_error_messages; -mod generate; mod map_concat; mod map_literals; mod map_match_index; diff --git a/packages/cipherstash-proxy/Cargo.toml b/packages/cipherstash-proxy/Cargo.toml index 09bc0399..7ee18238 100644 --- a/packages/cipherstash-proxy/Cargo.toml +++ b/packages/cipherstash-proxy/Cargo.toml @@ -8,12 +8,8 @@ bigdecimal = { version = "0.4.6", features = ["serde-json"] } arc-swap = "1.7.1" bytes = { version = "1.9", default-features = false } chrono = { version = "0.4.39", features = ["clock"] } -# cipherstash-client = { version = "0.18.0-pre.1", features = ["tokio"] } -cipherstash-client = { path = "../../../cipherstash-suite/packages/cipherstash-client", features = [ - "tokio", -] } -# cipherstash-config = "0.2.3" -cipherstash-config = { path = 
"../../../cipherstash-suite/packages/cipherstash-config" } +cipherstash-client = { version = "0.20.0", features = ["tokio"] } +cipherstash-config = "0.2.3" clap = { version = "4.5.31", features = ["derive", "env"] } config = { version = "0.15", features = [ "async", diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 24bf76d4..b23f994a 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -260,15 +260,15 @@ fn to_eql_encrypted( } IndexTerm::BinaryVec(_) => todo!(), IndexTerm::SteVecSelector(s) => { - selector = Some(hex::encode(s.0)); + selector = Some(hex::encode(s.as_bytes())); } IndexTerm::SteVecTerm(ste_vec_term) => match ste_vec_term { EncryptedSteVecTerm::Mac(bytes) => blake3_index = Some(hex::encode(bytes)), EncryptedSteVecTerm::OreFixed(ore) => { - ore_cclw_fixed_index = Some(hex::encode(ore.bytes)) + ore_cclw_fixed_index = Some(hex::encode(&ore)) } EncryptedSteVecTerm::OreVariable(ore) => { - ore_cclw_var_index = Some(hex::encode(ore.bytes)) + ore_cclw_var_index = Some(hex::encode(&ore)) } }, IndexTerm::SteQueryVec(_query) => {} // TODO: what do we do here? 
@@ -302,18 +302,18 @@ fn to_eql_encrypted( .map(|EncryptedEntry(selector, term, ciphertext)| { let indexes = match term { EncryptedSteVecTerm::Mac(bytes) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.0)), + selector: Some(hex::encode(selector.as_bytes())), blake3_index: Some(hex::encode(bytes)), ..Default::default() }, EncryptedSteVecTerm::OreFixed(ore) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.0)), - ore_cclw_fixed_index: Some(hex::encode(ore.bytes)), + selector: Some(hex::encode(selector.as_bytes())), + ore_cclw_fixed_index: Some(hex::encode(&ore)), ..Default::default() }, EncryptedSteVecTerm::OreVariable(ore) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.0)), - ore_cclw_var_index: Some(hex::encode(ore.bytes)), + selector: Some(hex::encode(selector.as_bytes())), + ore_cclw_var_index: Some(hex::encode(&ore)), ..Default::default() }, }; From 1bd04d77224adad1613e1cca0f44df553ecdd138 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 6 May 2025 16:35:53 +1000 Subject: [PATCH 09/50] chore(mapper): replaced literals are now `ROW(..)` expressions --- packages/eql-mapper/src/lib.rs | 58 +++++++++++++++---- .../replace_plaintext_eql_literals.rs | 36 ++++++++++-- 2 files changed, 78 insertions(+), 16 deletions(-) diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index 5bb923e7..49e01dd5 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -1020,27 +1020,63 @@ mod test { )] ); - let transformed_statement = match typed.transform(HashMap::from_iter([( + match typed.transform(HashMap::from_iter([( typed.literals[0].1.as_node_key(), ast::Value::SingleQuotedString("ENCRYPTED".into()), )])) { - Ok(transformed_statement) => transformed_statement, + Ok(transformed_statement) => assert_eq!( + transformed_statement.to_string(), + "SELECT * FROM employees WHERE salary > ROW('ENCRYPTED'::JSONB)" + ), Err(err) => panic!("statement transformation failed: {}", err), }; + } 
+ + #[test] + fn insert_with_literal_subsitution() { + // init_tracing(); + + let schema = resolver(schema! { + tables: { + employees: { + id, + salary (EQL), + } + } + }); + + let statement = parse( + r#" + insert into employees (salary) values (20000) + "#, + ); - // This type checks the transformed statement so we can get hold of the encrypted literal. - let typed = match type_check(schema, &transformed_statement) { + let typed = match type_check(schema.clone(), &statement) { Ok(typed) => typed, Err(err) => panic!("type check failed: {:#?}", err), }; - assert!(typed.literals.contains(&( - EqlValue(TableColumn { - table: id("employees"), - column: id("salary") - }), - &ast::Value::SingleQuotedString("ENCRYPTED".into()), - ))); + assert_eq!( + typed.literals, + vec![( + EqlValue(TableColumn { + table: id("employees"), + column: id("salary") + }), + &ast::Value::Number(20000.into(), false) + )] + ); + + match typed.transform(HashMap::from_iter([( + typed.literals[0].1.as_node_key(), + ast::Value::SingleQuotedString("ENCRYPTED".into()), + )])) { + Ok(transformed_statement) => assert_eq!( + transformed_statement.to_string(), + "INSERT INTO employees (salary) VALUES (ROW('ENCRYPTED'::JSONB))" + ), + Err(err) => panic!("statement transformation failed: {}", err), + }; } #[test] diff --git a/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs b/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs index 687649af..7b9d1fe8 100644 --- a/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs +++ b/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs @@ -1,6 +1,9 @@ use std::{any::type_name, collections::HashMap}; -use sqltk::parser::ast::Value; +use sqltk::parser::ast::{ + CastKind, DataType, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, + FunctionArguments, Ident, ObjectName, Value, +}; use sqltk::{NodeKey, NodePath, Visitable}; use 
crate::EqlMapperError; @@ -24,11 +27,12 @@ impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { node_path: &NodePath<'ast>, target_node: &mut N, ) -> Result { + if let Some((Expr::Value(value),)) = node_path.last_1_as::() { if self.would_edit(node_path, target_node) { - if let Some((value,)) = node_path.last_1_as::() { + if let Some((Expr::Value(value),)) = node_path.last_1_as::() { if let Some(replacement) = self.encrypted_literals.remove(&NodeKey::new(value)) { - let target_node = target_node.downcast_mut::().unwrap(); - *target_node = replacement; + let target_node = target_node.downcast_mut::().unwrap(); + *target_node = make_row_expression(replacement); return Ok(true); } } @@ -38,7 +42,7 @@ impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { } fn would_edit(&mut self, node_path: &NodePath<'ast>, _target_node: &N) -> bool { - if let Some((value,)) = node_path.last_1_as::() { + if let Some((Expr::Value(value),)) = node_path.last_1_as::() { return self.encrypted_literals.contains_key(&NodeKey::new(value)); } false @@ -55,3 +59,25 @@ impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { } } } + +fn make_row_expression(replacement: Value) -> Expr { + Expr::Function(Function { + name: ObjectName(vec![Ident::new("ROW")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + clauses: vec![], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(replacement)), + data_type: DataType::JSONB, + format: None, + }))], + }), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }) +} From 34c25deec4e8661bc0eef4663b1f2128389b4c9d Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Mon, 5 May 2025 17:38:03 +1000 Subject: [PATCH 10/50] chore(eql): Update test schema to use EQL 2.0 --- tests/sql/schema.sql | 54 
++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/sql/schema.sql b/tests/sql/schema.sql index c1398811..32198f54 100644 --- a/tests/sql/schema.sql +++ b/tests/sql/schema.sql @@ -13,95 +13,95 @@ DROP TABLE IF EXISTS encrypted; CREATE TABLE encrypted ( id bigint, plaintext text, - encrypted_text cs_encrypted_v1, - encrypted_bool cs_encrypted_v1, - encrypted_int2 cs_encrypted_v1, - encrypted_int4 cs_encrypted_v1, - encrypted_int8 cs_encrypted_v1, - encrypted_float8 cs_encrypted_v1, - encrypted_date cs_encrypted_v1, - encrypted_jsonb cs_encrypted_v1, + encrypted_text eql_v1_encrypted, + encrypted_bool eql_v1_encrypted, + encrypted_int2 eql_v1_encrypted, + encrypted_int4 eql_v1_encrypted, + encrypted_int8 eql_v1_encrypted, + encrypted_float8 eql_v1_encrypted, + encrypted_date eql_v1_encrypted, + encrypted_jsonb eql_v1_encrypted, PRIMARY KEY(id) ); DROP TABLE IF EXISTS unconfigured; CREATE TABLE unconfigured ( id bigint, - encrypted_unconfigured cs_encrypted_v1, + encrypted_unconfigured eql_v1_encrypted, PRIMARY KEY(id) ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_text', 'unique', 'text' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_text', 'match', 'text' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_text', 'ore', 'text' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_bool', 'unique', 'boolean' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_bool', 'ore', 'boolean' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_int2', 'unique', 'small_int' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_int2', 'ore', 'small_int' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_int4', 'unique', 'int' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_int4', 
'ore', 'int' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_int8', 'unique', 'big_int' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_int8', 'ore', @@ -109,35 +109,35 @@ SELECT cs_add_index_v1( ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_float8', 'unique', 'double' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_float8', 'ore', 'double' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_date', 'unique', 'date' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_date', 'ore', 'date' ); -SELECT cs_add_index_v1( +SELECT eql_v1.add_index( 'encrypted', 'encrypted_jsonb', 'ste_vec', @@ -145,5 +145,5 @@ SELECT cs_add_index_v1( '{"prefix": "encrypted/encrypted_jsonb"}' ); -SELECT cs_encrypt_v1(); -SELECT cs_activate_v1(); +SELECT eql_v1.encrypt(); +SELECT eql_v1.activate(); From 0b0d1ea15c859271ba8ee25d8bc210d142dfb7ee Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 6 May 2025 09:10:26 +1000 Subject: [PATCH 11/50] chore(eql): Update to use EQL 2.0 configuration table --- docs/getting-started/schema-example.sql | 2 +- packages/cipherstash-proxy/src/encrypt/config/manager.rs | 3 +-- .../cipherstash-proxy/src/encrypt/sql/select_config.sql | 2 +- tests/benchmark/sql/benchmark-schema.sql | 2 +- tests/sql/schema-uninstall.sql | 2 +- tests/sql/schema.sql | 2 +- tests/tasks/test/integration/psql-passthrough.sh | 6 +++--- 7 files changed, 9 insertions(+), 10 deletions(-) diff --git a/docs/getting-started/schema-example.sql b/docs/getting-started/schema-example.sql index 29e3e743..c6ad0cb0 100644 --- a/docs/getting-started/schema-example.sql +++ b/docs/getting-started/schema-example.sql @@ -1,4 +1,4 @@ -TRUNCATE TABLE cs_configuration_v1; +TRUNCATE TABLE public.eql_v1_configuration; -- Exciting cipherstash table DROP TABLE IF EXISTS users; diff --git 
a/packages/cipherstash-proxy/src/encrypt/config/manager.rs b/packages/cipherstash-proxy/src/encrypt/config/manager.rs index 31312533..df23d8ab 100644 --- a/packages/cipherstash-proxy/src/encrypt/config/manager.rs +++ b/packages/cipherstash-proxy/src/encrypt/config/manager.rs @@ -195,8 +195,7 @@ pub async fn load_encrypt_config(config: &DatabaseConfig) -> Result bool { let msg = e.to_string(); - msg.contains("cs_configuration_v1") && msg.contains("does not exist") + msg.contains("eql_v1_configuration") && msg.contains("does not exist") } diff --git a/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql b/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql index 72827f37..8be0732f 100644 --- a/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql +++ b/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql @@ -1 +1 @@ -SELECT data FROM cs_configuration_v1 WHERE state = 'active' LIMIT 1; +SELECT data FROM public.eql_v1_configuration WHERE state = 'active' LIMIT 1; diff --git a/tests/benchmark/sql/benchmark-schema.sql b/tests/benchmark/sql/benchmark-schema.sql index fecdf7bf..e975f2e8 100644 --- a/tests/benchmark/sql/benchmark-schema.sql +++ b/tests/benchmark/sql/benchmark-schema.sql @@ -1,4 +1,4 @@ -TRUNCATE TABLE cs_configuration_v1; +TRUNCATE TABLE public.eql_v1_configuration; DROP TABLE IF EXISTS benchmark_plaintext; CREATE TABLE benchmark_plaintext ( diff --git a/tests/sql/schema-uninstall.sql b/tests/sql/schema-uninstall.sql index 3c34ba76..ae6630cf 100644 --- a/tests/sql/schema-uninstall.sql +++ b/tests/sql/schema-uninstall.sql @@ -1,4 +1,4 @@ -DROP TABLE IF EXISTS cs_configuration_v1; +DROP TABLE IF EXISTS public.eql_v1_configuration; -- Regular old table DROP TABLE IF EXISTS plaintext; diff --git a/tests/sql/schema.sql b/tests/sql/schema.sql index 32198f54..57f5e29f 100644 --- a/tests/sql/schema.sql +++ b/tests/sql/schema.sql @@ -1,4 +1,4 @@ -TRUNCATE TABLE cs_configuration_v1; +TRUNCATE TABLE public.eql_v1_configuration; -- 
Regular old table DROP TABLE IF EXISTS plaintext; diff --git a/tests/tasks/test/integration/psql-passthrough.sh b/tests/tasks/test/integration/psql-passthrough.sh index e07fd77d..c9319539 100755 --- a/tests/tasks/test/integration/psql-passthrough.sh +++ b/tests/tasks/test/integration/psql-passthrough.sh @@ -17,10 +17,10 @@ EOF # Confirm that there is indeed no config set +e -OUTPUT="$(docker exec -i postgres${CONTAINER_SUFFIX} psql 'postgresql://cipherstash:password@proxy:6432/cipherstash?sslmode=disable' --command 'SELECT * FROM cs_configuration_v1' 2>&1)" +OUTPUT="$(docker exec -i postgres${CONTAINER_SUFFIX} psql 'postgresql://cipherstash:password@proxy:6432/cipherstash?sslmode=disable' --command 'SELECT * FROM eql_v1_configuration' 2>&1)" retval=$? -if echo ${OUTPUT} | grep -v 'relation "cs_configuration_v1" does not exist'; then - echo "error: did not see string in output: \"relation "cs_configuration_v1" does not exist\"" +if echo ${OUTPUT} | grep -v 'relation "eql_v1_configuration" does not exist'; then + echo "error: did not see string in output: \"relation "eql_v1_configuration" does not exist\"" exit 1 fi From 54666331085af1bb72651aff22da59a49fe9dd40 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 6 May 2025 10:48:36 +1000 Subject: [PATCH 12/50] fix: Uninstall should use CS_EQL_UNINSTALL_PATH if defined --- mise.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mise.toml b/mise.toml index 9cf04e2a..db0a71cb 100644 --- a/mise.toml +++ b/mise.toml @@ -421,7 +421,7 @@ cat sql/schema.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql: """ [tasks."postgres:eql:teardown"] -alias = 's' +alias = 'teardown' description = "Uninstalls EQL and removes schema from database" run = """ #!/bin/bash @@ -512,7 +512,7 @@ fi if [ -z "$CS_EQL_UNINSTALL_PATH" ]; then curl -sLo sql/cipherstash-encrypt-uninstall.sql https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt-uninstall.sql 
else - echo "Using EQL: ${CS_EQL_PATH}" + echo "Using EQL: ${CS_EQL_UNINSTALL_PATH}" cp "$CS_EQL_UNINSTALL_PATH" sql/cipherstash-encrypt-uninstall.sql fi """ From 135cf09f3d8ee271840703744aa07def7a3b49dc Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 6 May 2025 14:48:12 +1000 Subject: [PATCH 13/50] chore: make postgres:setup depend on postgres:teardown --- mise.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mise.toml b/mise.toml index db0a71cb..cef234e3 100644 --- a/mise.toml +++ b/mise.toml @@ -410,6 +410,7 @@ fi [tasks."postgres:setup"] alias = 's' +depends = ["postgres:eql:teardown"] description = "Installs EQL and applies schema to database" run = """ #!/bin/bash @@ -421,7 +422,6 @@ cat sql/schema.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql: """ [tasks."postgres:eql:teardown"] -alias = 'teardown' description = "Uninstalls EQL and removes schema from database" run = """ #!/bin/bash From 82fc5ebaab41b51882a3268b408912ea5b352e41 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 6 May 2025 16:18:21 +1000 Subject: [PATCH 14/50] fix: Use updated eql_v1_encrypted column type --- docs/errors.md | 4 ++-- docs/getting-started/schema-example.sql | 6 +++--- docs/how-to.md | 8 ++++---- mise.toml | 12 +----------- .../src/extended_protocol_error_messages.rs | 4 ++-- .../cipherstash-proxy/src/encrypt/schema/manager.rs | 4 ++-- .../src/postgresql/messages/parse.rs | 8 ++++---- 7 files changed, 18 insertions(+), 28 deletions(-) diff --git a/docs/errors.md b/docs/errors.md index 7e5faceb..1105d064 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -314,7 +314,7 @@ For example: ## Unknown Column -The column has an encrypted type (PostgreSQL `cs_encrypted_v1` type ) with no encryption configuration. +The column has an encrypted type (PostgreSQL `eql_v1_encrypted` type ) with no encryption configuration. Without the configuration, Cipherstash Proxy does not know how to encrypt the column. Any data is unprotected and unencrypted. 
@@ -341,7 +341,7 @@ Column 'column_name' in table 'table_name' has no Encrypt configuration ## Unknown Table -The table has one or more encrypted columns (PostgreSQL `cs_encrypted_v1` type ) with no encryption configuration. +The table has one or more encrypted columns (PostgreSQL `eql_v1_encrypted` type ) with no encryption configuration. Without the configuration, Cipherstash Proxy does not know how to encrypt the column. Any data is unprotected and unencrypted. diff --git a/docs/getting-started/schema-example.sql b/docs/getting-started/schema-example.sql index c6ad0cb0..0120cde4 100644 --- a/docs/getting-started/schema-example.sql +++ b/docs/getting-started/schema-example.sql @@ -4,9 +4,9 @@ TRUNCATE TABLE public.eql_v1_configuration; DROP TABLE IF EXISTS users; CREATE TABLE users ( id SERIAL PRIMARY KEY, - encrypted_email cs_encrypted_v1, - encrypted_dob cs_encrypted_v1, - encrypted_salary cs_encrypted_v1 + encrypted_email eql_v1_encrypted, + encrypted_dob eql_v1_encrypted, + encrypted_salary eql_v1_encrypted ); SELECT cs_add_index_v1( diff --git a/docs/how-to.md b/docs/how-to.md index a38f80cb..cac141c0 100644 --- a/docs/how-to.md +++ b/docs/how-to.md @@ -162,22 +162,22 @@ This will output the version of EQL installed. In your existing PostgreSQL database, you store your data in tables and columns. Those columns have types like `integer`, `text`, `timestamp`, and `boolean`. -When storing encrypted data in PostgreSQL with Proxy, you use a special column type called `cs_encrypted_v1`, which is [provided by EQL](#setting-up-the-database-schema). -`cs_encrypted_v1` is a container column type that can be used for any type of encrypted data you want to store or search, whether they are numbers (`int`, `small_int`, `big_int`), text (`text`), dates and times (`date`), or booleans (`boolean`). +When storing encrypted data in PostgreSQL with Proxy, you use a special column type called `eql_v1_encrypted`, which is [provided by EQL](#setting-up-the-database-schema). 
+`eql_v1_encrypted` is a container column type that can be used for any type of encrypted data you want to store or search, whether they are numbers (`int`, `small_int`, `big_int`), text (`text`), dates and times (`date`), or booleans (`boolean`). Create a table with an encrypted column for `email`: ```sql CREATE TABLE users ( id SERIAL PRIMARY KEY, - email cs_encrypted_v1 + email eql_v1_encrypted ) ``` This creates a `users` table with two columns: - `id`, an autoincrementing integer column that is the primary key for the record - - `email`, a `cs_encrypted_v1` column + - `email`, a `eql_v1_encrypted` column There are important differences between the plaintext columns you've traditionally used in PostgreSQL and encrypted columns with CipherStash Proxy: diff --git a/mise.toml b/mise.toml index cef234e3..01769875 100644 --- a/mise.toml +++ b/mise.toml @@ -567,7 +567,7 @@ cp -v {{config_root}}/target/{{ target }}/release/cipherstash-proxy {{config_roo """ [tasks."build:docker"] -depends = ["build:docker:fetch_eql"] +depends = ["postgres:eql:download"] description = "Build a Docker image for cipherstash-proxy" run = """ {% set default_platform = "linux/" ~ arch() | replace(from="x86_64", to="amd64") %} @@ -580,16 +580,6 @@ docker build . \ --platform {{option(name="platform",default=default_platform)}} \ """ -[tasks."build:docker:fetch_eql"] -description = "Fetch the EQL installation script" -run = """ -if [ ! 
-e "cipherstash-eql.sql" ]; then - echo "Fetching: cipherstash-eql.sql" - curl -sLo cipherstash-eql.sql https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt.sql -else - echo "Prefetched: cipherstash-eql.sql" -fi -""" [tasks.release] description = "Publish release artifacts" diff --git a/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs b/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs index c45ec4af..37ac99ea 100644 --- a/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs +++ b/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs @@ -67,10 +67,10 @@ mod tests { let msg = err.to_string(); // This is similar to below. The error message comes from tokio-postgres when Proxy - // returns cs_encrypted_v1 and the client cannot convert to a string. + // returns eql_v1_encrypted and the client cannot convert to a string. // If mapping errors are enabled (enable_mapping_errors or CS_DEVELOPMENT__ENABLE_MAPPING_ERRORS), // then Proxy will return an error that says "Column X in table Y has no Encrypt configuration" - assert_eq!(msg, "error serializing parameter 1: cannot convert between the Rust type `&str` and the Postgres type `cs_encrypted_v1`"); + assert_eq!(msg, "error serializing parameter 1: cannot convert between the Rust type `&str` and the Postgres type `eql_v1_encrypted`"); } else { unreachable!(); } diff --git a/packages/cipherstash-proxy/src/encrypt/schema/manager.rs b/packages/cipherstash-proxy/src/encrypt/schema/manager.rs index fc000ccb..f005a16f 100644 --- a/packages/cipherstash-proxy/src/encrypt/schema/manager.rs +++ b/packages/cipherstash-proxy/src/encrypt/schema/manager.rs @@ -143,8 +143,8 @@ pub async fn load_schema(config: &DatabaseConfig) -> Result { let ident = Ident::with_quote('"', col); let column = match domain.as_deref() { - Some("cs_encrypted_v1") => { - debug!(target: SCHEMA, 
msg = "cs_encrypted_v1 column", table = table_name, column = col); + Some("eql_v1_encrypted") => { + debug!(target: SCHEMA, msg = "eql_v1_encrypted column", table = table_name, column = col); Column::eql(ident) } _ => Column::native(ident), diff --git a/packages/cipherstash-proxy/src/postgresql/messages/parse.rs b/packages/cipherstash-proxy/src/postgresql/messages/parse.rs index faf0ec6b..a2c30f5c 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/parse.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/parse.rs @@ -24,11 +24,11 @@ impl Parse { } /// - /// Encrypted columns are the cs_encrypted_v1 Domain Type - /// cs_encrypted_v1 wraps JSONB + /// Encrypted columns are the eql_v1_encrypted Domain Type + /// eql_v1_encrypted wraps JSONB /// - /// Using JSONB to avoid the complexity of loading the OID of cs_encrypted_v1 - /// PostgreSQL will coerce JSONB to cs_encrypted_v1 if it passes the constaint check + /// Using JSONB to avoid the complexity of loading the OID of eql_v1_encrypted + /// PostgreSQL will coerce JSONB to eql_v1_encrypted if it passes the constaint check /// pub fn rewrite_param_types(&mut self, columns: &[Option]) { for (idx, col) in columns.iter().enumerate() { From dc094c43207fbc0b81a7ed23e6848df18bb5c292 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 6 May 2025 16:38:39 +1000 Subject: [PATCH 15/50] chore: update schema load to check for new type --- packages/cipherstash-proxy/src/encrypt/schema/manager.rs | 7 +++---- .../src/encrypt/sql/select_table_schemas.sql | 6 ++++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/cipherstash-proxy/src/encrypt/schema/manager.rs b/packages/cipherstash-proxy/src/encrypt/schema/manager.rs index f005a16f..b2603a98 100644 --- a/packages/cipherstash-proxy/src/encrypt/schema/manager.rs +++ b/packages/cipherstash-proxy/src/encrypt/schema/manager.rs @@ -132,17 +132,16 @@ pub async fn load_schema(config: &DatabaseConfig) -> Result { let table_name: String = 
table.get("table_name"); let primary_keys: Vec> = table.get("primary_keys"); let columns: Vec = table.get("columns"); - let _types: Vec> = table.get("column_types"); - let domains: Vec> = table.get("column_domains"); + let column_type_names: Vec> = table.get("column_type_names"); let mut table = Table::new(Ident::new(&table_name)); - columns.iter().zip(domains).for_each(|(col, domain)| { + columns.iter().zip(column_type_names).for_each(|(col, column_type_name)| { let is_primary_key = primary_keys.contains(&Some(col.to_string())); let ident = Ident::with_quote('"', col); - let column = match domain.as_deref() { + let column = match column_type_name.as_deref() { Some("eql_v1_encrypted") => { debug!(target: SCHEMA, msg = "eql_v1_encrypted column", table = table_name, column = col); Column::eql(ident) diff --git a/packages/cipherstash-proxy/src/encrypt/sql/select_table_schemas.sql b/packages/cipherstash-proxy/src/encrypt/sql/select_table_schemas.sql index ee3ba513..88743f3e 100644 --- a/packages/cipherstash-proxy/src/encrypt/sql/select_table_schemas.sql +++ b/packages/cipherstash-proxy/src/encrypt/sql/select_table_schemas.sql @@ -3,8 +3,7 @@ SELECT t.table_name, array_agg(distinct k.column_name)::text[] AS primary_keys, array_agg(c.column_name)::text[] AS columns, - array_agg(c.data_type)::text[] AS column_types, - array_agg(c.domain_name)::text[] AS column_domains + array_agg(c.udt_name)::text[] AS column_type_names FROM information_schema.tables t LEFT JOIN @@ -24,3 +23,6 @@ GROUP BY t.table_schema, t.table_name ORDER BY t.table_schema, t.table_name; + + + From 862b7b330542b2a4f5abdccf74e89efc40715a7a Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 6 May 2025 16:54:59 +1000 Subject: [PATCH 16/50] =?UTF-8?q?refactor(mise):=20unify=20EQL=20download?= =?UTF-8?q?=20tasks=20to=20One=20True=20Way=E2=84=A2=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 5 ++--- mise.toml | 47 
++++++++++++++++++++++++++++------------------- proxy.Dockerfile | 2 +- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index a5b17a57..05413b30 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,6 @@ /cipherstash-proxy.local.toml mise.local.toml tests/pg/data** -tests/sql/cipherstash-encrypt.sql -tests/sql/cipherstash-encrypt-uninstall.sql .vscode rust-toolchain.toml @@ -13,8 +11,9 @@ rust-toolchain.toml # release artifacts /cipherstash-proxy -/cipherstash-eql.sql /packages/cipherstash-proxy/eql-version-at-build-time.txt +/cipherstash-encrypt.sql +/cipherstash-encrypt-uninstall.sql # credentials for local dev .env.proxy.docker diff --git a/mise.toml b/mise.toml index 01769875..64e563df 100644 --- a/mise.toml +++ b/mise.toml @@ -409,27 +409,28 @@ fi """ [tasks."postgres:setup"] -alias = 's' depends = ["postgres:eql:teardown"] +alias = 's' description = "Installs EQL and applies schema to database" run = """ #!/bin/bash cd tests mise run postgres:fail_if_not_running -mise run postgres:eql:download -cat sql/cipherstash-encrypt.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- +cat sql/schema-uninstall.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- +cat ../cipherstash-encrypt-uninstall.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- +cat ../cipherstash-encrypt.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- cat sql/schema.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql 
postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- """ [tasks."postgres:eql:teardown"] +depends = ["eql:download"] description = "Uninstalls EQL and removes schema from database" run = """ #!/bin/bash cd tests mise run postgres:fail_if_not_running -mise run postgres:eql:download cat sql/schema-uninstall.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- -cat sql/cipherstash-encrypt-uninstall.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- +cat ../cipherstash-encrypt-uninstall.sql | docker exec -i postgres${CONTAINER_SUFFIX} psql postgresql://${CS_DATABASE__USERNAME}:${CS_DATABASE__PASSWORD}@${CS_DATABASE__HOST}:${CS_DATABASE__PORT}/${CS_DATABASE__NAME} -f- """ [tasks."postgres:up"] @@ -490,34 +491,32 @@ for d in tests/pg/data-*; do done """ - -[tasks."postgres:eql:download"] +[tasks."eql:download"] alias = 'e' -description = "Download latest EQL release" +description = "Download latest EQL release or use local copy" dir = "{{config_root}}/tests" outputs = [ - "{{config_root}}/tests/sql/cipherstash-encrypt.sql", - "{{config_root}}/tests/sql/cipherstash-encrypt-uninstall.sql", + "{{config_root}}/cipherstash-encrypt.sql", + "{{config_root}}/cipherstash-encrypt-uninstall.sql", ] run = """ # install script if [ -z "$CS_EQL_PATH" ]; then - curl -sLo sql/cipherstash-encrypt.sql https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt.sql + curl -sLo "{{config_root}}/cipherstash-encrypt.sql" https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt.sql else - echo "Using EQL: ${CS_EQL_PATH}" - cp "$CS_EQL_PATH" 
sql/cipherstash-encrypt.sql + echo "Using EQL: ${CS_EQL_PATH}/cipherstash-encrypt.sql" + cp "$CS_EQL_PATH/cipherstash-encrypt.sql" "{{config_root}}/cipherstash-encrypt.sql" fi # uninstall script -if [ -z "$CS_EQL_UNINSTALL_PATH" ]; then - curl -sLo sql/cipherstash-encrypt-uninstall.sql https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt-uninstall.sql +if [ -z "$CS_EQL_PATH" ]; then + curl -sLo "{{config_root}}/cipherstash-encrypt-uninstall.sql" https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt-uninstall.sql else - echo "Using EQL: ${CS_EQL_UNINSTALL_PATH}" - cp "$CS_EQL_UNINSTALL_PATH" sql/cipherstash-encrypt-uninstall.sql + echo "Using EQL: ${CS_EQL_PATH}/cipherstash-encrypt-uninstall.sql" + cp "$CS_EQL_PATH/cipherstash-encrypt-uninstall.sql" "{{config_root}}/cipherstash-encrypt-uninstall.sql" fi """ - [tasks."python:test"] dir = "{{config_root}}/tests" description = "Runs python tests" @@ -567,7 +566,7 @@ cp -v {{config_root}}/target/{{ target }}/release/cipherstash-proxy {{config_roo """ [tasks."build:docker"] -depends = ["postgres:eql:download"] +depends = ["eql:download"] description = "Build a Docker image for cipherstash-proxy" run = """ {% set default_platform = "linux/" ~ arch() | replace(from="x86_64", to="amd64") %} @@ -580,6 +579,16 @@ docker build . \ --platform {{option(name="platform",default=default_platform)}} \ """ +[tasks."build:docker:fetch_eql"] +description = "Fetch the EQL installation script" +run = """ +if [ ! 
-e "cipherstash-eql.sql" ]; then + echo "Fetching: cipherstash-eql.sql" + curl -sLo cipherstash-eql.sql https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt.sql +else + echo "Prefetched: cipherstash-eql.sql" +fi +""" [tasks.release] description = "Publish release artifacts" diff --git a/proxy.Dockerfile b/proxy.Dockerfile index 27a9d431..02cea0e5 100644 --- a/proxy.Dockerfile +++ b/proxy.Dockerfile @@ -10,7 +10,7 @@ COPY cipherstash-proxy /usr/local/bin/cipherstash-proxy COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh # Copy EQL install scripts -COPY cipherstash-eql.sql /opt/cipherstash-eql.sql +COPY cipherstash-encrypt.sql /opt/cipherstash-eql.sql # Copy example schema COPY docs/getting-started/schema-example.sql /opt/schema-example.sql From a799f7374bdf92933f1cf409dfa21d7c8164b8b9 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 6 May 2025 16:59:02 +1000 Subject: [PATCH 17/50] fix(proxy): SQL for getting EQL version --- docs/how-to.md | 2 +- packages/cipherstash-proxy/src/encrypt/mod.rs | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/how-to.md b/docs/how-to.md index cac141c0..5f906a84 100644 --- a/docs/how-to.md +++ b/docs/how-to.md @@ -153,7 +153,7 @@ You can also install EQL by running [the installation script](https://github.com Once you have installed EQL, you can see what version is installed by querying the database: ```sql -SELECT cs_eql_version(); +SELECT eql_v1.version(); ``` This will output the version of EQL installed. 
diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index b23f994a..33b0b2f3 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -57,10 +57,11 @@ impl Encrypt { let eql_version = { let client = connect::database(&config.database).await?; - let rows = client.query("SELECT cs_eql_version();", &[]).await; + let rows = client.query("SELECT eql_v1.version() AS version;", &[]).await; + // let rows = client.query("SELECT 'WAT' AS version;", &[]).await; match rows { - Ok(rows) => rows.first().map(|row| row.get("cs_eql_version")), + Ok(rows) => rows.first().map(|row| row.get("version")), Err(err) => { warn!( msg = "Could not query EQL version from database", From c0a0015d96c21d530b6570f2ee6f642af55ffb92 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 6 May 2025 21:32:28 +1000 Subject: [PATCH 18/50] fix: fat finger fallout --- packages/cipherstash-proxy/src/encrypt/mod.rs | 4 +++- .../transformation_rules/replace_plaintext_eql_literals.rs | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 33b0b2f3..fca8efd1 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -57,7 +57,9 @@ impl Encrypt { let eql_version = { let client = connect::database(&config.database).await?; - let rows = client.query("SELECT eql_v1.version() AS version;", &[]).await; + let rows = client + .query("SELECT eql_v1.version() AS version;", &[]) + .await; // let rows = client.query("SELECT 'WAT' AS version;", &[]).await; match rows { diff --git a/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs b/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs index 7b9d1fe8..a153b115 100644 --- 
a/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs +++ b/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs @@ -27,7 +27,6 @@ impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { node_path: &NodePath<'ast>, target_node: &mut N, ) -> Result { - if let Some((Expr::Value(value),)) = node_path.last_1_as::() { if self.would_edit(node_path, target_node) { if let Some((Expr::Value(value),)) = node_path.last_1_as::() { if let Some(replacement) = self.encrypted_literals.remove(&NodeKey::new(value)) { From 137efd2f63ab75e1f5e7a748c6844a06a67afc1a Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 6 May 2025 22:03:07 +1000 Subject: [PATCH 19/50] fix(mapper): update JSONB test assertions for `ROW(..)` --- packages/eql-mapper/src/lib.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index 49e01dd5..1867c2bb 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -1498,7 +1498,7 @@ mod test { .map(|expr| match expr { ast::Expr::Identifier(ident) => ident.to_string(), ast::Expr::Value(ast::Value::SingleQuotedString(s)) => { - format!("''", s) + format!("ROW(''::JSONB)", s) } _ => panic!("unsupported expr type in test util"), }) @@ -1553,7 +1553,7 @@ mod test { match typed.transform(test_helpers::dummy_encrypted_json_selector(&statement, ast::Value::SingleQuotedString("medications".to_owned()))) { Ok(statement) => assert_eq!( statement.to_string(), - format!("SELECT id, notes {} '' AS meds FROM patients", op) + format!("SELECT id, notes {} ROW(''::JSONB) AS meds FROM patients", op) ), Err(err) => panic!("transformation failed: {err}"), } From 0afeeb8173bf8d18d7ffe3991fa6afbd3e6f134e Mon Sep 17 00:00:00 2001 From: James Sadler Date: Thu, 8 May 2025 23:12:04 +1000 Subject: [PATCH 20/50] feat(mapper): rewrite std SQL functions for EQL cols e.g. 
`jsonb_path_query(eql_col, selector)` on an EQL column should be transformed to `eql_v1.jsonb_path_query(..)` --- Cargo.lock | 4 + .../inference/infer_type_impls/function.rs | 8 +- .../eql-mapper/src/inference/sql_fn_macros.rs | 27 +++++- .../eql-mapper/src/inference/sql_functions.rs | 95 ++++++++++++------- packages/eql-mapper/src/lib.rs | 43 ++++++++- packages/eql-mapper/src/test_helpers.rs | 1 + .../transformation_rules/group_by_eql_col.rs | 2 +- .../src/transformation_rules/mod.rs | 4 +- .../rewrite_standard_sql_fns_on_eql_types.rs | 75 +++++++++++++++ .../use_equivalent_eql_fns_on_eql_types.rs | 73 -------------- .../wrap_eql_cols_in_order_by_with_ore_fn.rs | 14 +-- .../wrap_grouped_eql_col_in_aggregate_fn.rs | 12 +-- .../eql-mapper/src/type_checked_statement.rs | 8 +- 13 files changed, 231 insertions(+), 135 deletions(-) create mode 100644 packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs delete mode 100644 packages/eql-mapper/src/transformation_rules/use_equivalent_eql_fns_on_eql_types.rs diff --git a/Cargo.lock b/Cargo.lock index f22af274..cae63529 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -651,6 +651,8 @@ dependencies = [ [[package]] name = "cipherstash-client" version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fe21509165da6daf50b84d4dc9bc46b558e5afb34db75dbd2371b963faabe4d" dependencies = [ "aes-gcm-siv", "anyhow", @@ -1021,6 +1023,8 @@ dependencies = [ [[package]] name = "cts-common" version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938da7d14d05c2769bf7ae33c5a395eb6a34ffdd25ec286e97702ae563314f9b" dependencies = [ "arrayvec", "axum", diff --git a/packages/eql-mapper/src/inference/infer_type_impls/function.rs b/packages/eql-mapper/src/inference/infer_type_impls/function.rs index 585d6b7e..a9e6d98f 100644 --- a/packages/eql-mapper/src/inference/infer_type_impls/function.rs +++
b/packages/eql-mapper/src/inference/infer_type_impls/function.rs @@ -2,7 +2,7 @@ use eql_mapper_macros::trace_infer; use sqltk::parser::ast::{Function, FunctionArguments}; use crate::{ - get_type_signature_for_special_cased_sql_function, inference::infer_type::InferType, + get_sql_function_def, inference::infer_type::InferType, CompoundIdent, FunctionSig, TypeError, TypeInferencer, }; @@ -23,9 +23,9 @@ impl<'ast> InferType<'ast, Function> for TypeInferencer<'ast> { let Function { name, args, .. } = function; let fn_name = CompoundIdent::from(&name.0); - match get_type_signature_for_special_cased_sql_function(&fn_name, args) { - Some(sig) => { - sig.instantiate(&*self).apply_constraints(self, function)?; + match get_sql_function_def(&fn_name, args) { + Some(sql_fn) => { + sql_fn.sig.instantiate(&*self).apply_constraints(self, function)?; } None => { FunctionSig::instantiate_native(function).apply_constraints(self, function)?; diff --git a/packages/eql-mapper/src/inference/sql_fn_macros.rs b/packages/eql-mapper/src/inference/sql_fn_macros.rs index 4fcd9bc7..9c5c89ef 100644 --- a/packages/eql-mapper/src/inference/sql_fn_macros.rs +++ b/packages/eql-mapper/src/inference/sql_fn_macros.rs @@ -21,10 +21,35 @@ macro_rules! sql_fn_args { #[macro_export] macro_rules! sql_fn { + ($name:ident $args:tt -> $return_kind:ident, rewrite) => { + $crate::SqlFunction::new( + stringify!($name), + FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), + $crate::RewriteRule::AsEqlFunction, + ) + }; + ($name:ident $args:tt -> $return_kind:ident) => { $crate::SqlFunction::new( stringify!($name), FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), + $crate::RewriteRule::Ignore, ) }; -} + + ($schema:ident . $name:ident $args:tt -> $return_kind:ident, rewrite) => { + $crate::SqlFunction::new( + stringify!($schema . 
$name), + FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), + $crate::RewriteRule::AsEqlFunction, + ) + }; + + ($schema:ident . $name:ident $args:tt -> $return_kind:ident) => { + $crate::SqlFunction::new( + stringify!($schema . $name), + FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), + $crate::RewriteRule::Ignore, + ) + }; +} \ No newline at end of file diff --git a/packages/eql-mapper/src/inference/sql_functions.rs b/packages/eql-mapper/src/inference/sql_functions.rs index 7a2d42d4..020a8d07 100644 --- a/packages/eql-mapper/src/inference/sql_functions.rs +++ b/packages/eql-mapper/src/inference/sql_functions.rs @@ -17,12 +17,22 @@ use super::TypeError; /// /// See [`SQL_FUNCTION_SIGNATURES`]. #[derive(Debug)] -pub(crate) struct SqlFunction(CompoundIdent, FunctionSig); +pub(crate) struct SqlFunction { + pub(crate) name: CompoundIdent, + pub(crate) sig: FunctionSig, + pub(crate) rewrite_rule: RewriteRule, +} + +#[derive(Debug)] +pub(crate) enum RewriteRule { + Ignore, + AsEqlFunction, +} /// A representation of the type of an argument or return type in a SQL function. #[derive(Debug, Clone, Copy, Eq, PartialEq)] pub(crate) enum Kind { - /// A type that mjust be a native type + /// A type that must be a native type Native, /// A type that can be a native or EQL type. The `str` is the generic variable name. @@ -178,8 +188,12 @@ fn get_function_arg_expr(fn_arg: &FunctionArg) -> &FunctionArgExpr { } impl SqlFunction { - fn new(ident: &str, sig: FunctionSig) -> Self { - Self(CompoundIdent::from(ident), sig) + fn new(ident: &str, sig: FunctionSig, rewrite_rule: RewriteRule) -> Self { + Self { + name: CompoundIdent::from(ident), + sig, + rewrite_rule, + } } } @@ -202,38 +216,51 @@ impl From<&Vec> for CompoundIdent { } /// SQL functions that are handled with special case type checking rules. 
-static SQL_FUNCTION_SIGNATURES: LazyLock>> = - LazyLock::new(|| { - // Notation: a single uppercase letter denotes an unknown type. Matching letters in a signature will be assigned - // *the same type variable* and thus must resolve to the same type. (🙏 Haskell) - // - // Eventually we should type check EQL types against their configured indexes instead of leaving that to the EQL - // extension in the database. I can imagine supporting type bounds in signatures here, such as: `T: Eq` - let sql_fns = vec![ - sql_fn!(count(T) -> NATIVE), - sql_fn!(min(T) -> T), - sql_fn!(max(T) -> T), - sql_fn!(jsonb_path_query(T, T) -> T), - sql_fn!(jsonb_path_query_first(T, T) -> T), - sql_fn!(jsonb_path_exists(T, T) -> T), - ]; - - let mut sql_fns_by_name: HashMap> = HashMap::new(); - - for (key, chunk) in &sql_fns.into_iter().chunk_by(|sql_fn| sql_fn.0.clone()) { - sql_fns_by_name.insert( - key.clone(), - chunk.into_iter().map(|sql_fn| sql_fn.1).collect(), - ); - } +static SQL_FUNCTIONS: LazyLock>> = LazyLock::new(|| { + // Notation: a single uppercase letter denotes an unknown type. Matching letters in a signature will be assigned + // *the same type variable* and thus must resolve to the same type. (🙏 Haskell) + // + // Eventually we should type check EQL types against their configured indexes instead of leaving that to the EQL + // extension in the database. I can imagine supporting type bounds in signatures here, such as: `T: Eq` + let sql_fns = vec![ + // TODO: when search_path support is added to the resolver we should change these + // to their fully-qualified names. 
+ sql_fn!(count(T) -> NATIVE), + sql_fn!(min(T) -> T, rewrite), + sql_fn!(max(T) -> T, rewrite), + sql_fn!(jsonb_path_query(T, T) -> T, rewrite), + sql_fn!(jsonb_path_query_first(T, T) -> T, rewrite), + sql_fn!(jsonb_path_exists(T, T) -> T, rewrite), + sql_fn!(jsonb_array_length(T) -> T, rewrite), + sql_fn!(jsonb_array_elements(T) -> T, rewrite), + sql_fn!(jsonb_array_elements_text(T) -> T, rewrite), + // These are typings for when customer SQL already contains references to EQL functions. + // They must be type checked but not rewritten. + sql_fn!(eql_v1.min(T) -> T), + sql_fn!(eql_v1.max(T) -> T), + sql_fn!(eql_v1.jsonb_path_query(T, T) -> T), + sql_fn!(eql_v1.jsonb_path_query_first(T, T) -> T), + sql_fn!(eql_v1.jsonb_path_exists(T, T) -> T), + sql_fn!(eql_v1.jsonb_array_length(T) -> T), + sql_fn!(eql_v1.jsonb_array_elements(T) -> T), + sql_fn!(eql_v1.jsonb_array_elements_text(T) -> T), + ]; + + let mut sql_fns_by_name: HashMap> = HashMap::new(); + + for (key, chunk) in &sql_fns.into_iter().chunk_by(|sql_fn| sql_fn.name.clone()) { + sql_fns_by_name.insert(key.clone(), chunk.into_iter().collect()); + } - sql_fns_by_name - }); + sql_fns_by_name +}); -pub(crate) fn get_type_signature_for_special_cased_sql_function( +pub(crate) fn get_sql_function_def( fn_name: &CompoundIdent, args: &FunctionArguments, -) -> Option<&'static FunctionSig> { - let sigs = SQL_FUNCTION_SIGNATURES.get(fn_name)?; - sigs.iter().find(|sig| sig.is_applicable_to_args(args)) +) -> Option<&'static SqlFunction> { + let sql_fns = SQL_FUNCTIONS.get(fn_name)?; + sql_fns + .iter() + .find(|sql_fn| sql_fn.sig.is_applicable_to_args(args)) } diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index 1867c2bb..a44dc44a 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -1352,7 +1352,7 @@ mod test { match typed.transform(HashMap::new()) { Ok(statement) => assert_eq!( statement.to_string(), - "SELECT CS_GROUPED_VALUE_V1(email) AS email FROM users 
GROUP BY CS_ORE_64_8_V1(email)".to_string() + "SELECT eql_v1.cs_grouped_value(email) AS email FROM users GROUP BY eql_v1.ore_64_8_v1(email)".to_string() ), Err(err) => panic!("transformation failed: {err}"), } @@ -1382,7 +1382,7 @@ mod test { match typed.transform(HashMap::new()) { Ok(statement) => assert_eq!( statement.to_string(), - "SELECT CS_MIN_V1(salary) AS min, CS_MAX_V1(salary) AS max, department FROM employees GROUP BY department".to_string() + "SELECT eql_v1.min(salary), eql_v1.max(salary), department FROM employees GROUP BY department".to_string() ), Err(err) => panic!("transformation failed: {err}"), } @@ -1391,6 +1391,42 @@ mod test { } } + #[test] + fn rewrite_standard_sql_fns_on_eql_types() { + // init_tracing(); + let schema = resolver(schema! { + tables: { + employees: { + id (PK), + eql_col (EQL), + native_col, + } + } + }); + + let statement = parse(" + SELECT jsonb_path_query(eql_col, '$.secret'), jsonb_path_query(native_col, '$.not-secret') FROM employees + "); + + match type_check(schema, &statement) { + Ok(typed) => { + match typed.transform(test_helpers::dummy_encrypted_json_selector( + &statement, + ast::Value::SingleQuotedString("$.secret".into()), + )) { + Ok(statement) => { + assert_eq!( + statement.to_string(), + "SELECT eql_v1.jsonb_path_query(eql_col, ROW(''::JSONB)), jsonb_path_query(native_col, '$.not-secret') FROM employees" + ); + } + Err(err) => panic!("transformation failed: {err}"), + } + } + Err(err) => panic!("type check failed: {err}"), + } + } + #[test] fn supports_named_arrays() { let schema = resolver(schema! 
{ @@ -1519,11 +1555,12 @@ mod test { match type_check(schema, &statement) { Ok(typed) => match typed.transform(encrypted_literals) { Ok(statement) => { + let rewritten_fn_name = format!("eql_v1.{fn_name}"); assert_eq!( statement.to_string(), format!( "SELECT id, {}({}) AS meds FROM patients", - fn_name, args_encrypted + rewritten_fn_name, args_encrypted ) ) } diff --git a/packages/eql-mapper/src/test_helpers.rs b/packages/eql-mapper/src/test_helpers.rs index 1d9285b2..61c225e2 100644 --- a/packages/eql-mapper/src/test_helpers.rs +++ b/packages/eql-mapper/src/test_helpers.rs @@ -93,6 +93,7 @@ pub(crate) fn find_nodekey_for_value_node( visitor.found } + #[macro_export] macro_rules! col { ((NATIVE)) => { diff --git a/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs b/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs index 10840a44..6db7c0b4 100644 --- a/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs +++ b/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs @@ -40,7 +40,7 @@ impl<'ast> TransformationRule<'ast> for GroupByEqlCol<'ast> { *target_node = helpers::wrap_in_1_arg_function( transformed_expr, - ObjectName(vec![Ident::new("CS_ORE_64_8_V1")]), + ObjectName(vec![Ident::new("eql_v1"), Ident::new("ore_64_8_v1")]), ); return Ok(true); diff --git a/packages/eql-mapper/src/transformation_rules/mod.rs b/packages/eql-mapper/src/transformation_rules/mod.rs index b735a4fa..e4479902 100644 --- a/packages/eql-mapper/src/transformation_rules/mod.rs +++ b/packages/eql-mapper/src/transformation_rules/mod.rs @@ -15,7 +15,7 @@ mod fail_on_placeholder_change; mod group_by_eql_col; mod preserve_effective_aliases; mod replace_plaintext_eql_literals; -mod use_equivalent_eql_fns_on_eql_types; +mod rewrite_standard_sql_fns_on_eql_types; mod wrap_eql_cols_in_order_by_with_ore_fn; mod wrap_grouped_eql_col_in_aggregate_fn; @@ -24,8 +24,8 @@ use std::marker::PhantomData; pub(crate) use fail_on_placeholder_change::*; pub(crate) 
use group_by_eql_col::*; pub(crate) use preserve_effective_aliases::*; +pub(crate) use rewrite_standard_sql_fns_on_eql_types::*; pub(crate) use replace_plaintext_eql_literals::*; -pub(crate) use use_equivalent_eql_fns_on_eql_types::*; pub(crate) use wrap_eql_cols_in_order_by_with_ore_fn::*; pub(crate) use wrap_grouped_eql_col_in_aggregate_fn::*; diff --git a/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs b/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs new file mode 100644 index 00000000..cca69574 --- /dev/null +++ b/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs @@ -0,0 +1,75 @@ +use std::mem; +use std::{collections::HashMap, sync::Arc}; + +use sqltk::parser::ast::{Expr, Function, Ident, ObjectName}; +use sqltk::{AsNodeKey, NodeKey, NodePath, Visitable}; + +use crate::{ + get_sql_function_def, CompoundIdent, EqlMapperError, RewriteRule, SqlFunction, Type, Value, +}; + +use super::TransformationRule; + +#[derive(Debug)] +pub struct RewriteStandardSqlFnsOnEqlTypes<'ast> { + node_types: Arc, Type>>, +} + +impl<'ast> RewriteStandardSqlFnsOnEqlTypes<'ast> { + pub fn new(node_types: Arc, Type>>) -> Self { + Self { node_types } + } +} + +impl<'ast> TransformationRule<'ast> for RewriteStandardSqlFnsOnEqlTypes<'ast> { + fn apply( + &mut self, + node_path: &NodePath<'ast>, + target_node: &mut N, + ) -> Result { + if self.would_edit(node_path, target_node) { + if let Some((_expr, function)) = node_path.last_2_as::() { + if matches!( + self.node_types.get(&function.as_node_key()), + Some(Type::Value(Value::Eql(_))) + ) { + let function_name = CompoundIdent::from(&function.name.0); + + if let Some(SqlFunction { + rewrite_rule: RewriteRule::AsEqlFunction, + .. 
+ }) = get_sql_function_def(&function_name, &function.args) + { + let function = target_node.downcast_mut::().unwrap(); + let mut existing_name = mem::take(&mut function.name.0); + existing_name.insert(0, Ident::new("eql_v1")); + function.name = ObjectName(existing_name); + } + } + } + } + + Ok(false) + } + + fn would_edit(&mut self, node_path: &NodePath<'ast>, _target_node: &N) -> bool { + if let Some((_expr, function)) = node_path.last_2_as::() { + if matches!( + self.node_types.get(&function.as_node_key()), + Some(Type::Value(Value::Eql(_))) + ) { + let function_name = CompoundIdent::from(&function.name.0); + + if let Some(SqlFunction { + rewrite_rule: RewriteRule::AsEqlFunction, + .. + }) = get_sql_function_def(&function_name, &function.args) + { + return true; + } + } + } + + false + } +} diff --git a/packages/eql-mapper/src/transformation_rules/use_equivalent_eql_fns_on_eql_types.rs b/packages/eql-mapper/src/transformation_rules/use_equivalent_eql_fns_on_eql_types.rs deleted file mode 100644 index 6de21b74..00000000 --- a/packages/eql-mapper/src/transformation_rules/use_equivalent_eql_fns_on_eql_types.rs +++ /dev/null @@ -1,73 +0,0 @@ -use std::{collections::HashMap, sync::Arc}; - -use sqltk::parser::ast::{Expr, Function, Ident, Select, SelectItem}; -use sqltk::{NodeKey, NodePath, Visitable}; - -use crate::{EqlMapperError, SqlIdent, Type}; - -use super::{helpers, TransformationRule}; - -#[derive(Debug)] -pub struct UseEquivalentSqlFuncForEqlTypes<'ast> { - node_types: Arc, Type>>, -} - -impl<'ast> UseEquivalentSqlFuncForEqlTypes<'ast> { - pub fn new(node_types: Arc, Type>>) -> Self { - Self { node_types } - } -} - -impl<'ast> TransformationRule<'ast> for UseEquivalentSqlFuncForEqlTypes<'ast> { - fn apply( - &mut self, - node_path: &NodePath<'ast>, - target_node: &mut N, - ) -> Result { - if self.would_edit(node_path, target_node) { - if let Some((_select, _select_items, _select_item, _expr)) = - node_path.last_4_as::, SelectItem, Expr>() - { - let target_node 
= target_node.downcast_mut::().unwrap(); - if let Expr::Function(Function { name, .. }) = target_node { - let f_name = name.0.last_mut().unwrap(); - - if SqlIdent(&*f_name) == SqlIdent(Ident::new("MIN")) { - *f_name = Ident::new("CS_MIN_V1"); - } - - if SqlIdent(&*f_name) == SqlIdent(Ident::new("MAX")) { - *f_name = Ident::new("CS_MAX_V1"); - } - - return Ok(true); - } - } - } - - Ok(false) - } - - fn would_edit(&mut self, node_path: &NodePath<'ast>, target_node: &N) -> bool { - if let Some((select, _select_items, _select_item, expr)) = - node_path.last_4_as::, SelectItem, Expr>() - { - if !helpers::is_used_in_group_by_clause(&self.node_types, &select.group_by, expr) { - let target_node = target_node.downcast_ref::().unwrap(); - if let Expr::Function(Function { name, .. }) = target_node { - let f_name = name.0.last().unwrap(); - - if SqlIdent(f_name) == SqlIdent(Ident::new("MIN")) { - return true; - } - - if SqlIdent(f_name) == SqlIdent(Ident::new("MAX")) { - return true; - } - } - } - } - - false - } -} diff --git a/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs b/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs index 17078487..d356bba5 100644 --- a/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs +++ b/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs @@ -11,7 +11,7 @@ use crate::{EqlMapperError, Type, Value}; use super::{helpers::wrap_in_1_arg_function, TransformationRule}; /// When an [`Expr`] of a [`SelectItem`] has an EQL type and that EQL type is used in a `GROUP BY` clause then -/// this rule wraps the `Expr` in a call to `CS_GROUPED_VALUE_V1`. +/// this rule wraps the `Expr` in a call to `eql_v1.grouped_value`. 
/// /// # Example /// @@ -20,11 +20,11 @@ use super::{helpers::wrap_in_1_arg_function, TransformationRule}; /// SELECT eql_col FROM some_table GROUP BY eql_col; /// /// -- after mapping -/// SELECT CS_GROUPED_VALUE_V1(eql_col) FROM some_table GROUP BY CS_ORE_64_8_V1(eql_col); -/// -- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^ -/// -- ^ ^ -/// -- | | -/// -- Changed by this rule Changed by rule `GroupByEqlCol` +/// SELECT eql_v1.grouped_value(eql_col) AS eql_col FROM some_table GROUP BY eql_v1.ore_64_8_v1(eql_col); +/// -- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ +/// -- ^ ^ ^ +/// -- | | | +/// -- Changed by this rule Preserve effective aliases Changed by rule GroupByEqlCol /// ``` #[derive(Debug)] pub struct WrapEqlColsInOrderByWithOreFn<'ast> { @@ -54,7 +54,7 @@ impl<'ast> TransformationRule<'ast> for WrapEqlColsInOrderByWithOreFn<'ast> { target_node.expr = wrap_in_1_arg_function( expr_to_wrap, - ObjectName(vec![Ident::new("CS_ORE_64_8_V1")]), + ObjectName(vec![Ident::new("eql_v1"), Ident::new("ore_64_8_v1")]), ); return Ok(true); diff --git a/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs b/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs index 22d4aeb6..5b5f8e0f 100644 --- a/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs +++ b/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs @@ -20,11 +20,11 @@ use super::{ /// SELECT eql_col FROM some_table GROUP BY eql_col; /// /// -- after mapping -/// SELECT CS_GROUPED_VALUE_V1(eql_col) FROM some_table GROUP BY CS_ORE_64_8_V1(eql_col); -/// -- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^ -/// -- ^ ^ -/// -- | | -/// -- Changed by this rule Changed by rule `GroupByEqlCol` +/// SELECT eql_v1.grouped_value(eql_col) AS eql_col FROM some_table GROUP BY eql_v1.ore_64_8_v1(eql_col); +/// -- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^ +/// -- ^ ^ ^ +/// -- | | | +/// -- Changed by this rule PreserveEffectiveAliases GroupByEqlCol /// ``` #[derive(Debug)] pub struct WrapGroupedEqlColInAggregateFn<'ast> { @@ -50,7 +50,7 @@ impl<'ast> TransformationRule<'ast> for WrapGroupedEqlColInAggregateFn<'ast> { let target_node: &mut Expr = target_node.downcast_mut().unwrap(); *target_node = wrap_in_1_arg_function( expr.clone(), - ObjectName(vec![Ident::new("CS_GROUPED_VALUE_V1")]), + ObjectName(vec![Ident::new("eql_v1"), Ident::new("cs_grouped_value")]), ); return Ok(true); diff --git a/packages/eql-mapper/src/type_checked_statement.rs b/packages/eql-mapper/src/type_checked_statement.rs index 43012b7d..be801587 100644 --- a/packages/eql-mapper/src/type_checked_statement.rs +++ b/packages/eql-mapper/src/type_checked_statement.rs @@ -5,9 +5,9 @@ use sqltk::{AsNodeKey, NodeKey, Transformable}; use crate::{ DryRunnable, EqlMapperError, EqlValue, FailOnPlaceholderChange, GroupByEqlCol, Param, - PreserveEffectiveAliases, Projection, ReplacePlaintextEqlLiterals, TransformationRule, Type, - UseEquivalentSqlFuncForEqlTypes, Value, WrapEqlColsInOrderByWithOreFn, - WrapGroupedEqlColInAggregateFn, + PreserveEffectiveAliases, Projection, ReplacePlaintextEqlLiterals, + RewriteStandardSqlFnsOnEqlTypes, TransformationRule, Type, Value, + WrapEqlColsInOrderByWithOreFn, WrapGroupedEqlColInAggregateFn, }; /// A `TypeCheckedStatement` is returned from a successful call to [`crate::type_check`]. 
@@ -140,12 +140,12 @@ impl<'ast> TypeCheckedStatement<'ast> { encrypted_literals: HashMap, sqltk::parser::ast::Value>, ) -> DryRunnable> { DryRunnable::new(( + RewriteStandardSqlFnsOnEqlTypes::new(Arc::clone(&self.node_types)), WrapGroupedEqlColInAggregateFn::new(Arc::clone(&self.node_types)), GroupByEqlCol::new(Arc::clone(&self.node_types)), WrapEqlColsInOrderByWithOreFn::new(Arc::clone(&self.node_types)), PreserveEffectiveAliases, ReplacePlaintextEqlLiterals::new(encrypted_literals), - UseEquivalentSqlFuncForEqlTypes::new(Arc::clone(&self.node_types)), FailOnPlaceholderChange::new(), )) } From d963ad170e9e1175673a10f83d2037bb1e33e4f0 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Thu, 8 May 2025 23:14:02 +1000 Subject: [PATCH 21/50] chore: remove unused attrs --- packages/eql-mapper/src/inference/infer_type_impls/expr.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/eql-mapper/src/inference/infer_type_impls/expr.rs b/packages/eql-mapper/src/inference/infer_type_impls/expr.rs index 9f888483..51c03c12 100644 --- a/packages/eql-mapper/src/inference/infer_type_impls/expr.rs +++ b/packages/eql-mapper/src/inference/infer_type_impls/expr.rs @@ -24,12 +24,10 @@ impl<'ast> InferType<'ast, Expr> for TypeInferencer<'ast> { self.unify_node_with_type(this_expr, self.resolve_compound_ident(idents)?)?; } - #[allow(unused_variables)] Expr::Wildcard(_) => { self.unify_node_with_type(this_expr, self.resolve_wildcard()?)?; } - #[allow(unused_variables)] Expr::QualifiedWildcard(object_name, _) => { self.unify_node_with_type( this_expr, From 1cbd8c522aab19a362c756af4e473ffe473feb1c Mon Sep 17 00:00:00 2001 From: James Sadler Date: Thu, 8 May 2025 23:56:38 +1000 Subject: [PATCH 22/50] chore: formatting --- .../src/inference/infer_type_impls/function.rs | 9 ++++++--- packages/eql-mapper/src/inference/sql_fn_macros.rs | 6 +++--- packages/eql-mapper/src/transformation_rules/mod.rs | 2 +- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git 
a/packages/eql-mapper/src/inference/infer_type_impls/function.rs b/packages/eql-mapper/src/inference/infer_type_impls/function.rs index a9e6d98f..515bcc83 100644 --- a/packages/eql-mapper/src/inference/infer_type_impls/function.rs +++ b/packages/eql-mapper/src/inference/infer_type_impls/function.rs @@ -2,8 +2,8 @@ use eql_mapper_macros::trace_infer; use sqltk::parser::ast::{Function, FunctionArguments}; use crate::{ - get_sql_function_def, inference::infer_type::InferType, - CompoundIdent, FunctionSig, TypeError, TypeInferencer, + get_sql_function_def, inference::infer_type::InferType, CompoundIdent, FunctionSig, TypeError, + TypeInferencer, }; /// Looks up the function signature. @@ -25,7 +25,10 @@ impl<'ast> InferType<'ast, Function> for TypeInferencer<'ast> { match get_sql_function_def(&fn_name, args) { Some(sql_fn) => { - sql_fn.sig.instantiate(&*self).apply_constraints(self, function)?; + sql_fn + .sig + .instantiate(&*self) + .apply_constraints(self, function)?; } None => { FunctionSig::instantiate_native(function).apply_constraints(self, function)?; diff --git a/packages/eql-mapper/src/inference/sql_fn_macros.rs b/packages/eql-mapper/src/inference/sql_fn_macros.rs index 9c5c89ef..d0acec86 100644 --- a/packages/eql-mapper/src/inference/sql_fn_macros.rs +++ b/packages/eql-mapper/src/inference/sql_fn_macros.rs @@ -39,7 +39,7 @@ macro_rules! sql_fn { ($schema:ident . $name:ident $args:tt -> $return_kind:ident, rewrite) => { $crate::SqlFunction::new( - stringify!($schema . $name), + stringify!($schema.$name), FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), $crate::RewriteRule::AsEqlFunction, ) @@ -47,9 +47,9 @@ macro_rules! sql_fn { ($schema:ident . $name:ident $args:tt -> $return_kind:ident) => { $crate::SqlFunction::new( - stringify!($schema . 
$name), + stringify!($schema.$name), FunctionSig::new($crate::sql_fn_args!($args), $crate::to_kind!($return_kind)), $crate::RewriteRule::Ignore, ) }; -} \ No newline at end of file +} diff --git a/packages/eql-mapper/src/transformation_rules/mod.rs b/packages/eql-mapper/src/transformation_rules/mod.rs index e4479902..11cc7091 100644 --- a/packages/eql-mapper/src/transformation_rules/mod.rs +++ b/packages/eql-mapper/src/transformation_rules/mod.rs @@ -24,8 +24,8 @@ use std::marker::PhantomData; pub(crate) use fail_on_placeholder_change::*; pub(crate) use group_by_eql_col::*; pub(crate) use preserve_effective_aliases::*; -pub(crate) use rewrite_standard_sql_fns_on_eql_types::*; pub(crate) use replace_plaintext_eql_literals::*; +pub(crate) use rewrite_standard_sql_fns_on_eql_types::*; pub(crate) use wrap_eql_cols_in_order_by_with_ore_fn::*; pub(crate) use wrap_grouped_eql_col_in_aggregate_fn::*; From fe86008ed799d7bf6838dbaf4a25f459e372fc78 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 12 May 2025 12:32:30 +1000 Subject: [PATCH 23/50] fix(mapper): wrap placeholders for EQL types in `ROW(..)` --- packages/eql-mapper/src/lib.rs | 33 ++++++++++++ .../src/transformation_rules/helpers.rs | 26 ++++++++- .../src/transformation_rules/mod.rs | 2 + .../wrap_eql_params_in_row.rs | 54 +++++++++++++++++++ .../eql-mapper/src/type_checked_statement.rs | 3 +- 5 files changed, 115 insertions(+), 3 deletions(-) create mode 100644 packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index a44dc44a..e9988c4b 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -1391,6 +1391,39 @@ mod test { } } + #[test] + fn eql_params_are_wrapped_in_row() { + // init_tracing(); + let schema = resolver(schema! 
{ + tables: { + employees: { + id (PK), + eql_col (EQL), + native_col, + } + } + }); + + let statement = parse( + " + SELECT * FROM employees WHERE eql_col = $1 AND native_col = $2; + ", + ); + + match type_check(schema, &statement) { + Ok(typed) => match typed.transform(HashMap::new()) { + Ok(statement) => { + assert_eq!( + statement.to_string(), + "SELECT * FROM employees WHERE eql_col = ROW($1::JSONB) AND native_col = $2" + ); + } + Err(err) => panic!("transformation failed: {err}"), + }, + Err(err) => panic!("type check failed: {err}"), + } + } + #[test] fn rewrite_standard_sql_fns_on_eql_types() { // init_tracing(); diff --git a/packages/eql-mapper/src/transformation_rules/helpers.rs b/packages/eql-mapper/src/transformation_rules/helpers.rs index d665db57..675e9a2d 100644 --- a/packages/eql-mapper/src/transformation_rules/helpers.rs +++ b/packages/eql-mapper/src/transformation_rules/helpers.rs @@ -1,8 +1,8 @@ use std::{collections::HashMap, convert::Infallible, ops::ControlFlow}; use sqltk::parser::ast::{ - Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, - GroupByExpr, ObjectName, + CastKind, DataType, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, + FunctionArguments, GroupByExpr, Ident, ObjectName, }; use sqltk::{AsNodeKey, Break, NodeKey, Visitable, Visitor}; @@ -48,6 +48,28 @@ pub(crate) fn wrap_in_1_arg_function(expr: Expr, name: ObjectName) -> Expr { }) } +pub(crate) fn make_row_expression(wrapped: sqltk::parser::ast::Value) -> Expr { + Expr::Function(Function { + name: ObjectName(vec![Ident::new("ROW")]), + uses_odbc_syntax: false, + parameters: FunctionArguments::None, + args: FunctionArguments::List(FunctionArgumentList { + duplicate_treatment: None, + clauses: vec![], + args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(wrapped)), + data_type: DataType::JSONB, + format: None, + }))], + }), + filter: None, + 
null_treatment: None, + over: None, + within_group: vec![], + }) +} + struct ContainsExprWithType<'ast, 't> { node_types: &'t HashMap, Type>, ty: &'t Type, diff --git a/packages/eql-mapper/src/transformation_rules/mod.rs b/packages/eql-mapper/src/transformation_rules/mod.rs index 11cc7091..7054e707 100644 --- a/packages/eql-mapper/src/transformation_rules/mod.rs +++ b/packages/eql-mapper/src/transformation_rules/mod.rs @@ -17,6 +17,7 @@ mod preserve_effective_aliases; mod replace_plaintext_eql_literals; mod rewrite_standard_sql_fns_on_eql_types; mod wrap_eql_cols_in_order_by_with_ore_fn; +mod wrap_eql_params_in_row; mod wrap_grouped_eql_col_in_aggregate_fn; use std::marker::PhantomData; @@ -27,6 +28,7 @@ pub(crate) use preserve_effective_aliases::*; pub(crate) use replace_plaintext_eql_literals::*; pub(crate) use rewrite_standard_sql_fns_on_eql_types::*; pub(crate) use wrap_eql_cols_in_order_by_with_ore_fn::*; +pub(crate) use wrap_eql_params_in_row::*; pub(crate) use wrap_grouped_eql_col_in_aggregate_fn::*; use crate::EqlMapperError; diff --git a/packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs b/packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs new file mode 100644 index 00000000..514ddf8c --- /dev/null +++ b/packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs @@ -0,0 +1,54 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use sqltk::parser::ast::{Expr, Value}; +use sqltk::{NodeKey, NodePath, Visitable}; + +use crate::{EqlMapperError, Type}; + +use super::helpers::make_row_expression; +use super::TransformationRule; + +#[derive(Debug)] +pub struct WrapEqlParamsInRow<'ast> { + node_types: Arc, Type>>, +} + +impl<'ast> WrapEqlParamsInRow<'ast> { + pub fn new(node_types: Arc, Type>>) -> Self { + Self { node_types } + } +} + +impl<'ast> TransformationRule<'ast> for WrapEqlParamsInRow<'ast> { + fn apply( + &mut self, + node_path: &NodePath<'ast>, + target_node: &mut N, + ) -> Result { + if 
self.would_edit(node_path, target_node) { + if let Some(expr @ Expr::Value(Value::Placeholder(_))) = target_node.downcast_mut() { + let to_wrap = std::mem::replace(expr, Expr::Value(Value::Null)); + let Expr::Value(value @ Value::Placeholder(_)) = to_wrap else { + unreachable!("the Expr is known to be Expr::Value(Value::Placeholder(_))") + }; + + *expr = make_row_expression(value); + return Ok(true); + } + } + + Ok(false) + } + + fn would_edit(&mut self, node_path: &NodePath<'ast>, _target_node: &N) -> bool { + if let Some((node @ Expr::Value(Value::Placeholder(_)),)) = node_path.last_1_as() { + if let Some(Type::Value(crate::Value::Eql(_))) = + self.node_types.get(&NodeKey::new(node)) + { + return true; + } + } + false + } +} diff --git a/packages/eql-mapper/src/type_checked_statement.rs b/packages/eql-mapper/src/type_checked_statement.rs index be801587..8c0be314 100644 --- a/packages/eql-mapper/src/type_checked_statement.rs +++ b/packages/eql-mapper/src/type_checked_statement.rs @@ -7,7 +7,7 @@ use crate::{ DryRunnable, EqlMapperError, EqlValue, FailOnPlaceholderChange, GroupByEqlCol, Param, PreserveEffectiveAliases, Projection, ReplacePlaintextEqlLiterals, RewriteStandardSqlFnsOnEqlTypes, TransformationRule, Type, Value, - WrapEqlColsInOrderByWithOreFn, WrapGroupedEqlColInAggregateFn, + WrapEqlColsInOrderByWithOreFn, WrapEqlParamsInRow, WrapGroupedEqlColInAggregateFn, }; /// A `TypeCheckedStatement` is returned from a successful call to [`crate::type_check`]. @@ -147,6 +147,7 @@ impl<'ast> TypeCheckedStatement<'ast> { PreserveEffectiveAliases, ReplacePlaintextEqlLiterals::new(encrypted_literals), FailOnPlaceholderChange::new(), + WrapEqlParamsInRow::new(Arc::clone(&self.node_types)), )) } } From 0432a2adf2804c17a34a11a58b01e43b10cb40b8 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Mon, 12 May 2025 15:42:29 +1000 Subject: [PATCH 24/50] chore(proxy): remove cipherstash-config dependency We have known that config was a mess for a while. 
The straw that broke the camel's back was attempting to use a local cipherstash-client override which had the knock on effect of pulling in two copies of cipherstash-config. --- Cargo.lock | 70 +++++++++++++------ packages/cipherstash-proxy/Cargo.toml | 1 - .../src/encrypt/config/encrypt_config.rs | 2 +- .../src/encrypt/config/manager.rs | 2 +- packages/cipherstash-proxy/src/encrypt/mod.rs | 11 ++- packages/cipherstash-proxy/src/error.rs | 8 +-- .../src/postgresql/context/column.rs | 2 +- .../src/postgresql/data/from_sql.rs | 9 +-- .../src/postgresql/messages/parse.rs | 2 +- 9 files changed, 67 insertions(+), 40 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cae63529..afc01fba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,7 +194,7 @@ dependencies = [ "asn1-rs-derive", "asn1-rs-impl", "displaydoc", - "nom", + "nom 7.1.3", "num-traits", "rusticata-macros", "thiserror 2.0.12", @@ -599,7 +599,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ - "nom", + "nom 7.1.3", ] [[package]] @@ -651,8 +651,6 @@ dependencies = [ [[package]] name = "cipherstash-client" version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fe21509165da6daf50b84d4dc9bc46b558e5afb34db75dbd2371b963faabe4d" dependencies = [ "aes-gcm-siv", "anyhow", @@ -664,7 +662,7 @@ dependencies = [ "blake3", "cfg-if", "chrono", - "cipherstash-config", + "cipherstash-config 0.2.3", "cipherstash-core", "cllw-ore", "cts-common", @@ -683,7 +681,7 @@ dependencies = [ "percent-encoding", "rand 0.8.5", "rand_chacha 0.3.1", - "recipher", + "recipher 0.1.3", "reqwest", "reqwest-middleware", "reqwest-retry", @@ -708,6 +706,14 @@ dependencies = [ "zerokms-protocol", ] +[[package]] +name = "cipherstash-config" +version = "0.2.3" +dependencies = [ + "serde", + "thiserror 1.0.69", +] + [[package]] name = "cipherstash-config" version = "0.2.3" @@ 
-721,8 +727,6 @@ dependencies = [ [[package]] name = "cipherstash-core" version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd56dfac0a35146968ef6696fb822b22f70a664a8739874385876d5452844b7a" dependencies = [ "hmac", "lazy_static", @@ -742,7 +746,6 @@ dependencies = [ "bytes", "chrono", "cipherstash-client", - "cipherstash-config", "clap", "config", "eql-mapper", @@ -757,7 +760,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.0", - "recipher", + "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "regex", "ring", "rust_decimal", @@ -788,13 +791,13 @@ version = "0.1.0" dependencies = [ "chrono", "cipherstash-client", - "cipherstash-config", + "cipherstash-config 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "cipherstash-proxy", "clap", "fake 4.2.0", "hex", "rand 0.9.0", - "recipher", + "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "rustls", "serde", "serde_json", @@ -863,8 +866,6 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cllw-ore" version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1b01c26e11101044f85802e31d842483ef983a890c03472d9489f6969cf865a" dependencies = [ "bit-vec", "bitvec", @@ -1023,8 +1024,6 @@ dependencies = [ [[package]] name = "cts-common" version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938da7d14d05c2769bf7ae33c5a395eb6a34ffdd25ec286e97702ae563314f9b" dependencies = [ "arrayvec", "axum", @@ -1033,6 +1032,7 @@ dependencies = [ "fake 3.1.0", "http", "miette", + "nom 8.0.0", "rand 0.8.5", "regex", "serde", @@ -1111,7 +1111,7 @@ checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" dependencies = [ "asn1-rs", "displaydoc", - "nom", + "nom 7.1.3", "num-bigint", "num-traits", "rusticata-macros", @@ -2354,6 +2354,15 @@ dependencies = [ "minimal-lexical", 
] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2955,6 +2964,25 @@ dependencies = [ "bitflags 2.9.0", ] +[[package]] +name = "recipher" +version = "0.1.3" +dependencies = [ + "aes", + "async-trait", + "cmac", + "hex", + "hex-literal", + "opaque-debug", + "rand 0.8.5", + "rand_chacha 0.3.1", + "serde", + "serde_cbor", + "sha2", + "thiserror 1.0.69", + "zeroize", +] + [[package]] name = "recipher" version = "0.1.3" @@ -3256,7 +3284,7 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" dependencies = [ - "nom", + "nom 7.1.3", ] [[package]] @@ -4911,7 +4939,7 @@ dependencies = [ "data-encoding", "der-parser", "lazy_static", - "nom", + "nom 7.1.3", "oid-registry", "rusticata-macros", "thiserror 2.0.12", @@ -5032,12 +5060,10 @@ dependencies = [ [[package]] name = "zerokms-protocol" version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01a9d0d8103cfa862b451f2c35144301df25a233f7fae041666b890a1578c3b1" dependencies = [ "async-trait", "base64", - "cipherstash-config", + "cipherstash-config 0.2.3", "fake 2.10.0", "opaque-debug", "rand 0.8.5", diff --git a/packages/cipherstash-proxy/Cargo.toml b/packages/cipherstash-proxy/Cargo.toml index 7ee18238..957f3a1b 100644 --- a/packages/cipherstash-proxy/Cargo.toml +++ b/packages/cipherstash-proxy/Cargo.toml @@ -9,7 +9,6 @@ arc-swap = "1.7.1" bytes = { version = "1.9", default-features = false } chrono = { version = "0.4.39", features = ["clock"] } cipherstash-client = { version = "0.20.0", features = ["tokio"] } -cipherstash-config = "0.2.3" clap = { version = "4.5.31", features = ["derive", "env"] } config = { version = "0.15", features = [ 
"async", diff --git a/packages/cipherstash-proxy/src/encrypt/config/encrypt_config.rs b/packages/cipherstash-proxy/src/encrypt/config/encrypt_config.rs index c9bf5101..730e678e 100644 --- a/packages/cipherstash-proxy/src/encrypt/config/encrypt_config.rs +++ b/packages/cipherstash-proxy/src/encrypt/config/encrypt_config.rs @@ -3,7 +3,7 @@ use crate::{ error::{ConfigError, Error}, log::KEYSET, }; -use cipherstash_config::{ +use cipherstash_client::schema::{ column::{Index, IndexType, TokenFilter, Tokenizer}, ColumnConfig, ColumnType, }; diff --git a/packages/cipherstash-proxy/src/encrypt/config/manager.rs b/packages/cipherstash-proxy/src/encrypt/config/manager.rs index df23d8ab..50c4296d 100644 --- a/packages/cipherstash-proxy/src/encrypt/config/manager.rs +++ b/packages/cipherstash-proxy/src/encrypt/config/manager.rs @@ -7,7 +7,7 @@ use crate::{ log::ENCRYPT_CONFIG, }; use arc_swap::ArcSwap; -use cipherstash_config::ColumnConfig; +use cipherstash_client::schema::ColumnConfig; use serde_json::Value; use std::{collections::HashMap, sync::Arc, time::Duration}; use tokio::{task::JoinHandle, time}; diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index fca8efd1..681e4b78 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -17,9 +17,9 @@ use cipherstash_client::{ self, Encrypted, EncryptedEntry, EncryptedSteVecTerm, IndexTerm, Plaintext, PlaintextTarget, ReferencedPendingPipeline, }, + schema::ColumnConfig, ConsoleConfig, CtsConfig, ZeroKMSConfig, }; -use cipherstash_config::ColumnConfig; use config::EncryptConfigManager; use schema::SchemaManager; use std::{sync::Arc, vec}; @@ -201,7 +201,14 @@ async fn init_cipher(config: &TandemConfig) -> Result { // Not using with_env because the proxy config should take precedence let builder = ZeroKMSConfig::builder() .add_source(EnvSource::default()) - .workspace_id(&config.auth.workspace_id) + .workspace_id( + 
config + .auth + .workspace_id + .to_owned() + .try_into() + .map_err(cipherstash_client::config::ConfigError::from)?, + ) .access_key(&config.auth.client_access_key) .try_with_client_id(&config.encrypt.client_id)? .try_with_client_key(&config.encrypt.client_key)? diff --git a/packages/cipherstash-proxy/src/error.rs b/packages/cipherstash-proxy/src/error.rs index a37e7413..d0499f11 100644 --- a/packages/cipherstash-proxy/src/error.rs +++ b/packages/cipherstash-proxy/src/error.rs @@ -104,7 +104,7 @@ pub enum ConfigError { Certificate(#[from] rustls_pki_types::pem::Error), #[error(transparent)] - EncryptConfig(#[from] cipherstash_config::errors::ConfigError), + EncryptConfig(#[from] cipherstash_client::config::errors::ConfigError), #[error(transparent)] Database(#[from] tokio_postgres::Error), @@ -285,12 +285,6 @@ impl From for Error { } } -impl From for Error { - fn from(e: cipherstash_config::errors::ConfigError) -> Self { - Error::Config(e.into()) - } -} - impl From for Error { fn from(e: cipherstash_client::encryption::TypeParseError) -> Self { Error::Encrypt(e.into()) diff --git a/packages/cipherstash-proxy/src/postgresql/context/column.rs b/packages/cipherstash-proxy/src/postgresql/context/column.rs index 41f45e8b..20155ca8 100644 --- a/packages/cipherstash-proxy/src/postgresql/context/column.rs +++ b/packages/cipherstash-proxy/src/postgresql/context/column.rs @@ -1,4 +1,4 @@ -use cipherstash_config::{ColumnConfig, ColumnType}; +use cipherstash_client::schema::{ColumnConfig, ColumnType}; use postgres_types::Type; use crate::Identifier; diff --git a/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs b/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs index 24daad7d..7850741f 100644 --- a/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs +++ b/packages/cipherstash-proxy/src/postgresql/data/from_sql.rs @@ -6,8 +6,7 @@ use crate::{ use bigdecimal::BigDecimal; use bytes::BytesMut; use chrono::NaiveDate; -use 
cipherstash_client::encryption::Plaintext; -use cipherstash_config::ColumnType; +use cipherstash_client::{encryption::Plaintext, schema::ColumnType}; use postgres_types::FromSql; use postgres_types::Type; use rust_decimal::Decimal; @@ -342,8 +341,10 @@ mod tests { }; use bytes::{BufMut, BytesMut}; use chrono::NaiveDate; - use cipherstash_client::encryption::Plaintext; - use cipherstash_config::{ColumnConfig, ColumnMode, ColumnType}; + use cipherstash_client::{ + encryption::Plaintext, + schema::{ColumnConfig, ColumnMode, ColumnType}, + }; use postgres_types::{ToSql, Type}; fn to_message(s: &[u8]) -> BytesMut { diff --git a/packages/cipherstash-proxy/src/postgresql/messages/parse.rs b/packages/cipherstash-proxy/src/postgresql/messages/parse.rs index a2c30f5c..cc49f6e9 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/parse.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/parse.rs @@ -123,7 +123,7 @@ mod tests { Identifier, }; use bytes::BytesMut; - use cipherstash_config::{ColumnConfig, ColumnType}; + use cipherstash_client::schema::{ColumnConfig, ColumnType}; fn to_message(s: &[u8]) -> BytesMut { BytesMut::from(s) From 59e66d5e4de0aa85629238224f3c4e0680fb556d Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Mon, 12 May 2025 16:04:32 +1000 Subject: [PATCH 25/50] feat: decrypt eql 2.0 records --- mise.toml | 8 +- packages/cipherstash-proxy/src/config/log.rs | 23 +-- packages/cipherstash-proxy/src/error.rs | 6 + packages/cipherstash-proxy/src/log/mod.rs | 2 + .../cipherstash-proxy/src/log/subscriber.rs | 4 + .../src/postgresql/backend.rs | 12 +- .../src/postgresql/context/mod.rs | 36 ++-- .../src/postgresql/frontend.rs | 8 +- .../src/postgresql/messages/data_row.rs | 193 +++++++++--------- 9 files changed, 146 insertions(+), 146 deletions(-) diff --git a/mise.toml b/mise.toml index 64e563df..ec398917 100644 --- a/mise.toml +++ b/mise.toml @@ -22,8 +22,10 @@ CS_DATABASE__PORT = "5532" # Default configuration for dev cipherstash-proxy run using 
'mise run proxy:up' CS_PROXY__HOST = "proxy" # Misc -DOCKER_CLI_HINTS = "false" # Please don't show us What's Next. -CS_EQL_VERSION = "eql-1.0.1" +DOCKER_CLI_HINTS = "false" # Please don't show us What's Next. + +# CS_EQL_VERSION = "eql-1.0.1" +CS_EQL_VERSION = "eql-2.0.0-alpha.1" [tools] "cargo:cargo-binstall" = "latest" @@ -502,6 +504,7 @@ outputs = [ run = """ # install script if [ -z "$CS_EQL_PATH" ]; then + echo "Downloading ${CS_EQL_VERSION} install" curl -sLo "{{config_root}}/cipherstash-encrypt.sql" https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt.sql else echo "Using EQL: ${CS_EQL_PATH}/cipherstash-encrypt.sql" @@ -510,6 +513,7 @@ fi # uninstall script if [ -z "$CS_EQL_PATH" ]; then + echo "Downloading ${CS_EQL_VERSION} uninstall" curl -sLo "{{config_root}}/cipherstash-encrypt-uninstall.sql" https://github.com/cipherstash/encrypt-query-language/releases/download/${CS_EQL_VERSION}/cipherstash-encrypt-uninstall.sql else echo "Using EQL: ${CS_EQL_PATH}/cipherstash-encrypt-uninstall.sql" diff --git a/packages/cipherstash-proxy/src/config/log.rs b/packages/cipherstash-proxy/src/config/log.rs index 06c235ce..aa5ebbc4 100644 --- a/packages/cipherstash-proxy/src/config/log.rs +++ b/packages/cipherstash-proxy/src/config/log.rs @@ -35,6 +35,9 @@ pub struct LogConfig { #[serde(default = "LogConfig::default_log_level")] pub encrypt_level: LogLevel, + #[serde(default = "LogConfig::default_log_level")] + pub decrypt_level: LogLevel, + #[serde(default = "LogConfig::default_log_level")] pub encrypt_config_level: LogLevel, @@ -107,24 +110,7 @@ impl Display for LogLevel { impl Default for LogConfig { fn default() -> Self { - LogConfig { - format: LogConfig::default_log_format(), - output: LogConfig::default_log_output(), - ansi_enabled: LogConfig::default_ansi_enabled(), - level: LogConfig::default_log_level(), - development_level: LogConfig::default_log_level(), - authentication_level: 
LogConfig::default_log_level(), - context_level: LogConfig::default_log_level(), - encrypt_level: LogConfig::default_log_level(), - encoding_level: LogConfig::default_log_level(), - encrypt_config_level: LogConfig::default_log_level(), - keyset_level: LogConfig::default_log_level(), - migrate_level: LogConfig::default_log_level(), - protocol_level: LogConfig::default_log_level(), - mapper_level: LogConfig::default_log_level(), - schema_level: LogConfig::default_log_level(), - config_level: LogConfig::default_log_level(), - } + Self::with_level(LogConfig::default_log_level()) } } @@ -141,6 +127,7 @@ impl LogConfig { encoding_level: level, encrypt_level: level, encrypt_config_level: level, + decrypt_level: level, keyset_level: level, migrate_level: level, protocol_level: level, diff --git a/packages/cipherstash-proxy/src/error.rs b/packages/cipherstash-proxy/src/error.rs index a37e7413..e6eef3b0 100644 --- a/packages/cipherstash-proxy/src/error.rs +++ b/packages/cipherstash-proxy/src/error.rs @@ -202,6 +202,12 @@ pub enum EncryptError { #[error(transparent)] CiphertextCouldNotBeSerialised(#[from] serde_json::Error), + #[error("Encrypted column could not be parsed")] + ColumnCouldNotBeParsed, + + #[error("Column '{column}' in table '{table}' could not be deserialised. For help visit {}#encrypt-column-could-not-be-deserialised", ERROR_DOC_BASE_URL)] + ColumnCouldNotBeDeserialised { table: String, column: String }, + #[error("Column '{column}' in table '{table}' could not be encrypted. 
For help visit {}#encrypt-column-could-not-be-encrypted", ERROR_DOC_BASE_URL)] ColumnCouldNotBeEncrypted { table: String, column: String }, diff --git a/packages/cipherstash-proxy/src/log/mod.rs b/packages/cipherstash-proxy/src/log/mod.rs index 3ed05f7f..b1922ba8 100644 --- a/packages/cipherstash-proxy/src/log/mod.rs +++ b/packages/cipherstash-proxy/src/log/mod.rs @@ -18,6 +18,7 @@ pub const AUTHENTICATION: &str = "authentication"; pub const CONFIG: &str = "config"; pub const CONTEXT: &str = "context"; pub const ENCRYPT: &str = "encrypt"; +pub const DECRYPT: &str = "decrypt"; pub const ENCODING: &str = "encoding"; pub const ENCRYPT_CONFIG: &str = "encrypt_config"; pub const KEYSET: &str = "keyset"; @@ -128,6 +129,7 @@ mod tests { encoding_level: LogLevel::Error, encrypt_level: LogLevel::Error, encrypt_config_level: LogLevel::Error, + decrypt_level: LogLevel::Error, keyset_level: LogLevel::Trace, migrate_level: LogLevel::Trace, protocol_level: LogLevel::Info, diff --git a/packages/cipherstash-proxy/src/log/subscriber.rs b/packages/cipherstash-proxy/src/log/subscriber.rs index a69beac9..eb1d6912 100644 --- a/packages/cipherstash-proxy/src/log/subscriber.rs +++ b/packages/cipherstash-proxy/src/log/subscriber.rs @@ -6,11 +6,14 @@ use tracing_subscriber::fmt::writer::BoxMakeWriter; use tracing_subscriber::fmt::SubscriberBuilder; use tracing_subscriber::FmtSubscriber; +use super::DECRYPT; + fn log_targets() -> Vec<&'static str> { vec![ DEVELOPMENT, AUTHENTICATION, CONTEXT, + DECRYPT, ENCRYPT, KEYSET, PROTOCOL, @@ -24,6 +27,7 @@ fn log_level_for(config: &LogConfig, target: &str) -> LogLevel { DEVELOPMENT => config.development_level, AUTHENTICATION => config.authentication_level, CONTEXT => config.context_level, + DECRYPT => config.decrypt_level, ENCRYPT => config.encrypt_level, KEYSET => config.keyset_level, PROTOCOL => config.protocol_level, diff --git a/packages/cipherstash-proxy/src/postgresql/backend.rs b/packages/cipherstash-proxy/src/postgresql/backend.rs index 
950f5a3d..893addb1 100644 --- a/packages/cipherstash-proxy/src/postgresql/backend.rs +++ b/packages/cipherstash-proxy/src/postgresql/backend.rs @@ -19,7 +19,6 @@ use crate::prometheus::{ ROWS_PASSTHROUGH_TOTAL, ROWS_TOTAL, SERVER_BYTES_RECEIVED_TOTAL, }; use bytes::BytesMut; -use itertools::Itertools; use metrics::{counter, histogram}; use std::time::Instant; use tokio::io::AsyncRead; @@ -234,7 +233,7 @@ where let portal = self.context.get_portal_from_execute(); let portal = match portal.as_deref() { - Some(Portal::Encrypted { .. }) | Some(Portal::EncryptedText) => portal.unwrap(), + Some(Portal::Encrypted { .. }) => portal.unwrap(), _ => { debug!(target: MAPPER, client_id = self.context.client_id, msg = "Passthrough portal"); if !self.buffer.is_empty() { @@ -261,12 +260,13 @@ where // If no portal, assume Text for all columns let result_column_format_codes = portal.format_codes(result_column_count); + let projection_columns = portal.projection_columns(); + // Each row is converted into Vec> let ciphertexts: Vec> = rows .iter() - .map(|row| row.to_ciphertext()) - .flatten_ok() - .collect::, _>>()?; + .flat_map(|row| row.to_ciphertext(projection_columns)) + .collect::>(); let start = Instant::now(); @@ -381,7 +381,7 @@ where async fn data_row_handler(&mut self, bytes: &BytesMut) -> Result { counter!(ROWS_TOTAL).increment(1); match self.context.get_portal_from_execute().as_deref() { - Some(Portal::Encrypted { .. }) | Some(Portal::EncryptedText) => { + Some(Portal::Encrypted { .. 
}) => { debug!(target: MAPPER, client_id = self.context.client_id, msg = "Encrypted"); let data_row = DataRow::try_from(bytes)?; diff --git a/packages/cipherstash-proxy/src/postgresql/context/mod.rs b/packages/cipherstash-proxy/src/postgresql/context/mod.rs index 4099335a..e4f03d78 100644 --- a/packages/cipherstash-proxy/src/postgresql/context/mod.rs +++ b/packages/cipherstash-proxy/src/postgresql/context/mod.rs @@ -74,20 +74,9 @@ pub enum Portal { format_codes: Vec, statement: Arc, }, - EncryptedText, Passthrough, } -/// -/// Portal is a Statement with Bound variables -/// An Execute message will execute the statement with the variables associated during a Bind. -/// -#[derive(Clone, Debug)] -pub struct EncryptedPortal { - pub format_codes: Vec, - pub statement: Arc, -} - impl Context { pub fn new(client_id: i32, schema: Arc) -> Context { Context { @@ -200,7 +189,6 @@ impl Context { match portal.as_ref() { Portal::Encrypted { statement, .. } => Some(statement.clone()), - Portal::EncryptedText => None, Portal::Passthrough => None, } } @@ -303,7 +291,18 @@ impl Queue { } impl Portal { - pub fn encrypted(statement: Arc, format_codes: Vec) -> Portal { + pub fn encrypted_with_format_codes( + statement: Arc, + format_codes: Vec, + ) -> Portal { + Portal::Encrypted { + statement, + format_codes, + } + } + + pub fn encrypted(statement: Arc) -> Portal { + let format_codes = vec![]; Portal::Encrypted { statement, format_codes, @@ -314,8 +313,12 @@ impl Portal { Portal::Passthrough } - pub fn encrypted_text() -> Portal { - Portal::EncryptedText + pub fn projection_columns(&self) -> &Vec> { + static EMPTY: Vec> = vec![]; + match self { + Portal::Encrypted { statement, .. 
} => &statement.projection_columns, + _ => &EMPTY, + } } // FormatCodes should not be None at this point @@ -336,7 +339,6 @@ impl Portal { } _ => format_codes.clone(), }, - Portal::EncryptedText => vec![FormatCode::Text; row_len], Portal::Passthrough => { unreachable!() } @@ -365,7 +367,7 @@ mod tests { } fn portal(statement: &Arc) -> Portal { - Portal::encrypted(statement.clone(), vec![]) + Portal::encrypted_with_format_codes(statement.clone(), vec![]) } fn get_statement(portal: Arc) -> Arc { diff --git a/packages/cipherstash-proxy/src/postgresql/frontend.rs b/packages/cipherstash-proxy/src/postgresql/frontend.rs index a3e87a3d..2cc83aea 100644 --- a/packages/cipherstash-proxy/src/postgresql/frontend.rs +++ b/packages/cipherstash-proxy/src/postgresql/frontend.rs @@ -35,6 +35,7 @@ use sqltk::parser::dialect::PostgreSqlDialect; use sqltk::parser::parser::Parser; use sqltk::NodeKey; use std::collections::HashMap; +use std::sync::Arc; use std::time::Instant; use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt}; use tracing::{debug, error, warn}; @@ -311,7 +312,7 @@ where counter!(STATEMENTS_ENCRYPTED_TOTAL).increment(1); // Set Encrypted portal - portal = Portal::encrypted_text(); + portal = Portal::encrypted(Arc::new(statement)); } None => { debug!(target: MAPPER, @@ -671,7 +672,10 @@ where bind.rewrite(encrypted)?; } if statement.has_projection() { - portal = Portal::encrypted(statement, bind.result_columns_format_codes.to_owned()); + portal = Portal::encrypted_with_format_codes( + statement, + bind.result_columns_format_codes.to_owned(), + ); } }; diff --git a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs index f9e4112e..d89a25c3 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs @@ -1,12 +1,13 @@ -use super::{maybe_json, maybe_jsonb, BackendCode, NULL}; +use super::{BackendCode, NULL}; use 
crate::{ eql, - error::{Error, ProtocolError}, - log::MAPPER, + error::{EncryptError, Error, ProtocolError}, + log::DECRYPT, + postgresql::Column, }; use bytes::{Buf, BufMut, BytesMut}; use std::io::Cursor; -use tracing::debug; +use tracing::{debug, error}; #[derive(Debug, Clone)] pub struct DataRow { @@ -19,8 +20,31 @@ pub struct DataColumn { } impl DataRow { - pub fn to_ciphertext(&self) -> Result>, Error> { - Ok(self.columns.iter().map(|col| col.into()).collect()) + pub fn to_ciphertext( + &self, + column_configuration: &Vec>, + ) -> Vec> { + let mut result = vec![]; + for (data_column, column_config) in self.columns.iter().zip(column_configuration) { + let encrypted = column_config + .as_ref() + .filter(|_| data_column.is_not_null()) + .and_then(|config| { + data_column + .try_into() + .inspect_err(|_| { + let err = EncryptError::ColumnCouldNotBeDeserialised { + table: config.identifier.table.to_owned(), + column: config.identifier.column.to_owned(), + }; + error!(target: DECRYPT, msg = err.to_string()); + }) + .ok() + }); + result.push(encrypted); + } + + result } pub fn column_count(&self) -> usize { @@ -47,14 +71,8 @@ impl DataRow { } impl DataColumn { - pub fn get_data(&self) -> Option> { - self.bytes.as_ref().map(|b| b.to_vec()) - } - - pub fn maybe_ciphertext(&self) -> bool { - self.bytes - .as_ref() - .is_some_and(|b| maybe_jsonb(b) || maybe_json(b)) + pub fn is_not_null(&self) -> bool { + self.bytes.is_some() } pub fn rewrite(&mut self, b: &[u8]) { @@ -63,21 +81,6 @@ impl DataColumn { bytes.extend_from_slice(b); } } - - /// - /// If the json format looks binary, returns a reference to the bytes without the jsonb header byte - /// - pub fn json_bytes(&self) -> Option<&[u8]> { - self.bytes.as_ref().and_then(|b| { - if maybe_jsonb(b) { - Some(&b[1..]) - } else if maybe_json(b) { - Some(&b[0..]) - } else { - None - } - }) - } } impl TryFrom<&BytesMut> for DataRow { @@ -108,9 +111,11 @@ impl TryFrom<&BytesMut> for DataRow { columns.push(DataColumn { bytes: 
None }); } else { let len = len as usize; + let mut bytes = BytesMut::with_capacity(len); bytes.resize(len, 0); cursor.copy_to_slice(&mut bytes); + columns.push(DataColumn { bytes: Some(bytes) }); } } @@ -159,108 +164,94 @@ impl TryFrom for BytesMut { } } -impl From<&DataColumn> for Option { - fn from(col: &DataColumn) -> Self { - debug!(target: MAPPER, data_column = ?col); - match col.json_bytes() { - Some(bytes) => match serde_json::from_slice(bytes) { - Ok(ct) => Some(ct), +impl TryFrom<&DataColumn> for eql::EqlEncrypted { + type Error = Error; + + fn try_from(col: &DataColumn) -> Result { + if let Some(bytes) = &col.bytes { + // Encrypted record is in the form ("{}") + // json data can be extracted by dropping the first and last two bytes to remove (" and ") + let start = 2; + let end = bytes.len() - 2; + let sliced = &bytes[start..end]; + + let input = String::from_utf8_lossy(sliced).to_string(); + let input = input.replace("\"\"", "\""); + + match serde_json::from_str(&input) { + Ok(e) => return Ok(e), Err(err) => { - debug!(target: MAPPER, msg = "Could not convert DataColumn to Ciphertext", error = err.to_string()); - None + debug!(target: DECRYPT, error = err.to_string()); + return Err(err.into()); } - }, - None => None, + } } + + Err(EncryptError::ColumnCouldNotBeParsed.into()) } } #[cfg(test)] mod tests { use super::DataRow; - use crate::{config::LogConfig, log, postgresql::messages::data_row::DataColumn}; - use bytes::{Buf, BytesMut}; - use cipherstash_client::zerokms::EncryptedRecord; - use recipher::key::Iv; - use tracing::info; - use uuid::Uuid; + use crate::{ + config::{LogConfig, LogLevel}, + log, + postgresql::messages::data_row::DataColumn, + }; + use crate::{EqlEncrypted, Identifier}; + use bytes::BytesMut; fn to_message(s: &[u8]) -> BytesMut { BytesMut::from(s) } - fn record() -> EncryptedRecord { - EncryptedRecord { - iv: Iv::default(), - ciphertext: vec![1; 32], - tag: vec![1; 16], - descriptor: "users/name".to_string(), - dataset_id: 
Some(Uuid::new_v4()), - } - } - #[test] - pub fn data_row_to_ciphertext() { - log::init(LogConfig::default()); - - let record = record(); - - let s = record.to_mp_base85().unwrap(); - info!("{:?}", s); - - // "{\"c\": \"mBbKx=EbyVyx>mNt9E;-h3xf8wDrq~v|IvQ=jXYG!u4Uu9SI)@Q+xmSd+PWo=<;Y$Ct\",\"k\": \"ct\",\"i\": {\"t\": \"\"users\"\",\"c\": \"\"email\"\"},\"v\": 1}"; + pub fn parse_encrypted_column() { + log::init(LogConfig::with_level(LogLevel::Debug)); - let bytes = to_message(b"D\0\0\0i\0\x01\0\0\0_\x01{\"c\": \"mBbKx=EbyVyx>mNt9E;-h3xf8wDrq~v|IvQ=jXYG!u4Uu9SI)@Q+xmSd+PWo=<;Y$Ct\",\"k\": \"ct\",\"i\": {\"t\": \"\"users\"\",\"c\": \"\"email\"\"},\"v\": 1}"); - // let expected = bytes.clone(); + // SELECT encrypted_jsonb FROM encrypted LIMIT 1 + let bytes = to_message(b"D\0\0\x03\xba\0\x01\0\0\x03\xb0(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_jsonb\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": null, \"\"u\"\": null, \"\"v\"\": 1, \"\"sv\"\": [{\"\"b\"\": \"\"8067db44a848ab32c3056a3dbe4edf16\"\", \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"9493d6010fe7845d52149b697729c745\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": null}, {\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;m8QkTKr|h>Q`^NbW(CC|>SD}UM=o%mz(Fw#LQFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"b1f0e4bb3855bc33936ef1fddf532765\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": \"\"fbc7a11fc81f2a31c904c5b05572b054824e3b5f5ece78f1b711f93175f0a4a9726157cea247e107\"\"}], \"\"ocf\"\": null, \"\"ocv\"\": null}\")"); + let data_row = DataRow::try_from(&bytes).unwrap(); - let _data_row = 
DataRow::try_from(&bytes).unwrap(); + let col = data_row.columns.first().unwrap(); + let e: EqlEncrypted = col.try_into().unwrap(); - // let ciphertext = data_row.to_ciphertext().expect("ok"); + let expected = Identifier::new("encrypted", "encrypted_jsonb"); - // let ct = ciphertext.first(); - // assert!(ct.is_some()); + assert_eq!(e.identifier, expected); - // info!("{:?}", ciphertext.first()); + // SELECT * FROM encrypted WHERE id = $1; + // Only encrypted_text is NOT NULL + let bytes = to_message(b"D\0\0\n\x91\0\n\0\0\0\n1297231342\xff\xff\xff\xff\0\0\nY(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbJ;S^xMu@++(U20{lxK;qYYaDYF#30N~x;wyOUMoFOB9K!>A_9g9j@+M6V3wENqu#H8gDb9OZewzJaCBv4Uvy=7bie\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_text\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": [369, 381, 1758, 403, 35, 609, 1181, 1098, 1347, 1633, 1150, 815, 1997, 234, 1858, 656, 1335, 936, 1204, 630, 1764, 1328, 1649, 1396, 113, 1149, 1499, 1147, 586, 1942, 901, 1256, 1226, 1045, 637, 279, 1162, 1077, 1340, 1336, 1448, 700, 176, 1849, 1915, 1389, 71, 515, 633, 388, 1877, 1339, 1239, 638, 1365, 1380, 1273, 581, 1792, 1716, 145, 512, 814, 272, 1333, 1775, 1572, 1744, 2018, 433, 1641, 1529, 647, 1317, 652, 1606, 1737, 470, 826, 80, 929, 1700, 1619, 1253, 358, 1589, 1971, 1019, 1533, 1624, 573, 1684, 1287, 575, 1761, 527, 404, 1369, 894, 18, 1101, 986, 1772, 1090, 1506, 2015, 1988, 205, 141, 445, 1982], \"\"o\"\": 
[\"\"faa1f63cb6d36094d1aa50db6c0217eb447a987071119bb127f677b6a7ee0b4fe40eed7cd84e96e8a11bbe3ea14331f3ec4c8f149ce9d2b0253b4676c86557fcec4a5f8ca4e1ee081c66bf0a3cb594c6b5739f77f62fc5e76991869c23a97f01816cde3dfc24b2ca2fbb12b50fde324f18aa51718d681772bf9caf3c059a6748cbcaf4dd1c4fa02645d74699d7d265faf938c339f6cc8f57db9bd4cff8e03cae9e5d21a651b33525e86e335dff61520e8f23d7002f05fa186075a335fb7b2c740133b5a72760ccd216127d69983aa31a090a3b6ca56a48b6372cab60c979465d84dc94e5452c92517b643882fa82c22a26b4feaaa1b0ae8fcb989b10d0351fb3c9c5e56e719f820442612a67fff334438f3f5d35ff6db1b5f7a50670c7fec014f6fc19c352eb011911faf62a230e10c2d16f6c84b46cf9ee7eb1afb9c61a523891e31da2a18b445769d75c11873566dc8196d77e985423226bd1db10e4ce9eb10c2f69db7ce57d47281401617978d2bcfca23b9015b9e705615b8bf773daa87a18417f86e5338a7929fa4f10c6864af09870bfd9ddfb7848\"\", \"\"b41d89a196a35252a965ce3c330eac369ead56e9f06e2016da4d6971fe0b8d6e677e1018e7a1bd2fa0b2c1faaa12650d678352ecc81f6be879213fe78b8004b87dd7dcadec59df4dcafdb3c9aa55dcb2cc2bcf2193574b201c9a1c14764d69716f63b0c1aa30a2846696f2a1c790ca2cb26370d7e20904a8748ea98a95ee3cbb95c5f342de4e71bbf0262e84d59188ea72fe4449a16e7c73f88ed06b9cb724902a85d063c03e9b1a63dd18b9604625ca3cb8110d9c8f93e1771525c51b6ee092d554e84d61df5b557994f32191bb2b6801d9727fb707d5287e6c83d6b16763a6e66526baf80765a58d36df744be7872d2750eb28a86a519a21ee710f618c09cb2bd45f21e805ae4e11eb2987d7be31c32164d4f828fc35c389d516d0d6a54e25041985cffcb6124b4d3fa5b0ba91e19d60e3102370e9c1c768df1b427c682304a1dfdea2d3e514db22057f43d8121b8daf7c434831e5b618bbca9f4e198741927bdc168e4703fb1f703957f7b70491e06bec4adee19d29ef5e938695e1d49ef50ceef0a9c3e46bd8fe309e013e5ea0d35c5ebf3dddd97573\"\"], \"\"s\"\": null, \"\"u\"\": \"\"962d77dfaf892b596b3255c022359e54f3e8dc8b21c3d1b32ebd05555f433192\"\", \"\"v\"\": 1, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": null}\")\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"); + let data_row = DataRow::try_from(&bytes).unwrap(); - 
// let column = ciphertext.first().unwrap().as_ref().unwrap(); + // let e = DataColumn::parse(bytes).unwrap(); + let col = &data_row.columns[2]; + let e: EqlEncrypted = col.try_into().unwrap(); + let expected = Identifier::new("encrypted", "encrypted_text"); - // assert_eq!(column.kind, "ct"); + assert_eq!(e.identifier, expected); } - // #[test] - // pub fn data_column_to_json_bytes() { - // log::init(&None); - // let bytes = to_message(b"D\0\0\0i\0\x01\0\0\0_\x01{\"c\": \"51b72947dc25481880175ef53a35af34\", \"i\": {\"c\": \"name\", \"t\": \"users\"}, \"k\": \"ct\", \"v\": 1}"); - // // let expected = bytes.clone(); - - // let data_row = DataRow::try_from(&bytes).expect("ok"); - - // let ciphertext = data_row.to_ciphertext().expect("ok"); - - // info!("{:?}", data_row); - - // // info!("{:?}", ciphertext.first()); - - // // let column = ciphertext.first().unwrap().as_ref().unwrap(); - - // // assert_eq!(column.kind, "ct"); - // } - #[test] pub fn parse_data_row() { - let bytes = to_message(b"D\0\0\0\x0e\0\x01\0\0\0\x04\0\0\x1e\xa2"); - let expected = bytes.clone(); + log::init(LogConfig::with_level(LogLevel::Debug)); - let data_row = DataRow::try_from(&bytes).unwrap(); + let messages = vec![ + to_message(b"D\0\0\0\x0e\0\x01\0\0\0\x04\0\0\x1e\xa2"), + // SELECT encrypted_jsonb FROM encrypted LIMIT 1 + to_message(b"D\0\0\x03\xba\0\x01\0\0\x03\xb0(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_jsonb\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": null, \"\"u\"\": null, \"\"v\"\": 1, \"\"sv\"\": [{\"\"b\"\": \"\"8067db44a848ab32c3056a3dbe4edf16\"\", \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"9493d6010fe7845d52149b697729c745\"\", \"\"u\"\": null, \"\"sv\"\": 
null, \"\"ocf\"\": null, \"\"ocv\"\": null}, {\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;m8QkTKr|h>Q`^NbW(CC|>SD}UM=o%mz(Fw#LQFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"b1f0e4bb3855bc33936ef1fddf532765\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": \"\"fbc7a11fc81f2a31c904c5b05572b054824e3b5f5ece78f1b711f93175f0a4a9726157cea247e107\"\"}], \"\"ocf\"\": null, \"\"ocv\"\": null}\")"), + ]; - let data_col = data_row.columns.first().unwrap(); + for bytes in messages { + let expected = bytes.clone(); - let mut buf: &[u8] = data_col.bytes.as_ref().unwrap(); - let value = buf.get_i32(); - assert_eq!(value, 7842); + let data_row = DataRow::try_from(&bytes).unwrap(); - let bytes = BytesMut::try_from(data_row).unwrap(); - assert_eq!(bytes, expected); + let bytes = BytesMut::try_from(data_row).unwrap(); + assert_eq!(bytes, expected); + } } #[test] From 2f2e9ff18d0f1fe453d0adf4e9430057f70fddbb Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Mon, 12 May 2025 16:30:02 +1000 Subject: [PATCH 26/50] fix: update cargo.lock --- Cargo.lock | 69 +++++++++++++++++------------------------------------- 1 file changed, 21 insertions(+), 48 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afc01fba..45b21856 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,7 +194,7 @@ dependencies = [ "asn1-rs-derive", "asn1-rs-impl", "displaydoc", - "nom 7.1.3", + "nom", "num-traits", "rusticata-macros", "thiserror 2.0.12", @@ -599,7 +599,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ - "nom 7.1.3", + "nom", ] [[package]] @@ -651,6 +651,8 @@ dependencies = [ [[package]] name = "cipherstash-client" version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fe21509165da6daf50b84d4dc9bc46b558e5afb34db75dbd2371b963faabe4d" dependencies = [ 
"aes-gcm-siv", "anyhow", @@ -662,7 +664,7 @@ dependencies = [ "blake3", "cfg-if", "chrono", - "cipherstash-config 0.2.3", + "cipherstash-config", "cipherstash-core", "cllw-ore", "cts-common", @@ -681,7 +683,7 @@ dependencies = [ "percent-encoding", "rand 0.8.5", "rand_chacha 0.3.1", - "recipher 0.1.3", + "recipher", "reqwest", "reqwest-middleware", "reqwest-retry", @@ -706,14 +708,6 @@ dependencies = [ "zerokms-protocol", ] -[[package]] -name = "cipherstash-config" -version = "0.2.3" -dependencies = [ - "serde", - "thiserror 1.0.69", -] - [[package]] name = "cipherstash-config" version = "0.2.3" @@ -727,6 +721,8 @@ dependencies = [ [[package]] name = "cipherstash-core" version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd56dfac0a35146968ef6696fb822b22f70a664a8739874385876d5452844b7a" dependencies = [ "hmac", "lazy_static", @@ -760,7 +756,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.9.0", - "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "recipher", "regex", "ring", "rust_decimal", @@ -791,13 +787,13 @@ version = "0.1.0" dependencies = [ "chrono", "cipherstash-client", - "cipherstash-config 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "cipherstash-config", "cipherstash-proxy", "clap", "fake 4.2.0", "hex", "rand 0.9.0", - "recipher 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "recipher", "rustls", "serde", "serde_json", @@ -866,6 +862,8 @@ checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" [[package]] name = "cllw-ore" version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1b01c26e11101044f85802e31d842483ef983a890c03472d9489f6969cf865a" dependencies = [ "bit-vec", "bitvec", @@ -1024,6 +1022,8 @@ dependencies = [ [[package]] name = "cts-common" version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"938da7d14d05c2769bf7ae33c5a395eb6a34ffdd25ec286e97702ae563314f9b" dependencies = [ "arrayvec", "axum", @@ -1032,7 +1032,6 @@ dependencies = [ "fake 3.1.0", "http", "miette", - "nom 8.0.0", "rand 0.8.5", "regex", "serde", @@ -1111,7 +1110,7 @@ checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" dependencies = [ "asn1-rs", "displaydoc", - "nom 7.1.3", + "nom", "num-bigint", "num-traits", "rusticata-macros", @@ -2354,15 +2353,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "nom" -version = "8.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" -dependencies = [ - "memchr", -] - [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2964,25 +2954,6 @@ dependencies = [ "bitflags 2.9.0", ] -[[package]] -name = "recipher" -version = "0.1.3" -dependencies = [ - "aes", - "async-trait", - "cmac", - "hex", - "hex-literal", - "opaque-debug", - "rand 0.8.5", - "rand_chacha 0.3.1", - "serde", - "serde_cbor", - "sha2", - "thiserror 1.0.69", - "zeroize", -] - [[package]] name = "recipher" version = "0.1.3" @@ -3284,7 +3255,7 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" dependencies = [ - "nom 7.1.3", + "nom", ] [[package]] @@ -4939,7 +4910,7 @@ dependencies = [ "data-encoding", "der-parser", "lazy_static", - "nom 7.1.3", + "nom", "oid-registry", "rusticata-macros", "thiserror 2.0.12", @@ -5060,10 +5031,12 @@ dependencies = [ [[package]] name = "zerokms-protocol" version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a9d0d8103cfa862b451f2c35144301df25a233f7fae041666b890a1578c3b1" dependencies = [ "async-trait", "base64", - "cipherstash-config 0.2.3", + "cipherstash-config", "fake 2.10.0", "opaque-debug", "rand 0.8.5", From a5be8c9e7d0ceb88aedf7cebfc26b7402b485ccf Mon Sep 17 
00:00:00 2001 From: James Sadler Date: Mon, 12 May 2025 22:39:49 +1000 Subject: [PATCH 27/50] fix: depend on newly release cipherstash-client (and fix build error) --- Cargo.lock | 8 ++++---- packages/cipherstash-proxy-integration/Cargo.toml | 2 +- packages/cipherstash-proxy/Cargo.toml | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45b21856..d3eb654f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -650,9 +650,9 @@ dependencies = [ [[package]] name = "cipherstash-client" -version = "0.20.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fe21509165da6daf50b84d4dc9bc46b558e5afb34db75dbd2371b963faabe4d" +checksum = "84a83c23fbd5b42bf7af65f882a584afd6f534a60ffd734a87618d0c3341c4b3" dependencies = [ "aes-gcm-siv", "anyhow", @@ -1021,9 +1021,9 @@ dependencies = [ [[package]] name = "cts-common" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938da7d14d05c2769bf7ae33c5a395eb6a34ffdd25ec286e97702ae563314f9b" +checksum = "058540fce9a147af37cab4f55a5f9d8ae7f35f66efeec58c08cce6beb173d9c3" dependencies = [ "arrayvec", "axum", diff --git a/packages/cipherstash-proxy-integration/Cargo.toml b/packages/cipherstash-proxy-integration/Cargo.toml index 74ea4334..4291432e 100644 --- a/packages/cipherstash-proxy-integration/Cargo.toml +++ b/packages/cipherstash-proxy-integration/Cargo.toml @@ -24,7 +24,7 @@ tracing-subscriber = { workspace = true } webpki-roots = "0.26.7" [dev-dependencies] -cipherstash-client = { version = "0.20.0", features = ["tokio"] } +cipherstash-client = { version = "0.21.0", features = ["tokio"] } cipherstash-config = "0.2.3" clap = "4.5.32" fake = { version = "4", features = ["chrono", "derive"] } diff --git a/packages/cipherstash-proxy/Cargo.toml b/packages/cipherstash-proxy/Cargo.toml index 957f3a1b..26b17bac 100644 --- a/packages/cipherstash-proxy/Cargo.toml +++ 
b/packages/cipherstash-proxy/Cargo.toml @@ -8,7 +8,7 @@ bigdecimal = { version = "0.4.6", features = ["serde-json"] } arc-swap = "1.7.1" bytes = { version = "1.9", default-features = false } chrono = { version = "0.4.39", features = ["clock"] } -cipherstash-client = { version = "0.20.0", features = ["tokio"] } +cipherstash-client = { version = "0.21.0", features = ["tokio"] } clap = { version = "4.5.31", features = ["derive", "env"] } config = { version = "0.15", features = [ "async", From 1fddf99b8e06d355dd47a33c87fc7f11f888c3f1 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 13 May 2025 09:44:52 +1000 Subject: [PATCH 28/50] chore(proxy): add CS_REGION to test.yml CS_REGION is an env var required by the latest cipherstash-client release. --- .github/workflows/test.yml | 1 + Cargo.lock | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 64c17a0c..35aaa983 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -26,6 +26,7 @@ jobs: CS_DEFAULT_KEYSET_ID: ${{ secrets.CS_DEFAULT_KEYSET_ID }} CS_CLIENT_ID: ${{ secrets.CS_CLIENT_ID }} CS_CLIENT_KEY: ${{ secrets.CS_CLIENT_KEY }} + CS_REGION: "ap-southeast-2.aws" RUST_BACKTRACE: "1" run: | diff --git a/Cargo.lock b/Cargo.lock index d3eb654f..92a4eb8e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,7 +194,7 @@ dependencies = [ "asn1-rs-derive", "asn1-rs-impl", "displaydoc", - "nom", + "nom 7.1.3", "num-traits", "rusticata-macros", "thiserror 2.0.12", @@ -599,7 +599,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766" dependencies = [ - "nom", + "nom 7.1.3", ] [[package]] @@ -1032,6 +1032,7 @@ dependencies = [ "fake 3.1.0", "http", "miette", + "nom 8.0.0", "rand 0.8.5", "regex", "serde", @@ -1110,7 +1111,7 @@ checksum = "07da5016415d5a3c4dd39b11ed26f915f52fc4e0dc197d87908bc916e51bc1a6" 
dependencies = [ "asn1-rs", "displaydoc", - "nom", + "nom 7.1.3", "num-bigint", "num-traits", "rusticata-macros", @@ -2353,6 +2354,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -3255,7 +3265,7 @@ version = "4.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "faf0c4a6ece9950b9abdb62b1cfcf2a68b3b67a10ba445b3bb85be2a293d0632" dependencies = [ - "nom", + "nom 7.1.3", ] [[package]] @@ -4910,7 +4920,7 @@ dependencies = [ "data-encoding", "der-parser", "lazy_static", - "nom", + "nom 7.1.3", "oid-registry", "rusticata-macros", "thiserror 2.0.12", From 22a35dfa1e5bc2eea6fafa117730f122fbdcaea0 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 13 May 2025 10:26:30 +1000 Subject: [PATCH 29/50] fix(proxy): ensure CS_REGION is set in tests/docker-compose.yml --- tests/docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index 2f0b1111..8b791d68 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -64,6 +64,7 @@ services: - CS_CLIENT_ID=${CS_CLIENT_ID} - CS_PROMETHEUS__ENABLED=${CS_PROMETHEUS__ENABLED:-true} - CS_SERVER__WORKER_THREADS=${CS_SERVER__WORKER_THREADS:-4} + - CS_REGION=${CS_REGION} networks: - postgres deploy: @@ -99,6 +100,7 @@ services: - CS_TLS__PRIVATE_KEY_PATH=${CS_TLS__PRIVATE_KEY:-/etc/cipherstash-proxy/server.key} - CS_SERVER__REQUIRE_TLS=true - CS_PROMETHEUS__ENABLED=${CS_PROMETHEUS__ENABLED:-true} + - CS_REGION=${CS_REGION} volumes: - ./tls/server.cert:/etc/cipherstash-proxy/server.cert From 4590dea8ed1905d8aa6dcd0be6b57e490309625e Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 13 May 2025 10:56:11 +1000 Subject: [PATCH 30/50] doc: add not to test.yml 
about env var config Signed-off-by: Toby Hede --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 35aaa983..e1f8b028 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,6 +21,8 @@ jobs: - run: | mise run postgres:up --extra-args "--detach --wait" - env: + # REMEMBER TO ADD ENVIRONMENT VARIABLES TO tests/docker-compose.yml + # The tests/docker-compose.yml config passes the ENV vars into the container CS_WORKSPACE_ID: ${{ secrets.CS_WORKSPACE_ID }} CS_CLIENT_ACCESS_KEY: ${{ secrets.CS_CLIENT_ACCESS_KEY }} CS_DEFAULT_KEYSET_ID: ${{ secrets.CS_DEFAULT_KEYSET_ID }} From ce7fa9367a5523ebd766272101f36b49ffaffeae Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 13 May 2025 12:00:25 +1000 Subject: [PATCH 31/50] feat: handle binary encoding of custom types in DataRow --- .../src/postgresql/messages/data_row.rs | 71 ++++++++++++++----- 1 file changed, 53 insertions(+), 18 deletions(-) diff --git a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs index d89a25c3..905b6a29 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs @@ -169,20 +169,36 @@ impl TryFrom<&DataColumn> for eql::EqlEncrypted { fn try_from(col: &DataColumn) -> Result { if let Some(bytes) = &col.bytes { - // Encrypted record is in the form ("{}") - // json data can be extracted by dropping the first and last two bytes to remove (" and ") - let start = 2; - let end = bytes.len() - 2; - let sliced = &bytes[start..end]; - - let input = String::from_utf8_lossy(sliced).to_string(); - let input = input.replace("\"\"", "\""); - - match serde_json::from_str(&input) { - Ok(e) => return Ok(e), - Err(err) => { - debug!(target: DECRYPT, error = err.to_string()); - return Err(err.into()); + if &bytes[0..=1] == b"(\"" { + // Text 
encoding + // Encrypted record is in the form ("{}") + // json data can be extracted by dropping the first and last two bytes to remove (" and ") + let start = 2; + let end = bytes.len() - 2; + let sliced = &bytes[start..end]; + + let input = String::from_utf8_lossy(sliced).to_string(); + let input = input.replace("\"\"", "\""); + + match serde_json::from_str(&input) { + Ok(e) => return Ok(e), + Err(err) => { + debug!(target: DECRYPT, error = err.to_string()); + return Err(err.into()); + } + } + } else { + // 12 bytes for the binary rowtype header + // 1 byte for the jsonb header (value of 1) + let start = 12 + 1; + let sliced = &bytes[start..]; + + match serde_json::from_slice(sliced) { + Ok(e) => return Ok(e), + Err(err) => { + debug!(target: DECRYPT, error = err.to_string()); + return Err(err.into()); + } } } } @@ -193,6 +209,8 @@ impl TryFrom<&DataColumn> for eql::EqlEncrypted { #[cfg(test)] mod tests { + use std::io::Cursor; + use super::DataRow; use crate::{ config::{LogConfig, LogLevel}, @@ -201,6 +219,7 @@ mod tests { }; use crate::{EqlEncrypted, Identifier}; use bytes::BytesMut; + use tracing::info; fn to_message(s: &[u8]) -> BytesMut { BytesMut::from(s) @@ -209,6 +228,26 @@ mod tests { #[test] pub fn parse_encrypted_column() { log::init(LogConfig::with_level(LogLevel::Debug)); + use bytes::Buf; + + let bytes = to_message(b"\0\0\0\x01\0\0\x0e\xda\0\0\n0\x01"); + + let mut cursor = Cursor::new(bytes); + let a = cursor.get_i32(); + let b = cursor.get_i32(); + let c = cursor.get_i32(); + + info!("{a}, {b}, {c}"); + + // Binary + // SELECT id, encrypted_text FROM encrypted WHERE id = $1 + let bytes = to_message(b"D\0\0\nR\0\x02\0\0\0\x08w\xaam\xf8Y$\x9dI\0\0\n<\0\0\0\x01\0\0\x0e\xda\0\0\n0\x01{\"b\": null, \"c\": \"mBbLbP2ww9ymEpm_yfj>@=^)JCqtLxcewai)Ilzx#HbC2p3F;dB`XP9af|s-igMjdMWLYPqYWAB#2|%uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_jsonb\"\", \"\"t\"\": \"\"encrypted\"\"}, 
\"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": null, \"\"u\"\": null, \"\"v\"\": 1, \"\"sv\"\": [{\"\"b\"\": \"\"8067db44a848ab32c3056a3dbe4edf16\"\", \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"9493d6010fe7845d52149b697729c745\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": null}, {\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;m8QkTKr|h>Q`^NbW(CC|>SD}UM=o%mz(Fw#LQFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"b1f0e4bb3855bc33936ef1fddf532765\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": \"\"fbc7a11fc81f2a31c904c5b05572b054824e3b5f5ece78f1b711f93175f0a4a9726157cea247e107\"\"}], \"\"ocf\"\": null, \"\"ocv\"\": null}\")"); @@ -216,9 +255,7 @@ mod tests { let col = data_row.columns.first().unwrap(); let e: EqlEncrypted = col.try_into().unwrap(); - let expected = Identifier::new("encrypted", "encrypted_jsonb"); - assert_eq!(e.identifier, expected); // SELECT * FROM encrypted WHERE id = $1; @@ -226,11 +263,9 @@ mod tests { let bytes = to_message(b"D\0\0\n\x91\0\n\0\0\0\n1297231342\xff\xff\xff\xff\0\0\nY(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbJ;S^xMu@++(U20{lxK;qYYaDYF#30N~x;wyOUMoFOB9K!>A_9g9j@+M6V3wENqu#H8gDb9OZewzJaCBv4Uvy=7bie\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_text\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": [369, 381, 1758, 403, 35, 609, 1181, 1098, 1347, 1633, 1150, 815, 1997, 234, 1858, 656, 1335, 936, 1204, 630, 1764, 1328, 1649, 1396, 113, 1149, 1499, 1147, 586, 1942, 901, 1256, 1226, 1045, 637, 279, 1162, 1077, 1340, 1336, 1448, 700, 176, 1849, 1915, 1389, 71, 515, 633, 388, 1877, 1339, 1239, 638, 1365, 1380, 1273, 581, 1792, 1716, 145, 512, 814, 272, 1333, 1775, 1572, 1744, 2018, 433, 1641, 1529, 647, 1317, 652, 1606, 1737, 470, 826, 80, 929, 1700, 1619, 1253, 358, 1589, 1971, 1019, 1533, 1624, 573, 1684, 
1287, 575, 1761, 527, 404, 1369, 894, 18, 1101, 986, 1772, 1090, 1506, 2015, 1988, 205, 141, 445, 1982], \"\"o\"\": [\"\"faa1f63cb6d36094d1aa50db6c0217eb447a987071119bb127f677b6a7ee0b4fe40eed7cd84e96e8a11bbe3ea14331f3ec4c8f149ce9d2b0253b4676c86557fcec4a5f8ca4e1ee081c66bf0a3cb594c6b5739f77f62fc5e76991869c23a97f01816cde3dfc24b2ca2fbb12b50fde324f18aa51718d681772bf9caf3c059a6748cbcaf4dd1c4fa02645d74699d7d265faf938c339f6cc8f57db9bd4cff8e03cae9e5d21a651b33525e86e335dff61520e8f23d7002f05fa186075a335fb7b2c740133b5a72760ccd216127d69983aa31a090a3b6ca56a48b6372cab60c979465d84dc94e5452c92517b643882fa82c22a26b4feaaa1b0ae8fcb989b10d0351fb3c9c5e56e719f820442612a67fff334438f3f5d35ff6db1b5f7a50670c7fec014f6fc19c352eb011911faf62a230e10c2d16f6c84b46cf9ee7eb1afb9c61a523891e31da2a18b445769d75c11873566dc8196d77e985423226bd1db10e4ce9eb10c2f69db7ce57d47281401617978d2bcfca23b9015b9e705615b8bf773daa87a18417f86e5338a7929fa4f10c6864af09870bfd9ddfb7848\"\", \"\"b41d89a196a35252a965ce3c330eac369ead56e9f06e2016da4d6971fe0b8d6e677e1018e7a1bd2fa0b2c1faaa12650d678352ecc81f6be879213fe78b8004b87dd7dcadec59df4dcafdb3c9aa55dcb2cc2bcf2193574b201c9a1c14764d69716f63b0c1aa30a2846696f2a1c790ca2cb26370d7e20904a8748ea98a95ee3cbb95c5f342de4e71bbf0262e84d59188ea72fe4449a16e7c73f88ed06b9cb724902a85d063c03e9b1a63dd18b9604625ca3cb8110d9c8f93e1771525c51b6ee092d554e84d61df5b557994f32191bb2b6801d9727fb707d5287e6c83d6b16763a6e66526baf80765a58d36df744be7872d2750eb28a86a519a21ee710f618c09cb2bd45f21e805ae4e11eb2987d7be31c32164d4f828fc35c389d516d0d6a54e25041985cffcb6124b4d3fa5b0ba91e19d60e3102370e9c1c768df1b427c682304a1dfdea2d3e514db22057f43d8121b8daf7c434831e5b618bbca9f4e198741927bdc168e4703fb1f703957f7b70491e06bec4adee19d29ef5e938695e1d49ef50ceef0a9c3e46bd8fe309e013e5ea0d35c5ebf3dddd97573\"\"], \"\"s\"\": null, \"\"u\"\": \"\"962d77dfaf892b596b3255c022359e54f3e8dc8b21c3d1b32ebd05555f433192\"\", \"\"v\"\": 1, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": 
null}\")\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"); let data_row = DataRow::try_from(&bytes).unwrap(); - // let e = DataColumn::parse(bytes).unwrap(); let col = &data_row.columns[2]; let e: EqlEncrypted = col.try_into().unwrap(); let expected = Identifier::new("encrypted", "encrypted_text"); - assert_eq!(e.identifier, expected); } From d63ab5a6c04e93a716aa66591246bab2aac0e3fe Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 13 May 2025 15:04:37 +1000 Subject: [PATCH 32/50] feat: handle null encrypted column with binary encoding --- packages/cipherstash-proxy/src/error.rs | 3 + .../src/postgresql/messages/data_row.rs | 72 +++++++++++++------ 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/packages/cipherstash-proxy/src/error.rs b/packages/cipherstash-proxy/src/error.rs index 2fcd6eea..ad3762af 100644 --- a/packages/cipherstash-proxy/src/error.rs +++ b/packages/cipherstash-proxy/src/error.rs @@ -205,6 +205,9 @@ pub enum EncryptError { #[error("Encrypted column could not be parsed")] ColumnCouldNotBeParsed, + #[error("Encrypted column is null")] + ColumnIsNull, + #[error("Column '{column}' in table '{table}' could not be deserialised. 
For help visit {}#encrypt-column-could-not-be-deserialised", ERROR_DOC_BASE_URL)] ColumnCouldNotBeDeserialised { table: String, column: String }, diff --git a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs index 905b6a29..33adcfb2 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs @@ -32,12 +32,17 @@ impl DataRow { .and_then(|config| { data_column .try_into() - .inspect_err(|_| { - let err = EncryptError::ColumnCouldNotBeDeserialised { - table: config.identifier.table.to_owned(), - column: config.identifier.column.to_owned(), - }; - error!(target: DECRYPT, msg = err.to_string()); + .inspect_err(|err| match err { + Error::Encrypt(EncryptError::ColumnIsNull) => { + // Not an error, as you were + } + _ => { + let err = EncryptError::ColumnCouldNotBeDeserialised { + table: config.identifier.table.to_owned(), + column: config.identifier.column.to_owned(), + }; + error!(target: DECRYPT, msg = err.to_string()); + } }) .ok() }); @@ -189,7 +194,23 @@ impl TryFrom<&DataColumn> for eql::EqlEncrypted { } } else { // 12 bytes for the binary rowtype header - // 1 byte for the jsonb header (value of 1) + // plus 1 byte for the jsonb header (value of 1) + // [Int32] Number of fields (N) + // [Int32] OID of the field’s type + // [Int32] Length of the field (in bytes), or -1 for NULL + + let start = 4 + 4; + let end = 4 + 4 + 4; + + let mut len_bytes = [0u8; 4]; // Create a fixed-size array + len_bytes.copy_from_slice(&bytes[start..end]); + + let len = i32::from_be_bytes(len_bytes); + + if len == NULL { + return Err(EncryptError::ColumnIsNull.into()); + } + let start = 12 + 1; let sliced = &bytes[start..]; @@ -226,19 +247,7 @@ mod tests { } #[test] - pub fn parse_encrypted_column() { - log::init(LogConfig::with_level(LogLevel::Debug)); - use bytes::Buf; - - let bytes = 
to_message(b"\0\0\0\x01\0\0\x0e\xda\0\0\n0\x01"); - - let mut cursor = Cursor::new(bytes); - let a = cursor.get_i32(); - let b = cursor.get_i32(); - let c = cursor.get_i32(); - - info!("{a}, {b}, {c}"); - + pub fn parse_encrypted_column_with_binary_encoding() { // Binary // SELECT id, encrypted_text FROM encrypted WHERE id = $1 let bytes = to_message(b"D\0\0\nR\0\x02\0\0\0\x08w\xaam\xf8Y$\x9dI\0\0\n<\0\0\0\x01\0\0\x0e\xda\0\0\n0\x01{\"b\": null, \"c\": \"mBbLbP2ww9ymEpm_yfj>@=^)JCqtLxcewai)Ilzx#HbC2p3F;dB`XP9af|s-igMjdMWLYPqYWAB#2|% = col.try_into(); + + result.expect_err("Expected parsing of NULL column to fail"); + } + + #[test] + pub fn parse_encrypted_column_with_text_encoding() { // SELECT encrypted_jsonb FROM encrypted LIMIT 1 let bytes = to_message(b"D\0\0\x03\xba\0\x01\0\0\x03\xb0(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_jsonb\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": null, \"\"u\"\": null, \"\"v\"\": 1, \"\"sv\"\": [{\"\"b\"\": \"\"8067db44a848ab32c3056a3dbe4edf16\"\", \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"9493d6010fe7845d52149b697729c745\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": null}, {\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;m8QkTKr|h>Q`^NbW(CC|>SD}UM=o%mz(Fw#LQFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"b1f0e4bb3855bc33936ef1fddf532765\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": \"\"fbc7a11fc81f2a31c904c5b05572b054824e3b5f5ece78f1b711f93175f0a4a9726157cea247e107\"\"}], \"\"ocf\"\": null, \"\"ocv\"\": null}\")"); let data_row = DataRow::try_from(&bytes).unwrap(); @@ -257,7 +286,10 @@ mod tests { let e: 
EqlEncrypted = col.try_into().unwrap(); let expected = Identifier::new("encrypted", "encrypted_jsonb"); assert_eq!(e.identifier, expected); + } + #[test] + pub fn parse_encrypted_column_with_text_encoding_and_null() { // SELECT * FROM encrypted WHERE id = $1; // Only encrypted_text is NOT NULL let bytes = to_message(b"D\0\0\n\x91\0\n\0\0\0\n1297231342\xff\xff\xff\xff\0\0\nY(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbJ;S^xMu@++(U20{lxK;qYYaDYF#30N~x;wyOUMoFOB9K!>A_9g9j@+M6V3wENqu#H8gDb9OZewzJaCBv4Uvy=7bie\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_text\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": [369, 381, 1758, 403, 35, 609, 1181, 1098, 1347, 1633, 1150, 815, 1997, 234, 1858, 656, 1335, 936, 1204, 630, 1764, 1328, 1649, 1396, 113, 1149, 1499, 1147, 586, 1942, 901, 1256, 1226, 1045, 637, 279, 1162, 1077, 1340, 1336, 1448, 700, 176, 1849, 1915, 1389, 71, 515, 633, 388, 1877, 1339, 1239, 638, 1365, 1380, 1273, 581, 1792, 1716, 145, 512, 814, 272, 1333, 1775, 1572, 1744, 2018, 433, 1641, 1529, 647, 1317, 652, 1606, 1737, 470, 826, 80, 929, 1700, 1619, 1253, 358, 1589, 1971, 1019, 1533, 1624, 573, 1684, 1287, 575, 1761, 527, 404, 1369, 894, 18, 1101, 986, 1772, 1090, 1506, 2015, 1988, 205, 141, 445, 1982], \"\"o\"\": 
[\"\"faa1f63cb6d36094d1aa50db6c0217eb447a987071119bb127f677b6a7ee0b4fe40eed7cd84e96e8a11bbe3ea14331f3ec4c8f149ce9d2b0253b4676c86557fcec4a5f8ca4e1ee081c66bf0a3cb594c6b5739f77f62fc5e76991869c23a97f01816cde3dfc24b2ca2fbb12b50fde324f18aa51718d681772bf9caf3c059a6748cbcaf4dd1c4fa02645d74699d7d265faf938c339f6cc8f57db9bd4cff8e03cae9e5d21a651b33525e86e335dff61520e8f23d7002f05fa186075a335fb7b2c740133b5a72760ccd216127d69983aa31a090a3b6ca56a48b6372cab60c979465d84dc94e5452c92517b643882fa82c22a26b4feaaa1b0ae8fcb989b10d0351fb3c9c5e56e719f820442612a67fff334438f3f5d35ff6db1b5f7a50670c7fec014f6fc19c352eb011911faf62a230e10c2d16f6c84b46cf9ee7eb1afb9c61a523891e31da2a18b445769d75c11873566dc8196d77e985423226bd1db10e4ce9eb10c2f69db7ce57d47281401617978d2bcfca23b9015b9e705615b8bf773daa87a18417f86e5338a7929fa4f10c6864af09870bfd9ddfb7848\"\", \"\"b41d89a196a35252a965ce3c330eac369ead56e9f06e2016da4d6971fe0b8d6e677e1018e7a1bd2fa0b2c1faaa12650d678352ecc81f6be879213fe78b8004b87dd7dcadec59df4dcafdb3c9aa55dcb2cc2bcf2193574b201c9a1c14764d69716f63b0c1aa30a2846696f2a1c790ca2cb26370d7e20904a8748ea98a95ee3cbb95c5f342de4e71bbf0262e84d59188ea72fe4449a16e7c73f88ed06b9cb724902a85d063c03e9b1a63dd18b9604625ca3cb8110d9c8f93e1771525c51b6ee092d554e84d61df5b557994f32191bb2b6801d9727fb707d5287e6c83d6b16763a6e66526baf80765a58d36df744be7872d2750eb28a86a519a21ee710f618c09cb2bd45f21e805ae4e11eb2987d7be31c32164d4f828fc35c389d516d0d6a54e25041985cffcb6124b4d3fa5b0ba91e19d60e3102370e9c1c768df1b427c682304a1dfdea2d3e514db22057f43d8121b8daf7c434831e5b618bbca9f4e198741927bdc168e4703fb1f703957f7b70491e06bec4adee19d29ef5e938695e1d49ef50ceef0a9c3e46bd8fe309e013e5ea0d35c5ebf3dddd97573\"\"], \"\"s\"\": null, \"\"u\"\": \"\"962d77dfaf892b596b3255c022359e54f3e8dc8b21c3d1b32ebd05555f433192\"\", \"\"v\"\": 1, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": null}\")\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"); From 497a40b764ab6fbca65a1a531c4c0220fec06902 Mon Sep 
17 00:00:00 2001 From: Toby Hede Date: Tue, 13 May 2025 20:40:12 +1000 Subject: [PATCH 33/50] fix: handle datarow with binary encoding and null --- .../src/extended_protocol_error_messages.rs | 2 +- .../src/postgresql/backend.rs | 4 +- .../src/postgresql/messages/data_row.rs | 143 +++++++++++++----- 3 files changed, 111 insertions(+), 38 deletions(-) diff --git a/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs b/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs index 37ac99ea..92ef24cb 100644 --- a/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs +++ b/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs @@ -70,7 +70,7 @@ mod tests { // returns eql_v1_encrypted and the client cannot convert to a string. // If mapping errors are enabled (enable_mapping_errors or CS_DEVELOPMENT__ENABLE_MAPPING_ERRORS), // then Proxy will return an error that says "Column X in table Y has no Encrypt configuration" - assert_eq!(msg, "error serializing parameter 1: cannot convert between the Rust type `&str` and the Postgres type `eql_v1_encrypted`"); + assert_eq!(msg, "error serializing parameter 1: cannot convert between the Rust type `&str` and the Postgres type `jsonb`"); } else { unreachable!(); } diff --git a/packages/cipherstash-proxy/src/postgresql/backend.rs b/packages/cipherstash-proxy/src/postgresql/backend.rs index 893addb1..1e162b6c 100644 --- a/packages/cipherstash-proxy/src/postgresql/backend.rs +++ b/packages/cipherstash-proxy/src/postgresql/backend.rs @@ -246,7 +246,7 @@ where } }; - let rows: Vec = self.buffer.drain().into_iter().collect(); + let mut rows: Vec = self.buffer.drain().into_iter().collect(); debug!(target: DEVELOPMENT, client_id = self.context.client_id, rows = rows.len()); let result_column_count = match rows.first() { @@ -264,7 +264,7 @@ where // Each row is converted into Vec> let ciphertexts: Vec> = rows - .iter() + .iter_mut() 
.flat_map(|row| row.to_ciphertext(projection_columns)) .collect::>(); diff --git a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs index 33adcfb2..9d5c6709 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs @@ -21,11 +21,11 @@ pub struct DataColumn { impl DataRow { pub fn to_ciphertext( - &self, + &mut self, column_configuration: &Vec>, ) -> Vec> { let mut result = vec![]; - for (data_column, column_config) in self.columns.iter().zip(column_configuration) { + for (data_column, column_config) in self.columns.iter_mut().zip(column_configuration) { let encrypted = column_config .as_ref() .filter(|_| data_column.is_not_null()) @@ -35,6 +35,7 @@ impl DataRow { .inspect_err(|err| match err { Error::Encrypt(EncryptError::ColumnIsNull) => { // Not an error, as you were + data_column.set_null(); } _ => { let err = EncryptError::ColumnCouldNotBeDeserialised { @@ -80,6 +81,10 @@ impl DataColumn { self.bytes.is_some() } + pub fn set_null(&mut self) { + self.bytes = None; + } + pub fn rewrite(&mut self, b: &[u8]) { if let Some(ref mut bytes) = self.bytes { bytes.clear(); @@ -169,10 +174,10 @@ impl TryFrom for BytesMut { } } -impl TryFrom<&DataColumn> for eql::EqlEncrypted { +impl TryFrom<&mut DataColumn> for eql::EqlEncrypted { type Error = Error; - fn try_from(col: &DataColumn) -> Result { + fn try_from(col: &mut DataColumn) -> Result { if let Some(bytes) = &col.bytes { if &bytes[0..=1] == b"(\"" { // Text encoding @@ -230,75 +235,143 @@ impl TryFrom<&DataColumn> for eql::EqlEncrypted { #[cfg(test)] mod tests { - use std::io::Cursor; - use super::DataRow; + use crate::Identifier; use crate::{ config::{LogConfig, LogLevel}, log, - postgresql::messages::data_row::DataColumn, + postgresql::{data, messages::data_row::DataColumn, Column}, }; - use crate::{EqlEncrypted, Identifier}; use bytes::BytesMut; - use 
tracing::info; + use cipherstash_client::schema::{ColumnConfig, ColumnType}; fn to_message(s: &[u8]) -> BytesMut { BytesMut::from(s) } + fn column_config(column: &str) -> Option { + let identifier = Identifier::new("encrypted", column); + let config = ColumnConfig::build("column".to_string()).casts_as(ColumnType::SmallInt); + let column = Column::new(identifier, config); + Some(column) + } + + fn column_config_with_id(column: &str) -> Vec> { + vec![None, column_config(column)] + } + #[test] - pub fn parse_encrypted_column_with_binary_encoding() { - // Binary + pub fn to_ciphertext_with_binary_encoding() { + log::init(LogConfig::with_level(LogLevel::Debug)); + + // Binary // SELECT id, encrypted_text FROM encrypted WHERE id = $1 let bytes = to_message(b"D\0\0\nR\0\x02\0\0\0\x08w\xaam\xf8Y$\x9dI\0\0\n<\0\0\0\x01\0\0\x0e\xda\0\0\n0\x01{\"b\": null, \"c\": \"mBbLbP2ww9ymEpm_yfj>@=^)JCqtLxcewai)Ilzx#HbC2p3F;dB`XP9af|s-igMjdMWLYPqYWAB#2|% = col.try_into(); + // let bytes = to_message(b"D\0\0\0\"\0\x02\0\0\0\x089\"\x88A\xe59\xb0\x13\0\0\0\x0c\0\0\0\x01\0\0\x0e\xda\xff\xff\xff\xff"); + let bytes = to_message(b"D\0\0\0\"\0\x02\0\0\0\x08>\xe6=uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_jsonb\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": null, \"\"u\"\": null, \"\"v\"\": 1, \"\"sv\"\": [{\"\"b\"\": \"\"8067db44a848ab32c3056a3dbe4edf16\"\", \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;mA>uOUiFLgDpZXhU#s#%c4wyi&Z7`(d0IxUty-cI#Yp%o~QFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"9493d6010fe7845d52149b697729c745\"\", \"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": null}, {\"\"b\"\": null, \"\"c\"\": \"\"mBbLR(BvRN1BF^PAFs!B^`U;m8QkTKr|h>Q`^NbW(CC|>SD}UM=o%mz(Fw#LQFF39^sRf>4*EG{zlk;}ArEQ}NQHa9@;T73aPOSTpuh\"\", \"\"m\"\": null, \"\"o\"\": null, \"\"s\"\": \"\"b1f0e4bb3855bc33936ef1fddf532765\"\", 
\"\"u\"\": null, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": \"\"fbc7a11fc81f2a31c904c5b05572b054824e3b5f5ece78f1b711f93175f0a4a9726157cea247e107\"\"}], \"\"ocf\"\": null, \"\"ocv\"\": null}\")"); - let data_row = DataRow::try_from(&bytes).unwrap(); + let mut data_row = DataRow::try_from(&bytes).unwrap(); - let col = data_row.columns.first().unwrap(); - let e: EqlEncrypted = col.try_into().unwrap(); - let expected = Identifier::new("encrypted", "encrypted_jsonb"); - assert_eq!(e.identifier, expected); + assert!(data_row.columns[0].bytes.is_some()); + + let column_config = vec![column_config("encrypted_jsonb")]; + let encrypted = data_row.to_ciphertext(&column_config); + + assert_eq!(encrypted.len(), 1); + assert!(encrypted[0].is_some()); + + assert_eq!( + column_config[0].as_ref().unwrap().identifier, + encrypted[0].as_ref().unwrap().identifier + ); } #[test] - pub fn parse_encrypted_column_with_text_encoding_and_null() { + pub fn to_ciphertext_with_text_encoding_and_null() { + log::init(LogConfig::with_level(LogLevel::Debug)); + // SELECT * FROM encrypted WHERE id = $1; // Only encrypted_text is NOT NULL let bytes = to_message(b"D\0\0\n\x91\0\n\0\0\0\n1297231342\xff\xff\xff\xff\0\0\nY(\"{\"\"b\"\": null, \"\"c\"\": \"\"mBbJ;S^xMu@++(U20{lxK;qYYaDYF#30N~x;wyOUMoFOB9K!>A_9g9j@+M6V3wENqu#H8gDb9OZewzJaCBv4Uvy=7bie\"\", \"\"i\"\": {\"\"c\"\": \"\"encrypted_text\"\", \"\"t\"\": \"\"encrypted\"\"}, \"\"m\"\": [369, 381, 1758, 403, 35, 609, 1181, 1098, 1347, 1633, 1150, 815, 1997, 234, 1858, 656, 1335, 936, 1204, 630, 1764, 1328, 1649, 1396, 113, 1149, 1499, 1147, 586, 1942, 901, 1256, 1226, 1045, 637, 279, 1162, 1077, 1340, 1336, 1448, 700, 176, 1849, 1915, 1389, 71, 515, 633, 388, 1877, 1339, 1239, 638, 1365, 1380, 1273, 581, 1792, 1716, 145, 512, 814, 272, 1333, 1775, 1572, 1744, 2018, 433, 1641, 1529, 647, 1317, 652, 1606, 1737, 470, 826, 80, 929, 1700, 1619, 1253, 358, 1589, 1971, 1019, 1533, 1624, 573, 1684, 1287, 575, 1761, 527, 404, 1369, 894, 18, 1101, 986, 
1772, 1090, 1506, 2015, 1988, 205, 141, 445, 1982], \"\"o\"\": [\"\"faa1f63cb6d36094d1aa50db6c0217eb447a987071119bb127f677b6a7ee0b4fe40eed7cd84e96e8a11bbe3ea14331f3ec4c8f149ce9d2b0253b4676c86557fcec4a5f8ca4e1ee081c66bf0a3cb594c6b5739f77f62fc5e76991869c23a97f01816cde3dfc24b2ca2fbb12b50fde324f18aa51718d681772bf9caf3c059a6748cbcaf4dd1c4fa02645d74699d7d265faf938c339f6cc8f57db9bd4cff8e03cae9e5d21a651b33525e86e335dff61520e8f23d7002f05fa186075a335fb7b2c740133b5a72760ccd216127d69983aa31a090a3b6ca56a48b6372cab60c979465d84dc94e5452c92517b643882fa82c22a26b4feaaa1b0ae8fcb989b10d0351fb3c9c5e56e719f820442612a67fff334438f3f5d35ff6db1b5f7a50670c7fec014f6fc19c352eb011911faf62a230e10c2d16f6c84b46cf9ee7eb1afb9c61a523891e31da2a18b445769d75c11873566dc8196d77e985423226bd1db10e4ce9eb10c2f69db7ce57d47281401617978d2bcfca23b9015b9e705615b8bf773daa87a18417f86e5338a7929fa4f10c6864af09870bfd9ddfb7848\"\", \"\"b41d89a196a35252a965ce3c330eac369ead56e9f06e2016da4d6971fe0b8d6e677e1018e7a1bd2fa0b2c1faaa12650d678352ecc81f6be879213fe78b8004b87dd7dcadec59df4dcafdb3c9aa55dcb2cc2bcf2193574b201c9a1c14764d69716f63b0c1aa30a2846696f2a1c790ca2cb26370d7e20904a8748ea98a95ee3cbb95c5f342de4e71bbf0262e84d59188ea72fe4449a16e7c73f88ed06b9cb724902a85d063c03e9b1a63dd18b9604625ca3cb8110d9c8f93e1771525c51b6ee092d554e84d61df5b557994f32191bb2b6801d9727fb707d5287e6c83d6b16763a6e66526baf80765a58d36df744be7872d2750eb28a86a519a21ee710f618c09cb2bd45f21e805ae4e11eb2987d7be31c32164d4f828fc35c389d516d0d6a54e25041985cffcb6124b4d3fa5b0ba91e19d60e3102370e9c1c768df1b427c682304a1dfdea2d3e514db22057f43d8121b8daf7c434831e5b618bbca9f4e198741927bdc168e4703fb1f703957f7b70491e06bec4adee19d29ef5e938695e1d49ef50ceef0a9c3e46bd8fe309e013e5ea0d35c5ebf3dddd97573\"\"], \"\"s\"\": null, \"\"u\"\": \"\"962d77dfaf892b596b3255c022359e54f3e8dc8b21c3d1b32ebd05555f433192\"\", \"\"v\"\": 1, \"\"sv\"\": null, \"\"ocf\"\": null, \"\"ocv\"\": 
null}\")\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"); - let data_row = DataRow::try_from(&bytes).unwrap(); - let col = &data_row.columns[2]; - let e: EqlEncrypted = col.try_into().unwrap(); - let expected = Identifier::new("encrypted", "encrypted_text"); - assert_eq!(e.identifier, expected); + let mut data_row = DataRow::try_from(&bytes).unwrap(); + + assert!(data_row.columns[0].bytes.is_some()); + + let column_config = vec![ + None, + None, + column_config("encrypted_text"), + column_config("encrypted_bool"), + column_config("encrypted_int2"), + column_config("encrypted_int4"), + column_config("encrypted_int8"), + column_config("encrypted_float8"), + column_config("encrypted_date"), + column_config("encrypted_jsonb"), + ]; + + let encrypted = data_row.to_ciphertext(&column_config); + + assert_eq!(encrypted.len(), 10); + + assert!(encrypted[0].is_none()); + assert!(encrypted[1].is_none()); + assert!(encrypted[2].is_some()); // <-- Some + assert!(encrypted[3].is_none()); + // etc + + assert_eq!( + column_config[2].as_ref().unwrap().identifier, + encrypted[2].as_ref().unwrap().identifier + ); } #[test] From 8aef2828103dd67e583fa18370e0fb39210131c2 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 09:22:58 +1000 Subject: [PATCH 34/50] fix: cast literals and params as encrypted --- packages/eql-mapper/src/lib.rs | 18 +++++----- ...erals.rs => cast_literals_as_encrypted.rs} | 36 ++++--------------- ..._in_row.rs => cast_params_as_encrypted.rs} | 21 +++++------ .../src/transformation_rules/helpers.rs | 36 +++++++++---------- .../src/transformation_rules/mod.rs | 8 ++--- .../eql-mapper/src/type_checked_statement.rs | 10 +++--- 6 files changed, 49 insertions(+), 80 deletions(-) rename packages/eql-mapper/src/transformation_rules/{replace_plaintext_eql_literals.rs => cast_literals_as_encrypted.rs} (57%) rename packages/eql-mapper/src/transformation_rules/{wrap_eql_params_in_row.rs => 
cast_params_as_encrypted.rs} (85%) diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index e9988c4b..d9fa8d4b 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -983,7 +983,7 @@ mod test { } #[test] - fn select_with_literal_subsitution() { + fn select_with_literal_cast_as_encrypted() { // init_tracing(); let schema = resolver(schema! { @@ -1026,14 +1026,14 @@ mod test { )])) { Ok(transformed_statement) => assert_eq!( transformed_statement.to_string(), - "SELECT * FROM employees WHERE salary > ROW('ENCRYPTED'::JSONB)" + "SELECT * FROM employees WHERE salary > 'ENCRYPTED'::JSONB::eql_v1_encrypted" ), Err(err) => panic!("statement transformation failed: {}", err), }; } #[test] - fn insert_with_literal_subsitution() { + fn insert_with_literal_cast_as_encrypted() { // init_tracing(); let schema = resolver(schema! { @@ -1073,7 +1073,7 @@ mod test { )])) { Ok(transformed_statement) => assert_eq!( transformed_statement.to_string(), - "INSERT INTO employees (salary) VALUES (ROW('ENCRYPTED'::JSONB))" + "INSERT INTO employees (salary) VALUES ('ENCRYPTED'::JSONB::eql_v1_encrypted)" ), Err(err) => panic!("statement transformation failed: {}", err), }; @@ -1392,7 +1392,7 @@ mod test { } #[test] - fn eql_params_are_wrapped_in_row() { + fn select_with_params_cast_as_encrypted() { // init_tracing(); let schema = resolver(schema! 
{ tables: { @@ -1415,7 +1415,7 @@ mod test { Ok(statement) => { assert_eq!( statement.to_string(), - "SELECT * FROM employees WHERE eql_col = ROW($1::JSONB) AND native_col = $2" + "SELECT * FROM employees WHERE eql_col = $1::JSONB::eql_v1_encrypted AND native_col = $2" ); } Err(err) => panic!("transformation failed: {err}"), @@ -1450,7 +1450,7 @@ mod test { Ok(statement) => { assert_eq!( statement.to_string(), - "SELECT eql_v1.jsonb_path_query(eql_col, ROW(''::JSONB)), jsonb_path_query(native_col, '$.not-secret') FROM employees" + "SELECT eql_v1.jsonb_path_query(eql_col, ''::JSONB::eql_v1_encrypted), jsonb_path_query(native_col, '$.not-secret') FROM employees" ); } Err(err) => panic!("transformation failed: {err}"), @@ -1567,7 +1567,7 @@ mod test { .map(|expr| match expr { ast::Expr::Identifier(ident) => ident.to_string(), ast::Expr::Value(ast::Value::SingleQuotedString(s)) => { - format!("ROW(''::JSONB)", s) + format!("''::JSONB::eql_v1_encrypted", s) } _ => panic!("unsupported expr type in test util"), }) @@ -1623,7 +1623,7 @@ mod test { match typed.transform(test_helpers::dummy_encrypted_json_selector(&statement, ast::Value::SingleQuotedString("medications".to_owned()))) { Ok(statement) => assert_eq!( statement.to_string(), - format!("SELECT id, notes {} ROW(''::JSONB) AS meds FROM patients", op) + format!("SELECT id, notes {} ''::JSONB::eql_v1_encrypted AS meds FROM patients", op) ), Err(err) => panic!("transformation failed: {err}"), } diff --git a/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs b/packages/eql-mapper/src/transformation_rules/cast_literals_as_encrypted.rs similarity index 57% rename from packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs rename to packages/eql-mapper/src/transformation_rules/cast_literals_as_encrypted.rs index a153b115..af195130 100644 --- a/packages/eql-mapper/src/transformation_rules/replace_plaintext_eql_literals.rs +++ 
b/packages/eql-mapper/src/transformation_rules/cast_literals_as_encrypted.rs @@ -1,27 +1,25 @@ use std::{any::type_name, collections::HashMap}; -use sqltk::parser::ast::{ - CastKind, DataType, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, - FunctionArguments, Ident, ObjectName, Value, -}; +use sqltk::parser::ast::{Expr, Value}; use sqltk::{NodeKey, NodePath, Visitable}; use crate::EqlMapperError; +use super::helpers::cast_as_encrypted; use super::TransformationRule; #[derive(Debug)] -pub struct ReplacePlaintextEqlLiterals<'ast> { +pub struct CastLiteralsAsEncrypted<'ast> { encrypted_literals: HashMap, Value>, } -impl<'ast> ReplacePlaintextEqlLiterals<'ast> { +impl<'ast> CastLiteralsAsEncrypted<'ast> { pub fn new(encrypted_literals: HashMap, Value>) -> Self { Self { encrypted_literals } } } -impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { +impl<'ast> TransformationRule<'ast> for CastLiteralsAsEncrypted<'ast> { fn apply( &mut self, node_path: &NodePath<'ast>, @@ -31,7 +29,7 @@ impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { if let Some((Expr::Value(value),)) = node_path.last_1_as::() { if let Some(replacement) = self.encrypted_literals.remove(&NodeKey::new(value)) { let target_node = target_node.downcast_mut::().unwrap(); - *target_node = make_row_expression(replacement); + *target_node = cast_as_encrypted(replacement); return Ok(true); } } @@ -58,25 +56,3 @@ impl<'ast> TransformationRule<'ast> for ReplacePlaintextEqlLiterals<'ast> { } } } - -fn make_row_expression(replacement: Value) -> Expr { - Expr::Function(Function { - name: ObjectName(vec![Ident::new("ROW")]), - uses_odbc_syntax: false, - parameters: FunctionArguments::None, - args: FunctionArguments::List(FunctionArgumentList { - duplicate_treatment: None, - clauses: vec![], - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(Expr::Value(replacement)), - data_type: 
DataType::JSONB, - format: None, - }))], - }), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - }) -} diff --git a/packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs b/packages/eql-mapper/src/transformation_rules/cast_params_as_encrypted.rs similarity index 85% rename from packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs rename to packages/eql-mapper/src/transformation_rules/cast_params_as_encrypted.rs index 514ddf8c..ee89fa6e 100644 --- a/packages/eql-mapper/src/transformation_rules/wrap_eql_params_in_row.rs +++ b/packages/eql-mapper/src/transformation_rules/cast_params_as_encrypted.rs @@ -1,26 +1,23 @@ -use std::collections::HashMap; -use std::sync::Arc; - +use super::helpers::cast_as_encrypted; +use super::TransformationRule; +use crate::{EqlMapperError, Type}; use sqltk::parser::ast::{Expr, Value}; use sqltk::{NodeKey, NodePath, Visitable}; - -use crate::{EqlMapperError, Type}; - -use super::helpers::make_row_expression; -use super::TransformationRule; +use std::collections::HashMap; +use std::sync::Arc; #[derive(Debug)] -pub struct WrapEqlParamsInRow<'ast> { +pub struct CastParamsAsEncrypted<'ast> { node_types: Arc, Type>>, } -impl<'ast> WrapEqlParamsInRow<'ast> { +impl<'ast> CastParamsAsEncrypted<'ast> { pub fn new(node_types: Arc, Type>>) -> Self { Self { node_types } } } -impl<'ast> TransformationRule<'ast> for WrapEqlParamsInRow<'ast> { +impl<'ast> TransformationRule<'ast> for CastParamsAsEncrypted<'ast> { fn apply( &mut self, node_path: &NodePath<'ast>, @@ -33,7 +30,7 @@ impl<'ast> TransformationRule<'ast> for WrapEqlParamsInRow<'ast> { unreachable!("the Expr is known to be Expr::Value(Value::Placeholder(_))") }; - *expr = make_row_expression(value); + *expr = cast_as_encrypted(value); return Ok(true); } } diff --git a/packages/eql-mapper/src/transformation_rules/helpers.rs b/packages/eql-mapper/src/transformation_rules/helpers.rs index 675e9a2d..a4dc965a 100644 --- 
a/packages/eql-mapper/src/transformation_rules/helpers.rs +++ b/packages/eql-mapper/src/transformation_rules/helpers.rs @@ -48,26 +48,22 @@ pub(crate) fn wrap_in_1_arg_function(expr: Expr, name: ObjectName) -> Expr { }) } -pub(crate) fn make_row_expression(wrapped: sqltk::parser::ast::Value) -> Expr { - Expr::Function(Function { - name: ObjectName(vec![Ident::new("ROW")]), - uses_odbc_syntax: false, - parameters: FunctionArguments::None, - args: FunctionArguments::List(FunctionArgumentList { - duplicate_treatment: None, - clauses: vec![], - args: vec![FunctionArg::Unnamed(FunctionArgExpr::Expr(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(Expr::Value(wrapped)), - data_type: DataType::JSONB, - format: None, - }))], - }), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - }) +pub(crate) fn cast_as_encrypted(wrapped: sqltk::parser::ast::Value) -> Expr { + let cast_jsonb = Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(Expr::Value(wrapped)), + data_type: DataType::JSONB, + format: None, + }; + + let encrypted_type = ObjectName(vec![Ident::new("eql_v1_encrypted")]); + + Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(cast_jsonb), + data_type: DataType::Custom(encrypted_type, vec![]), + format: None, + } } struct ContainsExprWithType<'ast, 't> { diff --git a/packages/eql-mapper/src/transformation_rules/mod.rs b/packages/eql-mapper/src/transformation_rules/mod.rs index 7054e707..33964e9d 100644 --- a/packages/eql-mapper/src/transformation_rules/mod.rs +++ b/packages/eql-mapper/src/transformation_rules/mod.rs @@ -11,24 +11,24 @@ mod helpers; +mod cast_literals_as_encrypted; +mod cast_params_as_encrypted; mod fail_on_placeholder_change; mod group_by_eql_col; mod preserve_effective_aliases; -mod replace_plaintext_eql_literals; mod rewrite_standard_sql_fns_on_eql_types; mod wrap_eql_cols_in_order_by_with_ore_fn; -mod wrap_eql_params_in_row; mod wrap_grouped_eql_col_in_aggregate_fn; use 
std::marker::PhantomData; +pub(crate) use cast_literals_as_encrypted::*; +pub(crate) use cast_params_as_encrypted::*; pub(crate) use fail_on_placeholder_change::*; pub(crate) use group_by_eql_col::*; pub(crate) use preserve_effective_aliases::*; -pub(crate) use replace_plaintext_eql_literals::*; pub(crate) use rewrite_standard_sql_fns_on_eql_types::*; pub(crate) use wrap_eql_cols_in_order_by_with_ore_fn::*; -pub(crate) use wrap_eql_params_in_row::*; pub(crate) use wrap_grouped_eql_col_in_aggregate_fn::*; use crate::EqlMapperError; diff --git a/packages/eql-mapper/src/type_checked_statement.rs b/packages/eql-mapper/src/type_checked_statement.rs index 8c0be314..425390cf 100644 --- a/packages/eql-mapper/src/type_checked_statement.rs +++ b/packages/eql-mapper/src/type_checked_statement.rs @@ -4,10 +4,10 @@ use sqltk::parser::ast::{self, Statement}; use sqltk::{AsNodeKey, NodeKey, Transformable}; use crate::{ - DryRunnable, EqlMapperError, EqlValue, FailOnPlaceholderChange, GroupByEqlCol, Param, - PreserveEffectiveAliases, Projection, ReplacePlaintextEqlLiterals, + CastLiteralsAsEncrypted, CastParamsAsEncrypted, DryRunnable, EqlMapperError, EqlValue, + FailOnPlaceholderChange, GroupByEqlCol, Param, PreserveEffectiveAliases, Projection, RewriteStandardSqlFnsOnEqlTypes, TransformationRule, Type, Value, - WrapEqlColsInOrderByWithOreFn, WrapEqlParamsInRow, WrapGroupedEqlColInAggregateFn, + WrapEqlColsInOrderByWithOreFn, WrapGroupedEqlColInAggregateFn, }; /// A `TypeCheckedStatement` is returned from a successful call to [`crate::type_check`]. 
@@ -145,9 +145,9 @@ impl<'ast> TypeCheckedStatement<'ast> { GroupByEqlCol::new(Arc::clone(&self.node_types)), WrapEqlColsInOrderByWithOreFn::new(Arc::clone(&self.node_types)), PreserveEffectiveAliases, - ReplacePlaintextEqlLiterals::new(encrypted_literals), + CastLiteralsAsEncrypted::new(encrypted_literals), FailOnPlaceholderChange::new(), - WrapEqlParamsInRow::new(Arc::clone(&self.node_types)), + CastParamsAsEncrypted::new(Arc::clone(&self.node_types)), )) } } From 8f53a362e97f309eca313534c0e5105932993d08 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 10:23:39 +1000 Subject: [PATCH 35/50] fix: skip seralizing unconfigured index terms --- packages/cipherstash-proxy/src/encrypt/mod.rs | 1 - packages/cipherstash-proxy/src/eql/mod.rs | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 681e4b78..386fa8d4 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -60,7 +60,6 @@ impl Encrypt { let rows = client .query("SELECT eql_v1.version() AS version;", &[]) .await; - // let rows = client.query("SELECT 'WAT' AS version;", &[]).await; match rows { Ok(rows) => rows.first().map(|row| row.get("version")), diff --git a/packages/cipherstash-proxy/src/eql/mod.rs b/packages/cipherstash-proxy/src/eql/mod.rs index 03ec0141..a896be28 100644 --- a/packages/cipherstash-proxy/src/eql/mod.rs +++ b/packages/cipherstash-proxy/src/eql/mod.rs @@ -84,25 +84,25 @@ pub struct EqlEncryptedBody { #[derive(Debug, Deserialize, Serialize, Default)] pub struct EqlEncryptedIndexes { - #[serde(rename = "o")] + #[serde(rename = "o", skip_serializing_if = "Option::is_none")] pub(crate) ore_index: Option>, - #[serde(rename = "m")] + #[serde(rename = "m", skip_serializing_if = "Option::is_none")] pub(crate) match_index: Option>, - #[serde(rename = "u")] + #[serde(rename = "u", skip_serializing_if = 
"Option::is_none")] pub(crate) unique_index: Option, - #[serde(rename = "s")] + #[serde(rename = "s", skip_serializing_if = "Option::is_none")] pub(crate) selector: Option, - #[serde(rename = "b")] + #[serde(rename = "b", skip_serializing_if = "Option::is_none")] pub(crate) blake3_index: Option, - #[serde(rename = "ocf")] + #[serde(rename = "ocf", skip_serializing_if = "Option::is_none")] pub(crate) ore_cclw_fixed_index: Option, - #[serde(rename = "ocv")] + #[serde(rename = "ocv", skip_serializing_if = "Option::is_none")] pub(crate) ore_cclw_var_index: Option, - #[serde(rename = "sv")] + #[serde(rename = "sv", skip_serializing_if = "Option::is_none")] pub(crate) ste_vec_index: Option>, } From 00d85dbae57734853cf90ad84e88418a87beb361 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 10:29:30 +1000 Subject: [PATCH 36/50] fix: ensure data for update query --- .../cipherstash-proxy-integration/src/map_concat.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/cipherstash-proxy-integration/src/map_concat.rs b/packages/cipherstash-proxy-integration/src/map_concat.rs index 04005227..90abd9e4 100644 --- a/packages/cipherstash-proxy-integration/src/map_concat.rs +++ b/packages/cipherstash-proxy-integration/src/map_concat.rs @@ -1,11 +1,19 @@ #[cfg(test)] mod tests { - use crate::common::{connect_with_tls, PROXY}; + use crate::common::{clear, connect_with_tls, id, PROXY}; #[tokio::test] async fn map_concat_regression() { let client = connect_with_tls(PROXY).await; + clear().await; + + let id = id(); + let encrypted_text = "hello@cipherstash.com"; + + let sql = "INSERT INTO encrypted (id, encrypted_text) VALUES ($1, $2)"; + client.query(sql, &[&id, &encrypted_text]).await.unwrap(); + let sql = "UPDATE encrypted SET encrypted_text = encrypted_text || 'suffix';"; client From f5be624021b3fe1876c23ff357f2d267c59c929d Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 10:34:32 +1000 Subject: [PATCH 37/50] chore: clippy 
tweaks --- packages/cipherstash-proxy/src/postgresql/backend.rs | 2 +- .../src/postgresql/messages/data_row.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/cipherstash-proxy/src/postgresql/backend.rs b/packages/cipherstash-proxy/src/postgresql/backend.rs index 1e162b6c..8722b765 100644 --- a/packages/cipherstash-proxy/src/postgresql/backend.rs +++ b/packages/cipherstash-proxy/src/postgresql/backend.rs @@ -265,7 +265,7 @@ where // Each row is converted into Vec> let ciphertexts: Vec> = rows .iter_mut() - .flat_map(|row| row.to_ciphertext(projection_columns)) + .flat_map(|row| row.as_ciphertext(projection_columns)) .collect::>(); let start = Instant::now(); diff --git a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs index 9d5c6709..5190d911 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/data_row.rs @@ -20,7 +20,7 @@ pub struct DataColumn { } impl DataRow { - pub fn to_ciphertext( + pub fn as_ciphertext( &mut self, column_configuration: &Vec>, ) -> Vec> { @@ -240,7 +240,7 @@ mod tests { use crate::{ config::{LogConfig, LogLevel}, log, - postgresql::{data, messages::data_row::DataColumn, Column}, + postgresql::{messages::data_row::DataColumn, Column}, }; use bytes::BytesMut; use cipherstash_client::schema::{ColumnConfig, ColumnType}; @@ -270,7 +270,7 @@ mod tests { let mut data_row = DataRow::try_from(&bytes).unwrap(); let column_config = column_config_with_id("encrypted_text"); - let encrypted = data_row.to_ciphertext(&column_config); + let encrypted = data_row.as_ciphertext(&column_config); assert_eq!(encrypted.len(), 2); @@ -299,7 +299,7 @@ mod tests { assert!(data_row.columns[1].bytes.is_some()); let column_config = column_config_with_id("encrypted_text"); - let encrypted = data_row.to_ciphertext(&column_config); + let encrypted = 
data_row.as_ciphertext(&column_config); assert_eq!(encrypted.len(), 2); @@ -322,7 +322,7 @@ mod tests { assert!(data_row.columns[0].bytes.is_some()); let column_config = vec![column_config("encrypted_jsonb")]; - let encrypted = data_row.to_ciphertext(&column_config); + let encrypted = data_row.as_ciphertext(&column_config); assert_eq!(encrypted.len(), 1); assert!(encrypted[0].is_some()); @@ -358,7 +358,7 @@ mod tests { column_config("encrypted_jsonb"), ]; - let encrypted = data_row.to_ciphertext(&column_config); + let encrypted = data_row.as_ciphertext(&column_config); assert_eq!(encrypted.len(), 10); From 709124a2854d55d44795c74b766bf6ffd513917a Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 14:39:41 +1000 Subject: [PATCH 38/50] feat: encrypted columns without config are always an error --- .../src/extended_protocol_error_messages.rs | 6 +----- .../cipherstash-proxy/src/postgresql/frontend.rs | 12 ++++-------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs b/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs index 92ef24cb..ae868aea 100644 --- a/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs +++ b/packages/cipherstash-proxy-integration/src/extended_protocol_error_messages.rs @@ -66,11 +66,7 @@ mod tests { if let Err(err) = result { let msg = err.to_string(); - // This is similar to below. The error message comes from tokio-postgres when Proxy - // returns eql_v1_encrypted and the client cannot convert to a string. 
- // If mapping errors are enabled (enable_mapping_errors or CS_DEVELOPMENT__ENABLE_MAPPING_ERRORS), - // then Proxy will return an error that says "Column X in table Y has no Encrypt configuration" - assert_eq!(msg, "error serializing parameter 1: cannot convert between the Rust type `&str` and the Postgres type `jsonb`"); + assert_eq!(msg, "db error: ERROR: Column 'encrypted_unconfigured' in table 'unconfigured' has no Encrypt configuration. For help visit https://github.com/cipherstash/proxy/blob/main/docs/errors.md#encrypt-unknown-column"); } else { unreachable!(); } diff --git a/packages/cipherstash-proxy/src/postgresql/frontend.rs b/packages/cipherstash-proxy/src/postgresql/frontend.rs index 2cc83aea..da272e19 100644 --- a/packages/cipherstash-proxy/src/postgresql/frontend.rs +++ b/packages/cipherstash-proxy/src/postgresql/frontend.rs @@ -894,15 +894,11 @@ where msg = "Configured column not found. Encryption configuration may have been deleted.", ?identifier, ); - if self.encrypt.config.mapping_errors_enabled() { - Err(EncryptError::UnknownColumn { - table: identifier.table.to_owned(), - column: identifier.column.to_owned(), - } - .into()) - } else { - Ok(None) + Err(EncryptError::UnknownColumn { + table: identifier.table.to_owned(), + column: identifier.column.to_owned(), } + .into()) } } } From 8f600d31a3af6e55eb34a1003e20fedca6a06ba7 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 14:40:47 +1000 Subject: [PATCH 39/50] chore: use eql-2.0.0-alpha.2 --- mise.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mise.toml b/mise.toml index ec398917..625aeda9 100644 --- a/mise.toml +++ b/mise.toml @@ -24,8 +24,7 @@ CS_PROXY__HOST = "proxy" # Misc DOCKER_CLI_HINTS = "false" # Please don't show us What's Next. 
-# CS_EQL_VERSION = "eql-1.0.1" -CS_EQL_VERSION = "eql-2.0.0-alpha.1" +CS_EQL_VERSION = "eql-2.0.0-alpha.2" [tools] "cargo:cargo-binstall" = "latest" From 19afee7c18c6e5e9aa3fda2fcd7b8f44f9e25436 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 14:51:20 +1000 Subject: [PATCH 40/50] feat: update python test to check for unconfigured error message --- tests/python/tests/test_error_messages.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/python/tests/test_error_messages.py b/tests/python/tests/test_error_messages.py index f16fd07b..af351e6f 100644 --- a/tests/python/tests/test_error_messages.py +++ b/tests/python/tests/test_error_messages.py @@ -54,10 +54,7 @@ def test_encrypted_column_with_no_configuration(): sql = "INSERT INTO unconfigured (id, encrypted_unconfigured) VALUES (%s, %s)" - # This is EQL catching the error and returning it. Details are in docs/errors.md - # When mapping errors are enabled, (enable_mapping_errors or CS_DEVELOPMENT__ENABLE_MAPPING_ERRORS) - # Proxy will return an error that says "Column X in table Y has no Encrypt configuration" - with pytest.raises(psycopg.Error, match=r"Encrypted column missing \w+ \(\w+\) field"): + with pytest.raises(psycopg.Error, match=r"Column 'encrypted_unconfigured' in table 'unconfigured' has no Encrypt configuration."): cursor.execute(sql, [id, val]) From de490d414b374e75668acf052a94231ebd310b2e Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 14:56:27 +1000 Subject: [PATCH 41/50] ci: Add CS_REGION var to benchmark test setup --- .github/workflows/benchmark.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 7fd37064..ac24e7de 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -34,6 +34,7 @@ jobs: CS_DEFAULT_KEYSET_ID: ${{ secrets.CS_DEFAULT_KEYSET_ID }} CS_CLIENT_ID: ${{ secrets.CS_CLIENT_ID }} CS_CLIENT_KEY: ${{ secrets.CS_CLIENT_KEY }} + 
CS_REGION: "ap-southeast-2.aws" RUST_BACKTRACE: "1" run: mise run benchmark:continuous # Download previous benchmark result from cache (if exists) From 86fa6ab1bc7e9b84049aa5aadd232eb78068fba6 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 14 May 2025 15:14:51 +1000 Subject: [PATCH 42/50] ci: Update benchmark schema to use EQL 2.0 --- tests/benchmark/sql/benchmark-schema.sql | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/benchmark/sql/benchmark-schema.sql b/tests/benchmark/sql/benchmark-schema.sql index e975f2e8..7d7b4503 100644 --- a/tests/benchmark/sql/benchmark-schema.sql +++ b/tests/benchmark/sql/benchmark-schema.sql @@ -11,13 +11,14 @@ DROP TABLE IF EXISTS benchmark_encrypted; CREATE TABLE benchmark_encrypted ( id serial primary key, username text, - email cs_encrypted_v1 + email eql_v1_encrypted ); -SELECT cs_add_column_v1( +SELECT eql_v1.add_column( 'benchmark_encrypted', 'email' ); -SELECT cs_encrypt_v1(); -SELECT cs_activate_v1(); +SELECT eql_v1.encrypt(); +SELECT eql_v1.activate(); + From adb080f265593ab7645d9af5e132bd5894111669 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 15 May 2025 12:41:23 +1000 Subject: [PATCH 43/50] docs: Update error doc for encrypt-column-could-not-be-deserialised --- docs/errors.md | 59 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 7 deletions(-) diff --git a/docs/errors.md b/docs/errors.md index 1105d064..8bc1f669 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -22,6 +22,9 @@ - [Unknown table](#encrypt-unknown-table) - [Unknown index term](#encrypt-unknown-index-term) +- Decrypt errors: + - [Column could not be deserialised](#encrypt-column-could-not-be-deserialised) + - Configuration errors: - [Missing or invalid TLS configuration](#config-missing-or-invalid-tls) @@ -263,9 +266,9 @@ The most likely cause is network access to the ZeroKMS service. ### How to Fix 1. 
Check that CipherStash ZeroKMS is available at [the status page](https://status.cipherstash.com/). -1. Check that CipherStash Proxy has network access to ZeroKMS in the appropriate region. +2. Check that CipherStash Proxy has network access to ZeroKMS in the appropriate region. -1. Check that the encrypted configuration `cast` matches the expected type. +3. Check that the encrypted configuration `cast` matches the expected type. @@ -307,8 +310,8 @@ For example: ### How to fix 1. Check the encrypted configuration has the correct type. -1. Check that the configuration has not changed. -1. Check [EQL](https://github.com/cipherstash/encrypt-query-language). +2. Check that the configuration has not changed. +3. Check [EQL](https://github.com/cipherstash/encrypt-query-language). @@ -331,7 +334,7 @@ Column 'column_name' in table 'table_name' has no Encrypt configuration 1. Define the encrypted configuration using [EQL](https://github.com/cipherstash/encrypt-query-language). -1. Add `users.email` as an encrypted column: +2. Add `users.email` as an encrypted column: ```sql SELECT cs_add_column_v1('users', 'email'); ``` @@ -357,7 +360,7 @@ Table 'table_name' has no Encrypt configuration 1. Define the encrypted configuration using [EQL](https://github.com/cipherstash/encrypt-query-language). -1. Add `users.email` as an encrypted column: +2. Add `users.email` as an encrypted column: ```sql SELECT cs_add_column_v1('users', 'email'); ``` @@ -385,13 +388,55 @@ Unknown Index Term for column '{column_name}' in table '{table_name}'. ### How to fix 1. Check the Encrypt configuration for the column. -1. Define the encrypted configuration using [EQL](https://github.com/cipherstash/encrypt-query-language). +2. Define the encrypted configuration using [EQL](https://github.com/cipherstash/encrypt-query-language). + + + + +# Decrypt errors + + +## Column could not be deserialised + +The column could not be deserialised for decryption. 
+ + +### Error message + +``` +Column 'column_name' in table 'table_name' could not be deserialised. +``` + +### Notes + +CipherStash Proxy stores encrypted data and search terms as `jsonb`. The structure is defined as part of EQL. + +The error indicates an internal issue has occurred deserialising and extracting the ciphertext data for decryption. +It may be caused if the encrypted data has been altered by another process or application. + +If the error persists, please contact CipherStash [support](https://cipherstash.com/support). + + +### How to Fix + +1. Check that the data in the encrypted column is in the correct [EQL](https://github.com/cipherstash/encrypt-query-language) format. + + + + + + + + + + + # Configuration errors From 81cff57b9294bc3f9d5d2887e0bdc86762470a66 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 15 May 2025 16:53:23 +1000 Subject: [PATCH 44/50] feat: check column configuration against encrypted column before decrypt --- docs/errors.md | 29 ++++++++++++++ docs/how-to.md | 2 +- packages/cipherstash-proxy/src/error.rs | 3 ++ .../src/postgresql/backend.rs | 40 ++++++++++++++++++- 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/docs/errors.md b/docs/errors.md index 8bc1f669..1b7eebf5 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -21,6 +21,7 @@ - [Unknown column](#encrypt-unknown-column) - [Unknown table](#encrypt-unknown-table) - [Unknown index term](#encrypt-unknown-index-term) + - [Column configuration mismatch](#encrypt-column-config-mismatch) - Decrypt errors: - [Column could not be deserialised](#encrypt-column-could-not-be-deserialised) @@ -392,6 +393,34 @@ Unknown Index Term for column '{column_name}' in table '{table_name}'. + + + +## Column configuration mismatch + +A returned encrypted column does not match the column configuration. + +### Error message + +``` +Column configuration for column '{column_name}' in table '{table_name}' does not match the encrypted column.
+``` + +### Notes + +CipherStash Proxy validates that encrypted columns match the configuration before decrypting any data. +If the table and column are not the same, this error is returned. +The check is there to help prevent "confused deputy" issues and the error should *never* appear during normal operation. + +If the error persists, please contact CipherStash [support](https://cipherstash.com/support). + + +### Further reading + +[AWS: The confused deputy problem](https://docs.aws.amazon.com/IAM/latest/UserGuide/confused-deputy.html) +[Wikipedia: Confused deputy problem](https://en.wikipedia.org/wiki/Confused_deputy_problem) + + diff --git a/docs/how-to.md b/docs/how-to.md index 5f906a84..562500a7 100644 --- a/docs/how-to.md +++ b/docs/how-to.md @@ -163,7 +163,7 @@ This will output the version of EQL installed. In your existing PostgreSQL database, you store your data in tables and columns. Those columns have types like `integer`, `text`, `timestamp`, and `boolean`. When storing encrypted data in PostgreSQL with Proxy, you use a special column type called `eql_v1_encrypted`, which is [provided by EQL](#setting-up-the-database-schema). -`eql_v1_encrypted` is a container column type that can be used for any type of encrypted data you want to store or search, whether they are numbers (`int`, `small_int`, `big_int`), text (`text`), dates and times (`date`), or booleans (`boolean`). +`eql_v1_encrypted` is a container column type that can be used for any type of encrypted data you want to store or search, whether they are numbers (`int`, `small_int`, `big_int`), text (`text`), dates and times (`date`. `timestamp`), or booleans (`boolean`). 
Create a table with an encrypted column for `email`: diff --git a/packages/cipherstash-proxy/src/error.rs b/packages/cipherstash-proxy/src/error.rs index ad3762af..96621f35 100644 --- a/packages/cipherstash-proxy/src/error.rs +++ b/packages/cipherstash-proxy/src/error.rs @@ -214,6 +214,9 @@ pub enum EncryptError { #[error("Column '{column}' in table '{table}' could not be encrypted. For help visit {}#encrypt-column-could-not-be-encrypted", ERROR_DOC_BASE_URL)] ColumnCouldNotBeEncrypted { table: String, column: String }, + #[error("Column configuration for column '{column}' in table '{table}' does not match the encrypted column. For help visit {}#encrypt-column-config-mismatch", ERROR_DOC_BASE_URL)] + ColumnConfigurationMismatch { table: String, column: String }, + /// This should in practice be unreachable #[error("Missing encrypt configuration for column type `{plaintext_type}`. For help visit {}#encrypt-missing-encrypt-configuration", ERROR_DOC_BASE_URL)] MissingEncryptConfiguration { plaintext_type: String }, diff --git a/packages/cipherstash-proxy/src/postgresql/backend.rs b/packages/cipherstash-proxy/src/postgresql/backend.rs index 8722b765..b423630f 100644 --- a/packages/cipherstash-proxy/src/postgresql/backend.rs +++ b/packages/cipherstash-proxy/src/postgresql/backend.rs @@ -4,11 +4,12 @@ use super::message_buffer::MessageBuffer; use super::messages::error_response::ErrorResponse; use super::messages::row_description::RowDescription; use super::messages::BackendCode; +use super::Column; use crate::connect::Sender; use crate::encrypt::Encrypt; use crate::eql::EqlEncrypted; -use crate::error::Error; -use crate::log::{DEVELOPMENT, MAPPER, PROTOCOL}; +use crate::error::{EncryptError, Error}; +use crate::log::{DECRYPT, DEVELOPMENT, MAPPER, PROTOCOL}; use crate::postgresql::context::Portal; use crate::postgresql::messages::data_row::DataRow; use crate::postgresql::messages::param_description::ParamDescription; @@ -270,6 +271,8 @@ where let start = Instant::now(); 
+ self.check_column_config(projection_columns, &ciphertexts)?; + // Decrypt CipherText -> Plaintext let plaintexts = self.encrypt.decrypt(ciphertexts).await.inspect_err(|_| { counter!(DECRYPTION_ERROR_TOTAL).increment(1); @@ -313,6 +316,39 @@ where Ok(()) } + fn check_column_config( + &mut self, + projection_columns: &[Option], + ciphertexts: &[Option], + ) -> Result<(), Error> { + for (col, ct) in projection_columns.iter().zip(ciphertexts) { + match (col, ct) { + (Some(col), Some(ct)) => { + if col.identifier != ct.identifier { + return Err(EncryptError::ColumnConfigurationMismatch { + table: col.identifier.table.to_owned(), + column: col.identifier.column.to_owned(), + } + .into()); + } + } + // configured column with NULL ciphertext + (Some(_), None) => {} + // unconfigured column *should* have no ciphertext, + (None, None) => {} + // ciphertext with no column configuration is bad + (None, Some(ct)) => { + return Err(EncryptError::ColumnConfigurationMismatch { + table: ct.identifier.table.to_owned(), + column: ct.identifier.column.to_owned(), + } + .into()); + } + } + } + Ok(()) + } + async fn parameter_description_handler( &self, bytes: &BytesMut, From 4b06e95f3f02c435199a513a37c9cd441bd7fbf8 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 20 May 2025 14:02:39 +1000 Subject: [PATCH 45/50] feat: Add array flag to encrypted structure --- packages/cipherstash-proxy/src/encrypt/mod.rs | 58 +++++++++++-------- packages/cipherstash-proxy/src/eql/mod.rs | 4 ++ 2 files changed, 38 insertions(+), 24 deletions(-) diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 386fa8d4..36c1529a 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -300,6 +300,7 @@ fn to_eql_encrypted( selector, ste_vec_index: None, }, + is_array_item: None, }, }) } @@ -308,30 +309,38 @@ fn to_eql_encrypted( let ste_vec_index: Vec = ste_vec .into_iter() - 
.map(|EncryptedEntry(selector, term, ciphertext)| { - let indexes = match term { - EncryptedSteVecTerm::Mac(bytes) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.as_bytes())), - blake3_index: Some(hex::encode(bytes)), - ..Default::default() - }, - EncryptedSteVecTerm::OreFixed(ore) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.as_bytes())), - ore_cclw_fixed_index: Some(hex::encode(&ore)), - ..Default::default() - }, - EncryptedSteVecTerm::OreVariable(ore) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.as_bytes())), - ore_cclw_var_index: Some(hex::encode(&ore)), - ..Default::default() - }, - }; - - eql::EqlEncryptedBody { - ciphertext, - indexes, - } - }) + .map( + |EncryptedEntry { + tokenized_selector, + term, + record, + parent_is_array, + }| { + let indexes = match term { + EncryptedSteVecTerm::Mac(bytes) => EqlEncryptedIndexes { + selector: Some(hex::encode(selector.as_bytes())), + blake3_index: Some(hex::encode(bytes)), + ..Default::default() + }, + EncryptedSteVecTerm::OreFixed(ore) => EqlEncryptedIndexes { + selector: Some(hex::encode(selector.as_bytes())), + ore_cclw_fixed_index: Some(hex::encode(&ore)), + ..Default::default() + }, + EncryptedSteVecTerm::OreVariable(ore) => EqlEncryptedIndexes { + selector: Some(hex::encode(selector.as_bytes())), + ore_cclw_var_index: Some(hex::encode(&ore)), + ..Default::default() + }, + }; + + eql::EqlEncryptedBody { + ciphertext, + indexes, + is_array_item: Some(parent_is_array), + } + }, + ) .collect(); // FIXME: I'm unsure if I've handled the root ciphertext correctly @@ -351,6 +360,7 @@ fn to_eql_encrypted( selector: None, ste_vec_index: Some(ste_vec_index), }, + is_array_item: None, }, }) } diff --git a/packages/cipherstash-proxy/src/eql/mod.rs b/packages/cipherstash-proxy/src/eql/mod.rs index a896be28..e5ef20ea 100644 --- a/packages/cipherstash-proxy/src/eql/mod.rs +++ b/packages/cipherstash-proxy/src/eql/mod.rs @@ -80,6 +80,9 @@ pub struct EqlEncryptedBody { 
#[serde(flatten)] pub(crate) indexes: EqlEncryptedIndexes, + + #[serde(rename = "a", skip_serializing_if = "Option::is_none")] + pub(crate) is_array_item: Option, } #[derive(Debug, Deserialize, Serialize, Default)] @@ -163,6 +166,7 @@ mod tests { ore_cclw_var_index: None, ste_vec_index: None, }, + is_array_item: None, }, }; From 5b53272fabc1374ef731f9b75a820ba21d40ecd8 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Tue, 20 May 2025 14:31:12 +1000 Subject: [PATCH 46/50] Make it compile --- packages/cipherstash-proxy/src/encrypt/mod.rs | 8 ++++---- packages/cipherstash-proxy/src/postgresql/backend.rs | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 36c1529a..55ec9a03 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -318,24 +318,24 @@ fn to_eql_encrypted( }| { let indexes = match term { EncryptedSteVecTerm::Mac(bytes) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.as_bytes())), + selector: Some(hex::encode(tokenized_selector.as_bytes())), blake3_index: Some(hex::encode(bytes)), ..Default::default() }, EncryptedSteVecTerm::OreFixed(ore) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.as_bytes())), + selector: Some(hex::encode(tokenized_selector.as_bytes())), ore_cclw_fixed_index: Some(hex::encode(&ore)), ..Default::default() }, EncryptedSteVecTerm::OreVariable(ore) => EqlEncryptedIndexes { - selector: Some(hex::encode(selector.as_bytes())), + selector: Some(hex::encode(tokenized_selector.as_bytes())), ore_cclw_var_index: Some(hex::encode(&ore)), ..Default::default() }, }; eql::EqlEncryptedBody { - ciphertext, + ciphertext: record, indexes, is_array_item: Some(parent_is_array), } diff --git a/packages/cipherstash-proxy/src/postgresql/backend.rs b/packages/cipherstash-proxy/src/postgresql/backend.rs index b423630f..7a60466d 100644 --- 
a/packages/cipherstash-proxy/src/postgresql/backend.rs +++ b/packages/cipherstash-proxy/src/postgresql/backend.rs @@ -9,7 +9,7 @@ use crate::connect::Sender; use crate::encrypt::Encrypt; use crate::eql::EqlEncrypted; use crate::error::{EncryptError, Error}; -use crate::log::{DECRYPT, DEVELOPMENT, MAPPER, PROTOCOL}; +use crate::log::{DEVELOPMENT, MAPPER, PROTOCOL}; use crate::postgresql::context::Portal; use crate::postgresql::messages::data_row::DataRow; use crate::postgresql::messages::param_description::ParamDescription; From baf42947acecbfac86781a3bd2e4ccab9313bd8f Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 21 May 2025 13:07:20 +1000 Subject: [PATCH 47/50] feat: Update EQL version to eql-2.0.0 --- mise.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mise.toml b/mise.toml index 625aeda9..cd840193 100644 --- a/mise.toml +++ b/mise.toml @@ -24,7 +24,7 @@ CS_PROXY__HOST = "proxy" # Misc DOCKER_CLI_HINTS = "false" # Please don't show us What's Next. -CS_EQL_VERSION = "eql-2.0.0-alpha.2" +CS_EQL_VERSION = "eql-2.0.0" [tools] "cargo:cargo-binstall" = "latest" From fa7bbec367f78b3b752fd5d494b6154c80443fce Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 21 May 2025 13:28:12 +1000 Subject: [PATCH 48/50] feat: update to cs-client 0.22 --- Cargo.lock | 8 ++++---- packages/cipherstash-proxy-integration/Cargo.toml | 4 ++-- packages/cipherstash-proxy/Cargo.toml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 92a4eb8e..de9237ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -650,9 +650,9 @@ dependencies = [ [[package]] name = "cipherstash-client" -version = "0.21.0" +version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84a83c23fbd5b42bf7af65f882a584afd6f534a60ffd734a87618d0c3341c4b3" +checksum = "0f85784b109d3cacec64a735ca5ac791ef4e9c4d1d451156dd3bb513c9b4ddf0" dependencies = [ "aes-gcm-siv", "anyhow", @@ -4518,9 +4518,9 @@ dependencies = [ [[package]] 
name = "webpki-roots" -version = "0.26.8" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" +checksum = "2853738d1cc4f2da3a225c18ec6c3721abb31961096e9dbf5ab35fa88b19cfdb" dependencies = [ "rustls-pki-types", ] diff --git a/packages/cipherstash-proxy-integration/Cargo.toml b/packages/cipherstash-proxy-integration/Cargo.toml index 4291432e..90162eb9 100644 --- a/packages/cipherstash-proxy-integration/Cargo.toml +++ b/packages/cipherstash-proxy-integration/Cargo.toml @@ -21,10 +21,10 @@ tokio-postgres-rustls = "0.13.0" tokio-rustls = "0.26.0" tracing = { workspace = true } tracing-subscriber = { workspace = true } -webpki-roots = "0.26.7" +webpki-roots = "1.0" [dev-dependencies] -cipherstash-client = { version = "0.21.0", features = ["tokio"] } +cipherstash-client = { version = "0.22.0", features = ["tokio"] } cipherstash-config = "0.2.3" clap = "4.5.32" fake = { version = "4", features = ["chrono", "derive"] } diff --git a/packages/cipherstash-proxy/Cargo.toml b/packages/cipherstash-proxy/Cargo.toml index 26b17bac..5808db9e 100644 --- a/packages/cipherstash-proxy/Cargo.toml +++ b/packages/cipherstash-proxy/Cargo.toml @@ -8,7 +8,7 @@ bigdecimal = { version = "0.4.6", features = ["serde-json"] } arc-swap = "1.7.1" bytes = { version = "1.9", default-features = false } chrono = { version = "0.4.39", features = ["clock"] } -cipherstash-client = { version = "0.21.0", features = ["tokio"] } +cipherstash-client = { version = "0.22.0", features = ["tokio"] } clap = { version = "4.5.31", features = ["derive", "env"] } config = { version = "0.15", features = [ "async", From f2bc2eaa5eb1ee062e80a9316614a6442a9c4b5c Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 21 May 2025 14:35:58 +1000 Subject: [PATCH 49/50] chore: fixes for rust 1.87 clippy lints --- .../src/connect/async_stream.rs | 2 +- .../src/postgresql/handler.rs | 2 +- .../messages/authentication/auth.rs 
| 6 +- .../src/postgresql/startup.rs | 2 +- packages/eql-mapper/src/inference/mod.rs | 2 +- .../eql-mapper/src/inference/type_error.rs | 18 +++--- .../eql-mapper/src/inference/unifier/mod.rs | 2 +- packages/eql-mapper/src/model/schema_delta.rs | 2 +- packages/eql-mapper/src/test_helpers.rs | 2 +- .../src/transformation_rules/helpers.rs | 2 +- .../preserve_effective_aliases.rs | 64 +++++++++---------- 11 files changed, 49 insertions(+), 55 deletions(-) diff --git a/packages/cipherstash-proxy/src/connect/async_stream.rs b/packages/cipherstash-proxy/src/connect/async_stream.rs index 2c0b6b2e..9daa1058 100644 --- a/packages/cipherstash-proxy/src/connect/async_stream.rs +++ b/packages/cipherstash-proxy/src/connect/async_stream.rs @@ -23,7 +23,7 @@ use x509_parser::prelude::{FromDer, X509Certificate}; #[derive(Debug)] pub enum AsyncStream { Tcp(TcpStream), - Tls(TlsStream), + Tls(Box>), } impl AsyncStream { diff --git a/packages/cipherstash-proxy/src/postgresql/handler.rs b/packages/cipherstash-proxy/src/postgresql/handler.rs index b1acd30e..711e92e2 100644 --- a/packages/cipherstash-proxy/src/postgresql/handler.rs +++ b/packages/cipherstash-proxy/src/postgresql/handler.rs @@ -84,7 +84,7 @@ pub async fn handler( AsyncStream::Tcp(stream) => { // The Client is connecting to our Server let tls_stream = tls::server(stream, tls).await?; - client_stream = AsyncStream::Tls(tls_stream); + client_stream = AsyncStream::Tls(Box::new(tls_stream)); } AsyncStream::Tls(_) => { unreachable!(); diff --git a/packages/cipherstash-proxy/src/postgresql/messages/authentication/auth.rs b/packages/cipherstash-proxy/src/postgresql/messages/authentication/auth.rs index 25f98da1..e87a4849 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/authentication/auth.rs +++ b/packages/cipherstash-proxy/src/postgresql/messages/authentication/auth.rs @@ -56,9 +56,9 @@ impl Authentication { pub fn is_scram_sha_256_plus(&self) -> bool { match self.method { - AuthenticationMethod::Sasl { ref 
mechanisms } => mechanisms - .iter() - .any(|m| *m == SaslMechanism::ScramSha256Plus), + AuthenticationMethod::Sasl { ref mechanisms } => { + mechanisms.contains(&SaslMechanism::ScramSha256Plus) + } _ => false, } } diff --git a/packages/cipherstash-proxy/src/postgresql/startup.rs b/packages/cipherstash-proxy/src/postgresql/startup.rs index e079b911..dd45bb31 100644 --- a/packages/cipherstash-proxy/src/postgresql/startup.rs +++ b/packages/cipherstash-proxy/src/postgresql/startup.rs @@ -30,7 +30,7 @@ pub async fn with_tls(stream: AsyncStream, config: &TandemConfig) -> Result { let tls_stream = tls::client(tcp_stream, config).await?; - Ok(AsyncStream::Tls(tls_stream)) + Ok(AsyncStream::Tls(Box::new(tls_stream))) } false => { warn!(msg = "Connecting to database without Transport Layer Security (TLS)"); diff --git a/packages/eql-mapper/src/inference/mod.rs b/packages/eql-mapper/src/inference/mod.rs index b5a96a15..6cb4b390 100644 --- a/packages/eql-mapper/src/inference/mod.rs +++ b/packages/eql-mapper/src/inference/mod.rs @@ -127,11 +127,11 @@ impl<'ast> TypeInferencer<'ast> { match self.unify(self.get_node_type(lhs), self.get_node_type(rhs)) { Ok(unified) => Ok(unified), Err(err) => Err(TypeError::OnNodes( - Box::new(err), format!("{:?}", lhs), self.get_node_type(lhs), format!("{:?}", rhs), self.get_node_type(rhs), + err.to_string(), )), } } diff --git a/packages/eql-mapper/src/inference/type_error.rs b/packages/eql-mapper/src/inference/type_error.rs index 2011a3d8..40c371e0 100644 --- a/packages/eql-mapper/src/inference/type_error.rs +++ b/packages/eql-mapper/src/inference/type_error.rs @@ -1,4 +1,4 @@ -use std::{collections::HashSet, sync::Arc}; +use std::sync::Arc; use crate::{unifier::Type, SchemaError, ScopeError}; @@ -19,9 +19,6 @@ pub enum TypeError { #[error("{}", _0)] Expected(String), - #[error("One or more params failed to unify: {}", _0.iter().cloned().collect::>().join(", "))] - Params(HashSet), - #[error("Expected param count to be {}, but got {}", _0, 
_1)] ParamCount(usize, usize), @@ -34,14 +31,15 @@ pub enum TypeError { #[error("{}", _0)] SchemaError(#[from] SchemaError), - #[error("Cannot unify node types for nodes:\n 1. node: {} type: {}\n 2. node: {} type: {}\n error: {}", _1, _2, _3, _4, _0)] - OnNodes(Box, String, Arc, String, Arc), - + // #[error("Cannot unify node types for nodes:\n 1. node: {} type: {}\n 2. node: {} type: {}\n error: {}", _1, _2, _3, _4, _0)] + // OnNodes(Box, String, Arc, String, Arc), #[error( - "Cannot unify node with type:\n node: {}\n type: {} error: {}", + "Cannot unify node types for nodes:\n 1. node: {} type: {}\n 2. node: {} type: {}\n error: {}", + _0, _1, _2, - _0 + _3, + _4 )] - OnNode(Box, Type, String), + OnNodes(String, Arc, String, Arc, String), } diff --git a/packages/eql-mapper/src/inference/unifier/mod.rs b/packages/eql-mapper/src/inference/unifier/mod.rs index e1f9e951..fb523356 100644 --- a/packages/eql-mapper/src/inference/unifier/mod.rs +++ b/packages/eql-mapper/src/inference/unifier/mod.rs @@ -447,7 +447,7 @@ pub(crate) mod test_util { } } - root_node.accept(&mut FindNodeFromKeyVisitor(self)); + let _ = root_node.accept(&mut FindNodeFromKeyVisitor(self)); } } } diff --git a/packages/eql-mapper/src/model/schema_delta.rs b/packages/eql-mapper/src/model/schema_delta.rs index b79c6ba9..8ae04467 100644 --- a/packages/eql-mapper/src/model/schema_delta.rs +++ b/packages/eql-mapper/src/model/schema_delta.rs @@ -196,7 +196,7 @@ pub fn collect_ddl(table_resolver: Arc, statement: &Statement) -> schema: schema_with_edits, changed: false, }; - statement.accept(&mut visitor); + let _ = statement.accept(&mut visitor); return visitor.changed; } diff --git a/packages/eql-mapper/src/test_helpers.rs b/packages/eql-mapper/src/test_helpers.rs index 61c225e2..e183f4fd 100644 --- a/packages/eql-mapper/src/test_helpers.rs +++ b/packages/eql-mapper/src/test_helpers.rs @@ -89,7 +89,7 @@ pub(crate) fn find_nodekey_for_value_node( found: None, }; - statement.accept(&mut visitor); + let _ = 
statement.accept(&mut visitor); visitor.found } diff --git a/packages/eql-mapper/src/transformation_rules/helpers.rs b/packages/eql-mapper/src/transformation_rules/helpers.rs index a4dc965a..0d588f18 100644 --- a/packages/eql-mapper/src/transformation_rules/helpers.rs +++ b/packages/eql-mapper/src/transformation_rules/helpers.rs @@ -23,7 +23,7 @@ pub(crate) fn is_used_in_group_by_clause<'ast, N: AsNodeKey>( ty: needle, found: false, }; - exprs.accept(&mut visitor); + let _ = exprs.accept(&mut visitor); visitor.found } }, diff --git a/packages/eql-mapper/src/transformation_rules/preserve_effective_aliases.rs b/packages/eql-mapper/src/transformation_rules/preserve_effective_aliases.rs index c26445cc..72c29cb1 100644 --- a/packages/eql-mapper/src/transformation_rules/preserve_effective_aliases.rs +++ b/packages/eql-mapper/src/transformation_rules/preserve_effective_aliases.rs @@ -75,18 +75,16 @@ impl PreserveEffectiveAliases { fn effective_aliases_differ(source_node: &SelectItem, target_node: &SelectItem) -> bool { let effective_source_alias = Self::derive_effective_alias(source_node); let effective_target_alias = Self::derive_effective_alias(target_node); - match target_node { - // The captured binding `expr` has type `&mut Expr` but we need an owned `Expr`. to avoid cloning `expr` - // (which can be arbitrarily large) we replace it with another which in return provides us with ownership of - // the original value. `Expr::Wildcard` is chosen as the throwaway value because it's cheap. - SelectItem::UnnamedExpr(_) => { - if let (Some(effective_target_alias), Some(effective_source_alias)) = - (effective_target_alias, effective_source_alias) - { - return effective_target_alias != effective_source_alias; - } + + // The captured binding `expr` has type `&mut Expr` but we need an owned `Expr`. to avoid cloning `expr` + // (which can be arbitrarily large) we replace it with another which in return provides us with ownership of + // the original value. 
`Expr::Wildcard` is chosen as the throwaway value because it's cheap. + if let SelectItem::UnnamedExpr(_) = target_node { + if let (Some(effective_target_alias), Some(effective_source_alias)) = + (effective_target_alias, effective_source_alias) + { + return effective_target_alias != effective_source_alias; } - _ => {} } false @@ -98,31 +96,29 @@ impl PreserveEffectiveAliases { ) -> bool { let effective_source_alias = Self::derive_effective_alias(source_node); let effective_target_alias = Self::derive_effective_alias(target_node); - match target_node { - // The captured binding `expr` has type `&mut Expr` but we need an owned `Expr`. to avoid cloning `expr` - // (which can be arbitrarily large) we replace it with another which in return provides us with ownership of - // the original value. `Expr::Wildcard` is chosen as the throwaway value because it's cheap. - SelectItem::UnnamedExpr(expr) => { - if let (Some(effective_target_alias), Some(effective_source_alias)) = - (effective_target_alias, effective_source_alias) - { - if effective_target_alias != effective_source_alias { - *target_node = SelectItem::ExprWithAlias { - expr: mem::replace( - expr, - Expr::Wildcard(AttachedToken(TokenWithSpan::new( - Token::EOF, - Span::empty(), - ))), - ), - alias: effective_source_alias, - }; - - return true; - } + + // The captured binding `expr` has type `&mut Expr` but we need an owned `Expr`. to avoid cloning `expr` + // (which can be arbitrarily large) we replace it with another which in return provides us with ownership of + // the original value. `Expr::Wildcard` is chosen as the throwaway value because it's cheap. 
+ if let SelectItem::UnnamedExpr(expr) = target_node { + if let (Some(effective_target_alias), Some(effective_source_alias)) = + (effective_target_alias, effective_source_alias) + { + if effective_target_alias != effective_source_alias { + *target_node = SelectItem::ExprWithAlias { + expr: mem::replace( + expr, + Expr::Wildcard(AttachedToken(TokenWithSpan::new( + Token::EOF, + Span::empty(), + ))), + ), + alias: effective_source_alias, + }; + + return true; } } - _ => {} } false From 1b3601140de163ccd3c647d7b1f59c7023ad7fbc Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 21 May 2025 14:54:36 +1000 Subject: [PATCH 50/50] chore: update to reference eql_v2 schema --- docs/errors.md | 4 +- docs/getting-started/schema-example.sql | 8 +-- docs/how-to.md | 10 ++-- .../src/encrypt/config/manager.rs | 2 +- packages/cipherstash-proxy/src/encrypt/mod.rs | 2 +- .../src/encrypt/schema/manager.rs | 4 +- .../src/encrypt/sql/select_config.sql | 2 +- .../src/postgresql/messages/parse.rs | 8 +-- .../eql-mapper/src/inference/sql_functions.rs | 16 +++--- .../eql-mapper/src/inference/type_error.rs | 2 - packages/eql-mapper/src/lib.rs | 18 +++--- .../transformation_rules/group_by_eql_col.rs | 2 +- .../src/transformation_rules/helpers.rs | 2 +- .../rewrite_standard_sql_fns_on_eql_types.rs | 2 +- .../wrap_eql_cols_in_order_by_with_ore_fn.rs | 6 +- .../wrap_grouped_eql_col_in_aggregate_fn.rs | 4 +- tests/benchmark/sql/benchmark-schema.sql | 10 ++-- tests/sql/schema-uninstall.sql | 2 +- tests/sql/schema.sql | 56 +++++++++---------- .../test/integration/psql-passthrough.sh | 6 +- 20 files changed, 82 insertions(+), 84 deletions(-) diff --git a/docs/errors.md b/docs/errors.md index 1b7eebf5..3ea90ec6 100644 --- a/docs/errors.md +++ b/docs/errors.md @@ -318,7 +318,7 @@ For example: ## Unknown Column -The column has an encrypted type (PostgreSQL `eql_v1_encrypted` type ) with no encryption configuration. 
+The column has an encrypted type (PostgreSQL `eql_v2_encrypted` type ) with no encryption configuration. Without the configuration, Cipherstash Proxy does not know how to encrypt the column. Any data is unprotected and unencrypted. @@ -345,7 +345,7 @@ Column 'column_name' in table 'table_name' has no Encrypt configuration ## Unknown Table -The table has one or more encrypted columns (PostgreSQL `eql_v1_encrypted` type ) with no encryption configuration. +The table has one or more encrypted columns (PostgreSQL `eql_v2_encrypted` type ) with no encryption configuration. Without the configuration, Cipherstash Proxy does not know how to encrypt the column. Any data is unprotected and unencrypted. diff --git a/docs/getting-started/schema-example.sql b/docs/getting-started/schema-example.sql index 0120cde4..e1627419 100644 --- a/docs/getting-started/schema-example.sql +++ b/docs/getting-started/schema-example.sql @@ -1,12 +1,12 @@ -TRUNCATE TABLE public.eql_v1_configuration; +TRUNCATE TABLE public.eql_v2_configuration; -- Exciting cipherstash table DROP TABLE IF EXISTS users; CREATE TABLE users ( id SERIAL PRIMARY KEY, - encrypted_email eql_v1_encrypted, - encrypted_dob eql_v1_encrypted, - encrypted_salary eql_v1_encrypted + encrypted_email eql_v2_encrypted, + encrypted_dob eql_v2_encrypted, + encrypted_salary eql_v2_encrypted ); SELECT cs_add_index_v1( diff --git a/docs/how-to.md b/docs/how-to.md index 562500a7..0f8954e5 100644 --- a/docs/how-to.md +++ b/docs/how-to.md @@ -153,7 +153,7 @@ You can also install EQL by running [the installation script](https://github.com Once you have installed EQL, you can see what version is installed by querying the database: ```sql -SELECT eql_v1.version(); +SELECT eql_v2.version(); ``` This will output the version of EQL installed. @@ -162,22 +162,22 @@ This will output the version of EQL installed. In your existing PostgreSQL database, you store your data in tables and columns. 
Those columns have types like `integer`, `text`, `timestamp`, and `boolean`. -When storing encrypted data in PostgreSQL with Proxy, you use a special column type called `eql_v1_encrypted`, which is [provided by EQL](#setting-up-the-database-schema). -`eql_v1_encrypted` is a container column type that can be used for any type of encrypted data you want to store or search, whether they are numbers (`int`, `small_int`, `big_int`), text (`text`), dates and times (`date`. `timestamp`), or booleans (`boolean`). +When storing encrypted data in PostgreSQL with Proxy, you use a special column type called `eql_v2_encrypted`, which is [provided by EQL](#setting-up-the-database-schema). +`eql_v2_encrypted` is a container column type that can be used for any type of encrypted data you want to store or search, whether they are numbers (`int`, `small_int`, `big_int`), text (`text`), dates and times (`date`, `timestamp`), or booleans (`boolean`). Create a table with an encrypted column for `email`: ```sql CREATE TABLE users ( id SERIAL PRIMARY KEY, - email eql_v1_encrypted + email eql_v2_encrypted ) ``` This creates a `users` table with two columns: - `id`, an autoincrementing integer column that is the primary key for the record - - `email`, a `eql_v1_encrypted` column + - `email`, an `eql_v2_encrypted` column There are important differences between the plaintext columns you've traditionally used in PostgreSQL and encrypted columns with CipherStash Proxy: diff --git a/packages/cipherstash-proxy/src/encrypt/config/manager.rs b/packages/cipherstash-proxy/src/encrypt/config/manager.rs index 50c4296d..792f64eb 100644 --- a/packages/cipherstash-proxy/src/encrypt/config/manager.rs +++ b/packages/cipherstash-proxy/src/encrypt/config/manager.rs @@ -197,5 +197,5 @@ pub async fn load_encrypt_config(config: &DatabaseConfig) -> Result bool { let msg = e.to_string(); - msg.contains("eql_v1_configuration") && msg.contains("does not exist") + msg.contains("eql_v2_configuration") &&
msg.contains("does not exist") } diff --git a/packages/cipherstash-proxy/src/encrypt/mod.rs b/packages/cipherstash-proxy/src/encrypt/mod.rs index 55ec9a03..826c3998 100644 --- a/packages/cipherstash-proxy/src/encrypt/mod.rs +++ b/packages/cipherstash-proxy/src/encrypt/mod.rs @@ -58,7 +58,7 @@ impl Encrypt { let eql_version = { let client = connect::database(&config.database).await?; let rows = client - .query("SELECT eql_v1.version() AS version;", &[]) + .query("SELECT eql_v2.version() AS version;", &[]) .await; match rows { diff --git a/packages/cipherstash-proxy/src/encrypt/schema/manager.rs b/packages/cipherstash-proxy/src/encrypt/schema/manager.rs index b2603a98..9a15f77f 100644 --- a/packages/cipherstash-proxy/src/encrypt/schema/manager.rs +++ b/packages/cipherstash-proxy/src/encrypt/schema/manager.rs @@ -142,8 +142,8 @@ pub async fn load_schema(config: &DatabaseConfig) -> Result { let ident = Ident::with_quote('"', col); let column = match column_type_name.as_deref() { - Some("eql_v1_encrypted") => { - debug!(target: SCHEMA, msg = "eql_v1_encrypted column", table = table_name, column = col); + Some("eql_v2_encrypted") => { + debug!(target: SCHEMA, msg = "eql_v2_encrypted column", table = table_name, column = col); Column::eql(ident) } _ => Column::native(ident), diff --git a/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql b/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql index 8be0732f..b748a9d6 100644 --- a/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql +++ b/packages/cipherstash-proxy/src/encrypt/sql/select_config.sql @@ -1 +1 @@ -SELECT data FROM public.eql_v1_configuration WHERE state = 'active' LIMIT 1; +SELECT data FROM public.eql_v2_configuration WHERE state = 'active' LIMIT 1; diff --git a/packages/cipherstash-proxy/src/postgresql/messages/parse.rs b/packages/cipherstash-proxy/src/postgresql/messages/parse.rs index cc49f6e9..c54d0cfa 100644 --- a/packages/cipherstash-proxy/src/postgresql/messages/parse.rs +++ 
b/packages/cipherstash-proxy/src/postgresql/messages/parse.rs @@ -24,11 +24,11 @@ impl Parse { } /// - /// Encrypted columns are the eql_v1_encrypted Domain Type - /// eql_v1_encrypted wraps JSONB + /// Encrypted columns are the eql_v2_encrypted Domain Type + /// eql_v2_encrypted wraps JSONB /// - /// Using JSONB to avoid the complexity of loading the OID of eql_v1_encrypted - /// PostgreSQL will coerce JSONB to eql_v1_encrypted if it passes the constaint check + /// Using JSONB to avoid the complexity of loading the OID of eql_v2_encrypted + /// PostgreSQL will coerce JSONB to eql_v2_encrypted if it passes the constraint check /// pub fn rewrite_param_types(&mut self, columns: &[Option]) { for (idx, col) in columns.iter().enumerate() { diff --git a/packages/eql-mapper/src/inference/sql_functions.rs b/packages/eql-mapper/src/inference/sql_functions.rs index 020a8d07..fba4c5e2 100644 --- a/packages/eql-mapper/src/inference/sql_functions.rs +++ b/packages/eql-mapper/src/inference/sql_functions.rs @@ -236,14 +236,14 @@ static SQL_FUNCTIONS: LazyLock>> = LazyL sql_fn!(jsonb_array_elements_text(T) -> T, rewrite), // These are typings for when customer SQL already contains references to EQL functions. // They must be type checked but not rewritten.
- sql_fn!(eql_v1.min(T) -> T), - sql_fn!(eql_v1.max(T) -> T), - sql_fn!(eql_v1.jsonb_path_query(T, T) -> T), - sql_fn!(eql_v1.jsonb_path_query_first(T, T) -> T), - sql_fn!(eql_v1.jsonb_path_exists(T, T) -> T), - sql_fn!(eql_v1.jsonb_array_length(T) -> T), - sql_fn!(eql_v1.jsonb_array_elements(T) -> T), - sql_fn!(eql_v1.jsonb_array_elements_text(T) -> T), + sql_fn!(eql_v2.min(T) -> T), + sql_fn!(eql_v2.max(T) -> T), + sql_fn!(eql_v2.jsonb_path_query(T, T) -> T), + sql_fn!(eql_v2.jsonb_path_query_first(T, T) -> T), + sql_fn!(eql_v2.jsonb_path_exists(T, T) -> T), + sql_fn!(eql_v2.jsonb_array_length(T) -> T), + sql_fn!(eql_v2.jsonb_array_elements(T) -> T), + sql_fn!(eql_v2.jsonb_array_elements_text(T) -> T), ]; let mut sql_fns_by_name: HashMap> = HashMap::new(); diff --git a/packages/eql-mapper/src/inference/type_error.rs b/packages/eql-mapper/src/inference/type_error.rs index 40c371e0..6b4a0c4d 100644 --- a/packages/eql-mapper/src/inference/type_error.rs +++ b/packages/eql-mapper/src/inference/type_error.rs @@ -31,8 +31,6 @@ pub enum TypeError { #[error("{}", _0)] SchemaError(#[from] SchemaError), - // #[error("Cannot unify node types for nodes:\n 1. node: {} type: {}\n 2. node: {} type: {}\n error: {}", _1, _2, _3, _4, _0)] - // OnNodes(Box, String, Arc, String, Arc), #[error( "Cannot unify node types for nodes:\n 1. node: {} type: {}\n 2. 
node: {} type: {}\n error: {}", _0, diff --git a/packages/eql-mapper/src/lib.rs b/packages/eql-mapper/src/lib.rs index d9fa8d4b..be1a0e46 100644 --- a/packages/eql-mapper/src/lib.rs +++ b/packages/eql-mapper/src/lib.rs @@ -1026,7 +1026,7 @@ mod test { )])) { Ok(transformed_statement) => assert_eq!( transformed_statement.to_string(), - "SELECT * FROM employees WHERE salary > 'ENCRYPTED'::JSONB::eql_v1_encrypted" + "SELECT * FROM employees WHERE salary > 'ENCRYPTED'::JSONB::eql_v2_encrypted" ), Err(err) => panic!("statement transformation failed: {}", err), }; @@ -1073,7 +1073,7 @@ mod test { )])) { Ok(transformed_statement) => assert_eq!( transformed_statement.to_string(), - "INSERT INTO employees (salary) VALUES ('ENCRYPTED'::JSONB::eql_v1_encrypted)" + "INSERT INTO employees (salary) VALUES ('ENCRYPTED'::JSONB::eql_v2_encrypted)" ), Err(err) => panic!("statement transformation failed: {}", err), }; @@ -1352,7 +1352,7 @@ mod test { match typed.transform(HashMap::new()) { Ok(statement) => assert_eq!( statement.to_string(), - "SELECT eql_v1.cs_grouped_value(email) AS email FROM users GROUP BY eql_v1.ore_64_8_v1(email)".to_string() + "SELECT eql_v2.grouped_value(email) AS email FROM users GROUP BY eql_v2.ore_64_8_v2(email)".to_string() ), Err(err) => panic!("transformation failed: {err}"), } @@ -1382,7 +1382,7 @@ mod test { match typed.transform(HashMap::new()) { Ok(statement) => assert_eq!( statement.to_string(), - "SELECT eql_v1.min(salary), eql_v1.max(salary), department FROM employees GROUP BY department".to_string() + "SELECT eql_v2.min(salary), eql_v2.max(salary), department FROM employees GROUP BY department".to_string() ), Err(err) => panic!("transformation failed: {err}"), } @@ -1415,7 +1415,7 @@ mod test { Ok(statement) => { assert_eq!( statement.to_string(), - "SELECT * FROM employees WHERE eql_col = $1::JSONB::eql_v1_encrypted AND native_col = $2" + "SELECT * FROM employees WHERE eql_col = $1::JSONB::eql_v2_encrypted AND native_col = $2" ); } Err(err) => 
panic!("transformation failed: {err}"), @@ -1450,7 +1450,7 @@ mod test { Ok(statement) => { assert_eq!( statement.to_string(), - "SELECT eql_v1.jsonb_path_query(eql_col, ''::JSONB::eql_v1_encrypted), jsonb_path_query(native_col, '$.not-secret') FROM employees" + "SELECT eql_v2.jsonb_path_query(eql_col, ''::JSONB::eql_v2_encrypted), jsonb_path_query(native_col, '$.not-secret') FROM employees" ); } Err(err) => panic!("transformation failed: {err}"), @@ -1567,7 +1567,7 @@ mod test { .map(|expr| match expr { ast::Expr::Identifier(ident) => ident.to_string(), ast::Expr::Value(ast::Value::SingleQuotedString(s)) => { - format!("''::JSONB::eql_v1_encrypted", s) + format!("''::JSONB::eql_v2_encrypted", s) } _ => panic!("unsupported expr type in test util"), }) @@ -1588,7 +1588,7 @@ mod test { match type_check(schema, &statement) { Ok(typed) => match typed.transform(encrypted_literals) { Ok(statement) => { - let rewritten_fn_name = format!("eql_v1.{fn_name}"); + let rewritten_fn_name = format!("eql_v2.{fn_name}"); assert_eq!( statement.to_string(), format!( @@ -1623,7 +1623,7 @@ mod test { match typed.transform(test_helpers::dummy_encrypted_json_selector(&statement, ast::Value::SingleQuotedString("medications".to_owned()))) { Ok(statement) => assert_eq!( statement.to_string(), - format!("SELECT id, notes {} ''::JSONB::eql_v1_encrypted AS meds FROM patients", op) + format!("SELECT id, notes {} ''::JSONB::eql_v2_encrypted AS meds FROM patients", op) ), Err(err) => panic!("transformation failed: {err}"), } diff --git a/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs b/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs index 6db7c0b4..78c2f1d6 100644 --- a/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs +++ b/packages/eql-mapper/src/transformation_rules/group_by_eql_col.rs @@ -40,7 +40,7 @@ impl<'ast> TransformationRule<'ast> for GroupByEqlCol<'ast> { *target_node = helpers::wrap_in_1_arg_function( transformed_expr, - 
ObjectName(vec![Ident::new("eql_v1"), Ident::new("ore_64_8_v1")]), + ObjectName(vec![Ident::new("eql_v2"), Ident::new("ore_64_8_v2")]), ); return Ok(true); diff --git a/packages/eql-mapper/src/transformation_rules/helpers.rs b/packages/eql-mapper/src/transformation_rules/helpers.rs index 0d588f18..c070ed8d 100644 --- a/packages/eql-mapper/src/transformation_rules/helpers.rs +++ b/packages/eql-mapper/src/transformation_rules/helpers.rs @@ -56,7 +56,7 @@ pub(crate) fn cast_as_encrypted(wrapped: sqltk::parser::ast::Value) -> Expr { format: None, }; - let encrypted_type = ObjectName(vec![Ident::new("eql_v1_encrypted")]); + let encrypted_type = ObjectName(vec![Ident::new("eql_v2_encrypted")]); Expr::Cast { kind: CastKind::DoubleColon, diff --git a/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs b/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs index cca69574..8dd12c09 100644 --- a/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs +++ b/packages/eql-mapper/src/transformation_rules/rewrite_standard_sql_fns_on_eql_types.rs @@ -42,7 +42,7 @@ impl<'ast> TransformationRule<'ast> for RewriteStandardSqlFnsOnEqlTypes<'ast> { { let function = target_node.downcast_mut::().unwrap(); let mut existing_name = mem::take(&mut function.name.0); - existing_name.insert(0, Ident::new("eql_v1")); + existing_name.insert(0, Ident::new("eql_v2")); function.name = ObjectName(existing_name); } } diff --git a/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs b/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs index d356bba5..f899a84f 100644 --- a/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs +++ b/packages/eql-mapper/src/transformation_rules/wrap_eql_cols_in_order_by_with_ore_fn.rs @@ -11,7 +11,7 @@ use crate::{EqlMapperError, Type, Value}; use 
super::{helpers::wrap_in_1_arg_function, TransformationRule}; /// When an [`Expr`] of a [`SelectItem`] has an EQL type and that EQL type is used in a `GROUP BY` clause then -/// this rule wraps the `Expr` in a call to `eql_v1.grouped_value`. +/// this rule wraps the `Expr` in a call to `eql_v2.grouped_value`. /// /// # Example /// @@ -20,7 +20,7 @@ use super::{helpers::wrap_in_1_arg_function, TransformationRule}; /// SELECT eql_col FROM some_table GROUP BY eql_col; /// /// -- after mapping -/// SELECT eql_v1.grouped_value(eql_col) AS eql_col FROM some_table GROUP BY eql_v1.cs_ore_64_8(eql_col); +/// SELECT eql_v2.grouped_value(eql_col) AS eql_col FROM some_table GROUP BY eql_v2.ore_64_8_v2(eql_col); /// -- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^ /// -- ^ ^ ^ /// -- | | | @@ -54,7 +54,7 @@ impl<'ast> TransformationRule<'ast> for WrapEqlColsInOrderByWithOreFn<'ast> { target_node.expr = wrap_in_1_arg_function( expr_to_wrap, - ObjectName(vec![Ident::new("eql_v1"), Ident::new("ore_64_8_v1")]), + ObjectName(vec![Ident::new("eql_v2"), Ident::new("ore_64_8_v2")]), ); return Ok(true); diff --git a/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs b/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs index 5b5f8e0f..82dfa428 100644 --- a/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs +++ b/packages/eql-mapper/src/transformation_rules/wrap_grouped_eql_col_in_aggregate_fn.rs @@ -20,7 +20,7 @@ use super::{ /// SELECT eql_col FROM some_table GROUP BY eql_col; /// /// -- after mapping -/// SELECT eql_v1.grouped_value(eql_col) AS eql_col FROM some_table GROUP BY eql_v1.ore_64_8_v1(eql_col); +/// SELECT eql_v2.grouped_value(eql_col) AS eql_col FROM some_table GROUP BY eql_v2.ore_64_8_v2(eql_col); /// -- ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^ /// -- ^ ^ ^ /// -- | | | @@ -50,7 +50,7 @@ impl<'ast> TransformationRule<'ast> for
WrapGroupedEqlColInAggregateFn<'ast> { let target_node: &mut Expr = target_node.downcast_mut().unwrap(); *target_node = wrap_in_1_arg_function( expr.clone(), - ObjectName(vec![Ident::new("eql_v1"), Ident::new("cs_grouped_value")]), + ObjectName(vec![Ident::new("eql_v2"), Ident::new("grouped_value")]), ); return Ok(true); diff --git a/tests/benchmark/sql/benchmark-schema.sql b/tests/benchmark/sql/benchmark-schema.sql index 7d7b4503..7a5be642 100644 --- a/tests/benchmark/sql/benchmark-schema.sql +++ b/tests/benchmark/sql/benchmark-schema.sql @@ -1,4 +1,4 @@ -TRUNCATE TABLE public.eql_v1_configuration; +TRUNCATE TABLE public.eql_v2_configuration; DROP TABLE IF EXISTS benchmark_plaintext; CREATE TABLE benchmark_plaintext ( @@ -11,14 +11,14 @@ DROP TABLE IF EXISTS benchmark_encrypted; CREATE TABLE benchmark_encrypted ( id serial primary key, username text, - email eql_v1_encrypted + email eql_v2_encrypted ); -SELECT eql_v1.add_column( +SELECT eql_v2.add_column( 'benchmark_encrypted', 'email' ); -SELECT eql_v1.encrypt(); -SELECT eql_v1.activate(); +SELECT eql_v2.encrypt(); +SELECT eql_v2.activate(); diff --git a/tests/sql/schema-uninstall.sql b/tests/sql/schema-uninstall.sql index ae6630cf..0c5fbeac 100644 --- a/tests/sql/schema-uninstall.sql +++ b/tests/sql/schema-uninstall.sql @@ -1,4 +1,4 @@ -DROP TABLE IF EXISTS public.eql_v1_configuration; +DROP TABLE IF EXISTS public.eql_v2_configuration; -- Regular old table DROP TABLE IF EXISTS plaintext; diff --git a/tests/sql/schema.sql b/tests/sql/schema.sql index 57f5e29f..b23cbf74 100644 --- a/tests/sql/schema.sql +++ b/tests/sql/schema.sql @@ -1,4 +1,4 @@ -TRUNCATE TABLE public.eql_v1_configuration; +TRUNCATE TABLE public.eql_v2_configuration; -- Regular old table DROP TABLE IF EXISTS plaintext; @@ -13,95 +13,95 @@ DROP TABLE IF EXISTS encrypted; CREATE TABLE encrypted ( id bigint, plaintext text, - encrypted_text eql_v1_encrypted, - encrypted_bool eql_v1_encrypted, - encrypted_int2 eql_v1_encrypted, - encrypted_int4 
eql_v1_encrypted, - encrypted_int8 eql_v1_encrypted, - encrypted_float8 eql_v1_encrypted, - encrypted_date eql_v1_encrypted, - encrypted_jsonb eql_v1_encrypted, + encrypted_text eql_v2_encrypted, + encrypted_bool eql_v2_encrypted, + encrypted_int2 eql_v2_encrypted, + encrypted_int4 eql_v2_encrypted, + encrypted_int8 eql_v2_encrypted, + encrypted_float8 eql_v2_encrypted, + encrypted_date eql_v2_encrypted, + encrypted_jsonb eql_v2_encrypted, PRIMARY KEY(id) ); DROP TABLE IF EXISTS unconfigured; CREATE TABLE unconfigured ( id bigint, - encrypted_unconfigured eql_v1_encrypted, + encrypted_unconfigured eql_v2_encrypted, PRIMARY KEY(id) ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_text', 'unique', 'text' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_text', 'match', 'text' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_text', 'ore', 'text' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_bool', 'unique', 'boolean' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_bool', 'ore', 'boolean' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_int2', 'unique', 'small_int' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_int2', 'ore', 'small_int' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_int4', 'unique', 'int' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_int4', 'ore', 'int' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_int8', 'unique', 'big_int' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_int8', 'ore', @@ -109,35 +109,35 @@ SELECT eql_v1.add_index( ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_float8', 'unique', 'double' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 
'encrypted_float8', 'ore', 'double' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_date', 'unique', 'date' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_date', 'ore', 'date' ); -SELECT eql_v1.add_index( +SELECT eql_v2.add_index( 'encrypted', 'encrypted_jsonb', 'ste_vec', @@ -145,5 +145,5 @@ SELECT eql_v1.add_index( '{"prefix": "encrypted/encrypted_jsonb"}' ); -SELECT eql_v1.encrypt(); -SELECT eql_v1.activate(); +SELECT eql_v2.encrypt(); +SELECT eql_v2.activate(); diff --git a/tests/tasks/test/integration/psql-passthrough.sh b/tests/tasks/test/integration/psql-passthrough.sh index c9319539..96ec1e4e 100755 --- a/tests/tasks/test/integration/psql-passthrough.sh +++ b/tests/tasks/test/integration/psql-passthrough.sh @@ -17,10 +17,10 @@ EOF # Confirm that there is indeed no config set +e -OUTPUT="$(docker exec -i postgres${CONTAINER_SUFFIX} psql 'postgresql://cipherstash:password@proxy:6432/cipherstash?sslmode=disable' --command 'SELECT * FROM eql_v1_configuration' 2>&1)" +OUTPUT="$(docker exec -i postgres${CONTAINER_SUFFIX} psql 'postgresql://cipherstash:password@proxy:6432/cipherstash?sslmode=disable' --command 'SELECT * FROM eql_v2_configuration' 2>&1)" retval=$? -if echo ${OUTPUT} | grep -v 'relation "eql_v1_configuration" does not exist'; then - echo "error: did not see string in output: \"relation "eql_v1_configuration" does not exist\"" +if echo ${OUTPUT} | grep -v 'relation "eql_v2_configuration" does not exist'; then + echo "error: did not see string in output: \"relation "eql_v2_configuration" does not exist\"" exit 1 fi