Skip to content

Commit 32df58e

Browse files
authored
Merge pull request #355 from cipherstash/jsonb-term-filters
feat(ste-vec): add term_filters support with integration tests
2 parents c9c810c + 42a81ea commit 32df58e

5 files changed

Lines changed: 258 additions & 2 deletions

File tree

packages/cipherstash-proxy-integration/src/common.rs

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,82 @@ pub async fn assert_encrypted_jsonb(id: i64, plaintext: &Value) {
371371
}
372372
}
373373

374+
/// Insert a JSON value into the encrypted_jsonb_filtered column (with downcase term filter).
375+
pub async fn insert_jsonb_filtered() -> (i64, Value) {
376+
let id = random_id();
377+
378+
let encrypted_jsonb = serde_json::json!({
379+
"id": id,
380+
"name": "John",
381+
"city": "Melbourne",
382+
"nested": {
383+
"title": "Engineer",
384+
"department": "Technology",
385+
},
386+
"tags": ["Hello", "World"],
387+
});
388+
389+
let sql = "INSERT INTO encrypted (id, encrypted_jsonb_filtered) VALUES ($1, $2)".to_string();
390+
391+
insert(&sql, &[&id, &encrypted_jsonb]).await;
392+
393+
// Verify encryption actually occurred
394+
assert_encrypted_jsonb_filtered(id, &encrypted_jsonb).await;
395+
396+
(id, encrypted_jsonb)
397+
}
398+
399+
/// Insert multiple JSON values for term filter search testing.
400+
/// Creates rows with mixed case strings that should match when queried with lowercase.
401+
pub async fn insert_jsonb_filtered_for_search() -> Vec<(i64, Value)> {
402+
let test_data = vec![
403+
serde_json::json!({"name": "Alice", "number": 1}),
404+
serde_json::json!({"name": "BOB", "number": 2}),
405+
serde_json::json!({"name": "Charlie", "number": 3}),
406+
serde_json::json!({"name": "DIANA", "number": 4}),
407+
serde_json::json!({"name": "Eve", "number": 5}),
408+
];
409+
410+
let mut results = Vec::new();
411+
412+
for encrypted_jsonb in test_data {
413+
let id = random_id();
414+
415+
let sql = "INSERT INTO encrypted (id, encrypted_jsonb_filtered) VALUES ($1, $2)";
416+
insert(sql, &[&id, &encrypted_jsonb]).await;
417+
418+
// Verify encryption actually occurred
419+
assert_encrypted_jsonb_filtered(id, &encrypted_jsonb).await;
420+
421+
results.push((id, encrypted_jsonb));
422+
}
423+
424+
results
425+
}
426+
427+
/// Verifies that a JSON value in encrypted_jsonb_filtered was actually encrypted.
428+
pub async fn assert_encrypted_jsonb_filtered(id: i64, plaintext: &Value) {
429+
let sql = "SELECT encrypted_jsonb_filtered::text FROM encrypted WHERE id = $1";
430+
let stored: Vec<String> = query_direct_by(sql, &id).await;
431+
432+
assert_eq!(stored.len(), 1, "Expected exactly one row");
433+
let stored_text = &stored[0];
434+
435+
let plaintext_str = plaintext.to_string();
436+
assert_ne!(
437+
stored_text, &plaintext_str,
438+
"ENCRYPTION FAILED for encrypted_jsonb_filtered: Stored value matches plaintext! Data was not encrypted."
439+
);
440+
441+
// Additional verification: the encrypted format should be different structure
442+
if let Ok(stored_json) = serde_json::from_str::<Value>(stored_text) {
443+
assert_ne!(
444+
stored_json, *plaintext,
445+
"ENCRYPTION FAILED for encrypted_jsonb_filtered: Stored JSON structure matches plaintext!"
446+
);
447+
}
448+
}
449+
374450
/// Verifies that a numeric value was actually encrypted in the database.
375451
/// Queries directly (bypassing proxy) and asserts stored value differs from plaintext.
376452
pub async fn assert_encrypted_numeric<T>(id: i64, column: &str, plaintext: T)
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
//! Tests for term filters on SteVec indexes.
2+
//!
3+
//! The `encrypted_jsonb_filtered` column has a downcase term filter configured,
4+
//! meaning all string values are lowercased before encryption. This enables
5+
//! case-insensitive queries - but note that the decrypted data is also lowercased.
6+
7+
#[cfg(test)]
8+
mod tests {
9+
use crate::common::{
10+
clear, insert_jsonb_filtered, insert_jsonb_filtered_for_search, query_by_params,
11+
simple_query, trace,
12+
};
13+
use crate::support::json_path::JsonPath;
14+
use serde_json::Value;
15+
16+
/// Test case-insensitive equality matching with the downcase term filter.
17+
/// Data is inserted with mixed case ("Alice", "BOB") but stored/returned as lowercase.
18+
#[tokio::test]
19+
async fn select_jsonb_filtered_case_insensitive_eq() {
20+
trace();
21+
clear().await;
22+
insert_jsonb_filtered_for_search().await;
23+
24+
// Query with lowercase "alice" should match the row originally inserted as "Alice"
25+
let selector = "name";
26+
let value = Value::from("alice");
27+
28+
// Extended protocol
29+
let sql =
30+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE encrypted_jsonb_filtered -> $1 = $2";
31+
let actual = query_by_params::<Value>(sql, &[&selector, &value]).await;
32+
33+
// Term filter lowercases during encryption, so returned value is lowercase
34+
assert_eq!(actual.len(), 1);
35+
assert_eq!(actual[0]["name"], "alice");
36+
assert_eq!(actual[0]["number"], 1);
37+
}
38+
39+
/// Test that data inserted with uppercase is stored and returned as lowercase
40+
#[tokio::test]
41+
async fn select_jsonb_filtered_uppercase_query_matches() {
42+
trace();
43+
clear().await;
44+
insert_jsonb_filtered_for_search().await;
45+
46+
// Query with "bob" should match the row originally inserted as "BOB"
47+
let selector = "name";
48+
let value = Value::from("bob");
49+
50+
let sql =
51+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE encrypted_jsonb_filtered -> $1 = $2";
52+
let actual = query_by_params::<Value>(sql, &[&selector, &value]).await;
53+
54+
// Both stored and queried values are lowercased
55+
assert_eq!(actual.len(), 1);
56+
assert_eq!(actual[0]["name"], "bob");
57+
assert_eq!(actual[0]["number"], 2);
58+
}
59+
60+
/// Test simple protocol with case-insensitive matching
61+
#[tokio::test]
62+
async fn select_jsonb_filtered_simple_protocol() {
63+
trace();
64+
clear().await;
65+
insert_jsonb_filtered_for_search().await;
66+
67+
// Simple protocol query - value is lowercased on both sides
68+
let sql =
69+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE encrypted_jsonb_filtered -> 'name' = '\"charlie\"'";
70+
let actual = simple_query::<Value>(sql).await;
71+
72+
assert_eq!(actual.len(), 1);
73+
assert_eq!(actual[0]["name"], "charlie");
74+
assert_eq!(actual[0]["number"], 3);
75+
}
76+
77+
/// Test that numbers are not affected by the downcase filter
78+
#[tokio::test]
79+
async fn select_jsonb_filtered_numbers_unchanged() {
80+
trace();
81+
clear().await;
82+
insert_jsonb_filtered_for_search().await;
83+
84+
let selector = "number";
85+
let value = Value::from(4);
86+
87+
let sql =
88+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE encrypted_jsonb_filtered -> $1 = $2";
89+
let actual = query_by_params::<Value>(sql, &[&selector, &value]).await;
90+
91+
assert_eq!(actual.len(), 1);
92+
// Name is lowercased by term filter
93+
assert_eq!(actual[0]["name"], "diana");
94+
assert_eq!(actual[0]["number"], 4);
95+
}
96+
97+
/// Test case-insensitive matching using jsonb_path_query_first
98+
#[tokio::test]
99+
async fn select_jsonb_filtered_path_query_case_insensitive() {
100+
trace();
101+
clear().await;
102+
insert_jsonb_filtered_for_search().await;
103+
104+
let json_path_selector = JsonPath::new("name");
105+
let value = Value::from("eve");
106+
107+
let sql =
108+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE jsonb_path_query_first(encrypted_jsonb_filtered, $1) = $2";
109+
let actual = query_by_params::<Value>(sql, &[&json_path_selector, &value]).await;
110+
111+
assert_eq!(actual.len(), 1);
112+
assert_eq!(actual[0]["name"], "eve");
113+
assert_eq!(actual[0]["number"], 5);
114+
}
115+
116+
/// Test nested field access with term filter
117+
#[tokio::test]
118+
async fn select_jsonb_filtered_nested_case_insensitive() {
119+
trace();
120+
clear().await;
121+
let (_id, _) = insert_jsonb_filtered().await;
122+
123+
// The fixture has nested.title = "Engineer" which gets lowercased
124+
// Query with lowercase should match
125+
let json_path_selector = JsonPath::new("nested.title");
126+
let value = Value::from("engineer");
127+
128+
let sql =
129+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE jsonb_path_query_first(encrypted_jsonb_filtered, $1) = $2";
130+
let actual = query_by_params::<Value>(sql, &[&json_path_selector, &value]).await;
131+
132+
assert_eq!(actual.len(), 1);
133+
assert_eq!(actual[0]["nested"]["title"], "engineer");
134+
}
135+
136+
/// Test that original fixture data is correctly inserted and queryable
137+
#[tokio::test]
138+
async fn select_jsonb_filtered_fixture_data() {
139+
trace();
140+
clear().await;
141+
let (_id, _expected) = insert_jsonb_filtered().await;
142+
143+
// Query by name field - both query and stored data are lowercased
144+
let selector = "name";
145+
let value = Value::from("john");
146+
147+
let sql =
148+
"SELECT encrypted_jsonb_filtered FROM encrypted WHERE encrypted_jsonb_filtered -> $1 = $2";
149+
let actual = query_by_params::<Value>(sql, &[&selector, &value]).await;
150+
151+
assert_eq!(actual.len(), 1);
152+
assert_eq!(actual[0]["name"], "john");
153+
assert_eq!(actual[0]["city"], "melbourne");
154+
}
155+
}

packages/cipherstash-proxy-integration/src/select/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ mod jsonb_get_field_as_ciphertext;
99
mod jsonb_path_exists;
1010
mod jsonb_path_query;
1111
mod jsonb_path_query_first;
12+
mod jsonb_term_filter;
1213
mod order_by;
1314
mod order_by_with_null;
1415
mod pg_catalog;

packages/cipherstash-proxy/src/proxy/encrypt_config/config.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,8 @@ pub struct MatchIndexOpts {
9696
/// Options for an `ste_vec` index as they appear in the encrypt configuration.
///
/// The fields are handed to `IndexType::SteVec` when the column configuration is built.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
9797
pub struct SteVecIndexOpts {
9898
/// Prefix string passed through to the index unchanged.
/// NOTE(review): the test schema uses `"<table>/<column>"` here — confirm the expected format.
prefix: String,
99+
/// Term filters forwarded to the index (the test schema configures `downcase`).
/// `#[serde(default)]` makes the key optional: when it is absent the list is empty.
#[serde(default)]
100+
term_filters: Vec<TokenFilter>,
99101
}
100102

101103
fn default_tokenizer() -> Tokenizer {
@@ -182,10 +184,14 @@ impl Column {
182184
}))
183185
}
184186

185-
if let Some(SteVecIndexOpts { prefix }) = self.indexes.ste_vec_index {
187+
if let Some(SteVecIndexOpts {
188+
prefix,
189+
term_filters,
190+
}) = self.indexes.ste_vec_index
191+
{
186192
config = config.add_index(Index::new(IndexType::SteVec {
187193
prefix,
188-
term_filters: vec![],
194+
term_filters,
189195
}))
190196
}
191197

tests/sql/schema.sql

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ CREATE TABLE encrypted (
3333
encrypted_float8 eql_v2_encrypted,
3434
encrypted_date eql_v2_encrypted,
3535
encrypted_jsonb eql_v2_encrypted,
36+
encrypted_jsonb_filtered eql_v2_encrypted,
3637
PRIMARY KEY(id)
3738
);
3839

@@ -157,6 +158,14 @@ SELECT eql_v2.add_search_config(
157158
'{"prefix": "encrypted/encrypted_jsonb"}'
158159
);
159160

161+
-- Adds an 'ste_vec' search config for encrypted.encrypted_jsonb_filtered whose options
-- include a "downcase" term filter; the integration tests rely on this for lowercase lookups.
-- NOTE(review): argument order looks like (table, column, index kind, cast type, options) — confirm against EQL docs.
SELECT eql_v2.add_search_config(
162+
'encrypted',
163+
'encrypted_jsonb_filtered',
164+
'ste_vec',
165+
'jsonb',
166+
'{"prefix": "encrypted/encrypted_jsonb_filtered", "term_filters": [{"kind": "downcase"}]}'
167+
);
168+
160169
SELECT eql_v2.add_encrypted_constraint('encrypted', 'encrypted_text');
161170

162171

@@ -177,6 +186,7 @@ CREATE TABLE encrypted_elixir (
177186
encrypted_float8 eql_v2_encrypted,
178187
encrypted_date eql_v2_encrypted,
179188
encrypted_jsonb eql_v2_encrypted,
189+
encrypted_jsonb_filtered eql_v2_encrypted,
180190
PRIMARY KEY(id)
181191
);
182192

@@ -301,5 +311,13 @@ SELECT eql_v2.add_search_config(
301311
'{"prefix": "encrypted/encrypted_jsonb"}'
302312
);
303313

314+
-- Adds an 'ste_vec' search config with a "downcase" term filter for
-- encrypted_elixir.encrypted_jsonb_filtered.
-- NOTE(review): the prefix names the "encrypted" table rather than "encrypted_elixir" — confirm this is intended.
SELECT eql_v2.add_search_config(
315+
'encrypted_elixir',
316+
'encrypted_jsonb_filtered',
317+
'ste_vec',
318+
'jsonb',
319+
'{"prefix": "encrypted/encrypted_jsonb_filtered", "term_filters": [{"kind": "downcase"}]}'
320+
);
321+
304322
SELECT eql_v2.add_encrypted_constraint('encrypted_elixir', 'encrypted_text');
305323

0 commit comments

Comments
 (0)