|
9 | 9 | use std::collections::BTreeMap; |
10 | 10 |
|
11 | 11 | use super::ast::PathElement; |
12 | | -use super::resolver::{ExpressionMaps, resolve_element_name, resolve_path}; |
| 12 | +use super::resolver::{ExpressionMaps, resolve_element_name, resolve_name_ref}; |
13 | 13 | use super::tokenizer::Token; |
14 | 14 | use crate::error::DynamoDbError; |
15 | 15 | use crate::types::{AttributeValue, Item}; |
@@ -39,131 +39,134 @@ pub fn parse_projection(tokens: &[Token]) -> Result<Vec<Vec<PathElement>>, Dynam |
39 | 39 | Ok(paths) |
40 | 40 | } |
41 | 41 |
|
| 42 | +/// A node in the projection trie. |
| 43 | +/// |
| 44 | +/// Each node selects part of the item. `terminal` means the whole value at |
| 45 | +/// this position is projected. Otherwise the node descends via `attrs` (for a |
| 46 | +/// map) or `indices` (for a list). A value is either a map or a list, so only |
| 47 | +/// one of the two child maps is populated on a given node in practice. |
| 48 | +/// |
| 49 | +/// `indices` is a `BTreeMap` so selected list elements come out in ascending |
| 50 | +/// original-index order, which is how DynamoDB compacts list projections. |
| 51 | +#[derive(Default)] |
| 52 | +struct ProjNode { |
| 53 | + terminal: bool, |
| 54 | + attrs: BTreeMap<String, ProjNode>, |
| 55 | + indices: BTreeMap<usize, ProjNode>, |
| 56 | +} |
| 57 | + |
42 | 58 | /// Apply a projection to an item, returning only the requested attributes. |
43 | 59 | /// |
| 60 | +/// List elements selected by index are returned in a new list compacted in |
| 61 | +/// ascending original-index order (matching Amazon DynamoDB), not in the order |
| 62 | +/// the indices appear in the expression. Map keys not on a projected path are |
| 63 | +/// dropped. The structure of the original item is otherwise preserved. |
| 64 | +/// |
44 | 65 | /// # Errors |
45 | 66 | /// |
46 | | -/// Returns `ValidationException` for unresolvable `#name` references. |
| 67 | +/// Returns `ValidationException` for unresolvable `#name` references or a path |
| 68 | +/// that starts with an index. |
47 | 69 | pub fn apply_projection( |
48 | 70 | item: &Item, |
49 | 71 | paths: &[Vec<PathElement>], |
50 | 72 | maps: &ExpressionMaps, |
51 | 73 | ) -> Result<Item, DynamoDbError> { |
52 | | - let mut result = BTreeMap::new(); |
| 74 | + let root = build_trie(paths, maps)?; |
53 | 75 |
|
54 | | - for path in paths { |
55 | | - if path.is_empty() { |
56 | | - continue; |
57 | | - } |
58 | | - let top_name = resolve_element_name(&path[0], maps)?; |
59 | | - if path.len() == 1 { |
60 | | - // Top-level attribute |
61 | | - if let Some(val) = item.get(top_name.as_ref()) { |
62 | | - result.insert(top_name.into_owned(), val.clone()); |
63 | | - } |
64 | | - } else { |
65 | | - // Nested path — resolve the value and insert at the top level |
66 | | - // with the nested structure preserved |
67 | | - if let Some(val) = resolve_path(path, item, maps)? { |
68 | | - insert_nested(&mut result, path, maps, val)?; |
69 | | - } |
| 76 | + let mut result = BTreeMap::new(); |
| 77 | + for (name, child) in &root.attrs { |
| 78 | + if let Some(val) = item.get(name) |
| 79 | + && let Some(projected) = project_value(val, child) |
| 80 | + { |
| 81 | + result.insert(name.clone(), projected); |
70 | 82 | } |
71 | 83 | } |
72 | | - |
73 | 84 | Ok(result) |
74 | 85 | } |
75 | 86 |
|
76 | | -/// Insert a value at a nested path in the result item, creating intermediate |
77 | | -/// maps/lists as needed. |
78 | | -/// |
79 | | -/// DynamoDB projection semantics: |
80 | | -/// - `mylist[N]` → `{"mylist": [value]}` |
81 | | -/// - `mylist[N].attr` → `{"mylist": [{"attr": value}]}` |
82 | | -/// - `mymap.attr` → `{"mymap": {"attr": value}}` |
83 | | -fn insert_nested( |
84 | | - result: &mut Item, |
85 | | - path: &[PathElement], |
| 87 | +/// Build the projection trie from the parsed paths, resolving `#name` refs. |
| 88 | +fn build_trie( |
| 89 | + paths: &[Vec<PathElement>], |
86 | 90 | maps: &ExpressionMaps, |
87 | | - value: &AttributeValue, |
88 | | -) -> Result<(), DynamoDbError> { |
89 | | - if path.is_empty() { |
90 | | - return Ok(()); |
91 | | - } |
92 | | - |
93 | | - let top_name = resolve_element_name(&path[0], maps)?.into_owned(); |
94 | | - |
95 | | - if path.len() == 1 { |
96 | | - result.insert(top_name, value.clone()); |
97 | | - return Ok(()); |
98 | | - } |
99 | | - |
100 | | - // Build the value from the inside out, starting from the leaf. |
101 | | - let wrapped = wrap_from_tail(&path[1..], maps, value)?; |
102 | | - let entry = result.entry(top_name); |
103 | | - match entry { |
104 | | - std::collections::btree_map::Entry::Vacant(e) => { |
105 | | - e.insert(wrapped); |
| 91 | +) -> Result<ProjNode, DynamoDbError> { |
| 92 | + let mut root = ProjNode::default(); |
| 93 | + for path in paths { |
| 94 | + if path.is_empty() { |
| 95 | + continue; |
106 | 96 | } |
107 | | - std::collections::btree_map::Entry::Occupied(mut e) => { |
108 | | - merge_projected(e.get_mut(), &wrapped); |
| 97 | + let mut node = &mut root; |
| 98 | + for (i, element) in path.iter().enumerate() { |
| 99 | + match element { |
| 100 | + PathElement::Attribute(_) => { |
| 101 | + // The first element must be an attribute; `resolve_element_name` |
| 102 | + // rejects an index-start path, matching the prior behavior. |
| 103 | + let name = if i == 0 { |
| 104 | + resolve_element_name(element, maps)? |
| 105 | + } else { |
| 106 | + let PathElement::Attribute(raw) = element else { |
| 107 | + unreachable!() |
| 108 | + }; |
| 109 | + resolve_name_ref(raw, maps)? |
| 110 | + }; |
| 111 | + node = node.attrs.entry(name.into_owned()).or_default(); |
| 112 | + } |
| 113 | + PathElement::Index(idx) => { |
| 114 | + if i == 0 { |
| 115 | + return Err(DynamoDbError::ValidationException( |
| 116 | + "Invalid expression: path cannot start with an index".to_owned(), |
| 117 | + )); |
| 118 | + } |
| 119 | + node = node.indices.entry(*idx).or_default(); |
| 120 | + } |
| 121 | + } |
109 | 122 | } |
| 123 | + node.terminal = true; |
110 | 124 | } |
111 | | - Ok(()) |
| 125 | + Ok(root) |
112 | 126 | } |
113 | 127 |
|
114 | | -/// Build the nested structure from a path tail and a leaf value. |
115 | | -/// E.g. for path `[Index(0), Attribute("val")]` and value `"target"`, |
116 | | -/// produces `L([M({"val": "target"})])`. |
117 | | -fn wrap_from_tail( |
118 | | - path: &[PathElement], |
119 | | - maps: &ExpressionMaps, |
120 | | - value: &AttributeValue, |
121 | | -) -> Result<AttributeValue, DynamoDbError> { |
122 | | - if path.is_empty() { |
123 | | - return Ok(value.clone()); |
| 128 | +/// Project a single value against a trie node. |
| 129 | +/// |
| 130 | +/// Returns `None` when nothing is selected (missing key, out-of-bounds index, |
| 131 | +/// or a path that does not match the value's type), so the caller omits the |
| 132 | +/// attribute entirely. |
| 133 | +fn project_value(value: &AttributeValue, node: &ProjNode) -> Option<AttributeValue> { |
| 134 | + if node.terminal { |
| 135 | + return Some(value.clone()); |
124 | 136 | } |
125 | 137 |
|
126 | | - match &path[0] { |
127 | | - PathElement::Attribute(name) => { |
128 | | - let resolved = super::resolver::resolve_name_ref(name, maps)?; |
129 | | - let inner = wrap_from_tail(&path[1..], maps, value)?; |
130 | | - let mut map = BTreeMap::new(); |
131 | | - map.insert(resolved.into_owned(), inner); |
132 | | - Ok(AttributeValue::M(map)) |
133 | | - } |
134 | | - PathElement::Index(_) => { |
135 | | - let inner = wrap_from_tail(&path[1..], maps, value)?; |
136 | | - Ok(AttributeValue::L(vec![inner])) |
| 138 | + if !node.attrs.is_empty() { |
| 139 | + let AttributeValue::M(map) = value else { |
| 140 | + return None; |
| 141 | + }; |
| 142 | + let mut out = BTreeMap::new(); |
| 143 | + for (name, child) in &node.attrs { |
| 144 | + if let Some(child_val) = map.get(name) |
| 145 | + && let Some(projected) = project_value(child_val, child) |
| 146 | + { |
| 147 | + out.insert(name.clone(), projected); |
| 148 | + } |
137 | 149 | } |
| 150 | + return (!out.is_empty()).then_some(AttributeValue::M(out)); |
138 | 151 | } |
139 | | -} |
140 | 152 |
|
141 | | -/// Merge a projected value into an existing structure (for multiple projections |
142 | | -/// on the same top-level attribute). |
143 | | -fn merge_projected(existing: &mut AttributeValue, new: &AttributeValue) { |
144 | | - match (existing, new) { |
145 | | - (AttributeValue::M(existing_map), AttributeValue::M(new_map)) => { |
146 | | - for (k, v) in new_map { |
147 | | - match existing_map.get_mut(k) { |
148 | | - Some(existing_v) => merge_projected(existing_v, v), |
149 | | - None => { |
150 | | - existing_map.insert(k.clone(), v.clone()); |
151 | | - } |
152 | | - } |
| 153 | + if !node.indices.is_empty() { |
| 154 | + let AttributeValue::L(list) = value else { |
| 155 | + return None; |
| 156 | + }; |
| 157 | + let mut out = Vec::new(); |
| 158 | + // BTreeMap iterates indices ascending, compacting the projected list. |
| 159 | + for (idx, child) in &node.indices { |
| 160 | + if let Some(element) = list.get(*idx) |
| 161 | + && let Some(projected) = project_value(element, child) |
| 162 | + { |
| 163 | + out.push(projected); |
153 | 164 | } |
154 | 165 | } |
155 | | - (AttributeValue::L(existing_list), AttributeValue::L(new_list)) => { |
156 | | - // For list projections, DynamoDB merges into the single-element list |
157 | | - if existing_list.len() == 1 && new_list.len() == 1 { |
158 | | - merge_projected(&mut existing_list[0], &new_list[0]); |
159 | | - } else { |
160 | | - existing_list.extend(new_list.iter().cloned()); |
161 | | - } |
162 | | - } |
163 | | - (existing, new) => { |
164 | | - *existing = new.clone(); |
165 | | - } |
| 166 | + return (!out.is_empty()).then_some(AttributeValue::L(out)); |
166 | 167 | } |
| 168 | + |
| 169 | + None |
167 | 170 | } |
168 | 171 |
|
169 | 172 | #[cfg(test)] |
@@ -298,6 +301,158 @@ mod tests { |
298 | 301 | let result = project("mylist[5]", &item, HashMap::new()).unwrap(); |
299 | 302 | assert!(result.is_empty()); |
300 | 303 | } |
| 304 | + |
| 305 | + fn list_item() -> Item { |
| 306 | + let mut item = BTreeMap::new(); |
| 307 | + item.insert("pk".into(), AttributeValue::S("p1".into())); |
| 308 | + item.insert( |
| 309 | + "mylist".into(), |
| 310 | + AttributeValue::L(vec![ |
| 311 | + AttributeValue::S("zero".into()), |
| 312 | + AttributeValue::S("one".into()), |
| 313 | + AttributeValue::S("two".into()), |
| 314 | + AttributeValue::S("three".into()), |
| 315 | + ]), |
| 316 | + ); |
| 317 | + item.insert( |
| 318 | + "with_null".into(), |
| 319 | + AttributeValue::L(vec![ |
| 320 | + AttributeValue::S("keep0".into()), |
| 321 | + AttributeValue::Null, |
| 322 | + AttributeValue::S("keep2".into()), |
| 323 | + ]), |
| 324 | + ); |
| 325 | + item |
| 326 | + } |
| 327 | + |
| 328 | + fn assert_list(result: &Item, key: &str, expected: &[AttributeValue]) { |
| 329 | + match result.get(key) { |
| 330 | + Some(AttributeValue::L(list)) => assert_eq!(list.as_slice(), expected), |
| 331 | + other => panic!("Expected L for {key}, got {other:?}"), |
| 332 | + } |
| 333 | + } |
| 334 | + |
| 335 | + #[test] |
| 336 | + fn project_two_list_indices_compacted() { |
| 337 | + let item = list_item(); |
| 338 | + let result = project("mylist[0], mylist[2]", &item, HashMap::new()).unwrap(); |
| 339 | + assert_list( |
| 340 | + &result, |
| 341 | + "mylist", |
| 342 | + &[ |
| 343 | + AttributeValue::S("zero".into()), |
| 344 | + AttributeValue::S("two".into()), |
| 345 | + ], |
| 346 | + ); |
| 347 | + } |
| 348 | + |
| 349 | + #[test] |
| 350 | + fn project_list_indices_ordered_by_index_not_expression() { |
| 351 | + let item = list_item(); |
| 352 | + // Reversed expression order still comes out in ascending index order. |
| 353 | + let result = project("mylist[2], mylist[0]", &item, HashMap::new()).unwrap(); |
| 354 | + assert_list( |
| 355 | + &result, |
| 356 | + "mylist", |
| 357 | + &[ |
| 358 | + AttributeValue::S("zero".into()), |
| 359 | + AttributeValue::S("two".into()), |
| 360 | + ], |
| 361 | + ); |
| 362 | + } |
| 363 | + |
| 364 | + #[test] |
| 365 | + fn project_list_index_gap_compacted() { |
| 366 | + let item = list_item(); |
| 367 | + let result = project("mylist[1], mylist[3]", &item, HashMap::new()).unwrap(); |
| 368 | + assert_list( |
| 369 | + &result, |
| 370 | + "mylist", |
| 371 | + &[ |
| 372 | + AttributeValue::S("one".into()), |
| 373 | + AttributeValue::S("three".into()), |
| 374 | + ], |
| 375 | + ); |
| 376 | + } |
| 377 | + |
| 378 | + #[test] |
| 379 | + fn project_null_element_by_index_preserved() { |
| 380 | + let item = list_item(); |
| 381 | + let result = project("with_null[1]", &item, HashMap::new()).unwrap(); |
| 382 | + assert_list(&result, "with_null", &[AttributeValue::Null]); |
| 383 | + } |
| 384 | + |
| 385 | + #[test] |
| 386 | + fn project_unselected_null_dropped() { |
| 387 | + let item = list_item(); |
| 388 | + let result = project("with_null[0], with_null[2]", &item, HashMap::new()).unwrap(); |
| 389 | + assert_list( |
| 390 | + &result, |
| 391 | + "with_null", |
| 392 | + &[ |
| 393 | + AttributeValue::S("keep0".into()), |
| 394 | + AttributeValue::S("keep2".into()), |
| 395 | + ], |
| 396 | + ); |
| 397 | + } |
| 398 | + |
| 399 | + #[test] |
| 400 | + fn project_whole_list_preserves_null() { |
| 401 | + let item = list_item(); |
| 402 | + let result = project("with_null", &item, HashMap::new()).unwrap(); |
| 403 | + assert_list( |
| 404 | + &result, |
| 405 | + "with_null", |
| 406 | + &[ |
| 407 | + AttributeValue::S("keep0".into()), |
| 408 | + AttributeValue::Null, |
| 409 | + AttributeValue::S("keep2".into()), |
| 410 | + ], |
| 411 | + ); |
| 412 | + } |
| 413 | + |
| 414 | + #[test] |
| 415 | + fn project_list_of_maps_subfield_multi() { |
| 416 | + let mut item = BTreeMap::new(); |
| 417 | + item.insert("pk".into(), AttributeValue::S("p".into())); |
| 418 | + let mk = |v: &str, x: &str| { |
| 419 | + let mut m = BTreeMap::new(); |
| 420 | + m.insert("val".into(), AttributeValue::S(v.into())); |
| 421 | + m.insert("x".into(), AttributeValue::S(x.into())); |
| 422 | + AttributeValue::M(m) |
| 423 | + }; |
| 424 | + item.insert( |
| 425 | + "lom".into(), |
| 426 | + AttributeValue::L(vec![mk("a0", "x0"), mk("a1", "x1"), mk("a2", "x2")]), |
| 427 | + ); |
| 428 | + |
| 429 | + let result = project("lom[0].val, lom[2].val", &item, HashMap::new()).unwrap(); |
| 430 | + let only_val = |v: &str| { |
| 431 | + let mut m = BTreeMap::new(); |
| 432 | + m.insert("val".into(), AttributeValue::S(v.into())); |
| 433 | + AttributeValue::M(m) |
| 434 | + }; |
| 435 | + assert_list(&result, "lom", &[only_val("a0"), only_val("a2")]); |
| 436 | + } |
| 437 | + |
| 438 | + #[test] |
| 439 | + fn project_same_index_merges_subfields() { |
| 440 | + let mut item = BTreeMap::new(); |
| 441 | + item.insert("pk".into(), AttributeValue::S("p".into())); |
| 442 | + let mut m = BTreeMap::new(); |
| 443 | + m.insert("a".into(), AttributeValue::S("av".into())); |
| 444 | + m.insert("b".into(), AttributeValue::S("bv".into())); |
| 445 | + m.insert("c".into(), AttributeValue::S("cv".into())); |
| 446 | + item.insert("l".into(), AttributeValue::L(vec![AttributeValue::M(m)])); |
| 447 | + |
| 448 | + // Two paths selecting the same element merge their subfields. |
| 449 | + let result = project("l[0].a, l[0].c", &item, HashMap::new()).unwrap(); |
| 450 | + let mut expected = BTreeMap::new(); |
| 451 | + expected.insert("a".into(), AttributeValue::S("av".into())); |
| 452 | + expected.insert("c".into(), AttributeValue::S("cv".into())); |
| 453 | + assert_list(&result, "l", &[AttributeValue::M(expected)]); |
| 454 | + } |
| 455 | + |
301 | 456 | #[test] |
302 | 457 | fn project_list_index_into_map_preserves_structure() { |
303 | 458 | let mut item = BTreeMap::new(); |
|
0 commit comments