Skip to content

Commit cbea20f

Browse files
yesyayenpdf-amzn
authored andcommitted
fix: compact nested projection list elements by index
Multi-index list projections (e.g. `a[0], a[2]`) collapsed to one element: each path built a one-element list and the merge overwrote it. Replace the per-path wrap/merge with a projection trie that walks the item once, emitting selected indices in ascending order. Preserves projected NULLs and merges same-index sub-paths.
1 parent ac4abf6 commit cbea20f

2 files changed

Lines changed: 398 additions & 97 deletions

File tree

crates/core/src/expression/projection.rs

Lines changed: 252 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
use std::collections::BTreeMap;
1010

1111
use super::ast::PathElement;
12-
use super::resolver::{ExpressionMaps, resolve_element_name, resolve_path};
12+
use super::resolver::{ExpressionMaps, resolve_element_name, resolve_name_ref};
1313
use super::tokenizer::Token;
1414
use crate::error::DynamoDbError;
1515
use crate::types::{AttributeValue, Item};
@@ -39,131 +39,134 @@ pub fn parse_projection(tokens: &[Token]) -> Result<Vec<Vec<PathElement>>, Dynam
3939
Ok(paths)
4040
}
4141

42+
/// A node in the projection trie.
43+
///
44+
/// Each node selects part of the item. `terminal` means the whole value at
45+
/// this position is projected. Otherwise the node descends via `attrs` (for a
46+
/// map) or `indices` (for a list). A value is either a map or a list, so only
47+
/// one of the two child maps is populated on a given node in practice.
48+
///
49+
/// `indices` is a `BTreeMap` so selected list elements come out in ascending
50+
/// original-index order, which is how DynamoDB compacts list projections.
51+
#[derive(Default)]
52+
struct ProjNode {
53+
terminal: bool,
54+
attrs: BTreeMap<String, ProjNode>,
55+
indices: BTreeMap<usize, ProjNode>,
56+
}
57+
4258
/// Apply a projection to an item, returning only the requested attributes.
4359
///
60+
/// List elements selected by index are returned in a new list compacted in
61+
/// ascending original-index order (matching Amazon DynamoDB), not in the order
62+
/// the indices appear in the expression. Map keys not on a projected path are
63+
/// dropped. The structure of the original item is otherwise preserved.
64+
///
4465
/// # Errors
4566
///
46-
/// Returns `ValidationException` for unresolvable `#name` references.
67+
/// Returns `ValidationException` for unresolvable `#name` references or a path
68+
/// that starts with an index.
4769
pub fn apply_projection(
4870
item: &Item,
4971
paths: &[Vec<PathElement>],
5072
maps: &ExpressionMaps,
5173
) -> Result<Item, DynamoDbError> {
52-
let mut result = BTreeMap::new();
74+
let root = build_trie(paths, maps)?;
5375

54-
for path in paths {
55-
if path.is_empty() {
56-
continue;
57-
}
58-
let top_name = resolve_element_name(&path[0], maps)?;
59-
if path.len() == 1 {
60-
// Top-level attribute
61-
if let Some(val) = item.get(top_name.as_ref()) {
62-
result.insert(top_name.into_owned(), val.clone());
63-
}
64-
} else {
65-
// Nested path — resolve the value and insert at the top level
66-
// with the nested structure preserved
67-
if let Some(val) = resolve_path(path, item, maps)? {
68-
insert_nested(&mut result, path, maps, val)?;
69-
}
76+
let mut result = BTreeMap::new();
77+
for (name, child) in &root.attrs {
78+
if let Some(val) = item.get(name)
79+
&& let Some(projected) = project_value(val, child)
80+
{
81+
result.insert(name.clone(), projected);
7082
}
7183
}
72-
7384
Ok(result)
7485
}
7586

76-
/// Insert a value at a nested path in the result item, creating intermediate
77-
/// maps/lists as needed.
78-
///
79-
/// DynamoDB projection semantics:
80-
/// - `mylist[N]` → `{"mylist": [value]}`
81-
/// - `mylist[N].attr` → `{"mylist": [{"attr": value}]}`
82-
/// - `mymap.attr` → `{"mymap": {"attr": value}}`
83-
fn insert_nested(
84-
result: &mut Item,
85-
path: &[PathElement],
87+
/// Build the projection trie from the parsed paths, resolving `#name` refs.
88+
fn build_trie(
89+
paths: &[Vec<PathElement>],
8690
maps: &ExpressionMaps,
87-
value: &AttributeValue,
88-
) -> Result<(), DynamoDbError> {
89-
if path.is_empty() {
90-
return Ok(());
91-
}
92-
93-
let top_name = resolve_element_name(&path[0], maps)?.into_owned();
94-
95-
if path.len() == 1 {
96-
result.insert(top_name, value.clone());
97-
return Ok(());
98-
}
99-
100-
// Build the value from the inside out, starting from the leaf.
101-
let wrapped = wrap_from_tail(&path[1..], maps, value)?;
102-
let entry = result.entry(top_name);
103-
match entry {
104-
std::collections::btree_map::Entry::Vacant(e) => {
105-
e.insert(wrapped);
91+
) -> Result<ProjNode, DynamoDbError> {
92+
let mut root = ProjNode::default();
93+
for path in paths {
94+
if path.is_empty() {
95+
continue;
10696
}
107-
std::collections::btree_map::Entry::Occupied(mut e) => {
108-
merge_projected(e.get_mut(), &wrapped);
97+
let mut node = &mut root;
98+
for (i, element) in path.iter().enumerate() {
99+
match element {
100+
PathElement::Attribute(_) => {
101+
// The first element must be an attribute; `resolve_element_name`
102+
// rejects an index-start path, matching the prior behavior.
103+
let name = if i == 0 {
104+
resolve_element_name(element, maps)?
105+
} else {
106+
let PathElement::Attribute(raw) = element else {
107+
unreachable!()
108+
};
109+
resolve_name_ref(raw, maps)?
110+
};
111+
node = node.attrs.entry(name.into_owned()).or_default();
112+
}
113+
PathElement::Index(idx) => {
114+
if i == 0 {
115+
return Err(DynamoDbError::ValidationException(
116+
"Invalid expression: path cannot start with an index".to_owned(),
117+
));
118+
}
119+
node = node.indices.entry(*idx).or_default();
120+
}
121+
}
109122
}
123+
node.terminal = true;
110124
}
111-
Ok(())
125+
Ok(root)
112126
}
113127

114-
/// Build the nested structure from a path tail and a leaf value.
115-
/// E.g. for path `[Index(0), Attribute("val")]` and value `"target"`,
116-
/// produces `L([M({"val": "target"})])`.
117-
fn wrap_from_tail(
118-
path: &[PathElement],
119-
maps: &ExpressionMaps,
120-
value: &AttributeValue,
121-
) -> Result<AttributeValue, DynamoDbError> {
122-
if path.is_empty() {
123-
return Ok(value.clone());
128+
/// Project a single value against a trie node.
129+
///
130+
/// Returns `None` when nothing is selected (missing key, out-of-bounds index,
131+
/// or a path that does not match the value's type), so the caller omits the
132+
/// attribute entirely.
133+
fn project_value(value: &AttributeValue, node: &ProjNode) -> Option<AttributeValue> {
134+
if node.terminal {
135+
return Some(value.clone());
124136
}
125137

126-
match &path[0] {
127-
PathElement::Attribute(name) => {
128-
let resolved = super::resolver::resolve_name_ref(name, maps)?;
129-
let inner = wrap_from_tail(&path[1..], maps, value)?;
130-
let mut map = BTreeMap::new();
131-
map.insert(resolved.into_owned(), inner);
132-
Ok(AttributeValue::M(map))
133-
}
134-
PathElement::Index(_) => {
135-
let inner = wrap_from_tail(&path[1..], maps, value)?;
136-
Ok(AttributeValue::L(vec![inner]))
138+
if !node.attrs.is_empty() {
139+
let AttributeValue::M(map) = value else {
140+
return None;
141+
};
142+
let mut out = BTreeMap::new();
143+
for (name, child) in &node.attrs {
144+
if let Some(child_val) = map.get(name)
145+
&& let Some(projected) = project_value(child_val, child)
146+
{
147+
out.insert(name.clone(), projected);
148+
}
137149
}
150+
return (!out.is_empty()).then_some(AttributeValue::M(out));
138151
}
139-
}
140152

141-
/// Merge a projected value into an existing structure (for multiple projections
142-
/// on the same top-level attribute).
143-
fn merge_projected(existing: &mut AttributeValue, new: &AttributeValue) {
144-
match (existing, new) {
145-
(AttributeValue::M(existing_map), AttributeValue::M(new_map)) => {
146-
for (k, v) in new_map {
147-
match existing_map.get_mut(k) {
148-
Some(existing_v) => merge_projected(existing_v, v),
149-
None => {
150-
existing_map.insert(k.clone(), v.clone());
151-
}
152-
}
153+
if !node.indices.is_empty() {
154+
let AttributeValue::L(list) = value else {
155+
return None;
156+
};
157+
let mut out = Vec::new();
158+
// BTreeMap iterates indices ascending, compacting the projected list.
159+
for (idx, child) in &node.indices {
160+
if let Some(element) = list.get(*idx)
161+
&& let Some(projected) = project_value(element, child)
162+
{
163+
out.push(projected);
153164
}
154165
}
155-
(AttributeValue::L(existing_list), AttributeValue::L(new_list)) => {
156-
// For list projections, DynamoDB merges into the single-element list
157-
if existing_list.len() == 1 && new_list.len() == 1 {
158-
merge_projected(&mut existing_list[0], &new_list[0]);
159-
} else {
160-
existing_list.extend(new_list.iter().cloned());
161-
}
162-
}
163-
(existing, new) => {
164-
*existing = new.clone();
165-
}
166+
return (!out.is_empty()).then_some(AttributeValue::L(out));
166167
}
168+
169+
None
167170
}
168171

169172
#[cfg(test)]
@@ -298,6 +301,158 @@ mod tests {
298301
let result = project("mylist[5]", &item, HashMap::new()).unwrap();
299302
assert!(result.is_empty());
300303
}
304+
305+
fn list_item() -> Item {
306+
let mut item = BTreeMap::new();
307+
item.insert("pk".into(), AttributeValue::S("p1".into()));
308+
item.insert(
309+
"mylist".into(),
310+
AttributeValue::L(vec![
311+
AttributeValue::S("zero".into()),
312+
AttributeValue::S("one".into()),
313+
AttributeValue::S("two".into()),
314+
AttributeValue::S("three".into()),
315+
]),
316+
);
317+
item.insert(
318+
"with_null".into(),
319+
AttributeValue::L(vec![
320+
AttributeValue::S("keep0".into()),
321+
AttributeValue::Null,
322+
AttributeValue::S("keep2".into()),
323+
]),
324+
);
325+
item
326+
}
327+
328+
fn assert_list(result: &Item, key: &str, expected: &[AttributeValue]) {
329+
match result.get(key) {
330+
Some(AttributeValue::L(list)) => assert_eq!(list.as_slice(), expected),
331+
other => panic!("Expected L for {key}, got {other:?}"),
332+
}
333+
}
334+
335+
#[test]
336+
fn project_two_list_indices_compacted() {
337+
let item = list_item();
338+
let result = project("mylist[0], mylist[2]", &item, HashMap::new()).unwrap();
339+
assert_list(
340+
&result,
341+
"mylist",
342+
&[
343+
AttributeValue::S("zero".into()),
344+
AttributeValue::S("two".into()),
345+
],
346+
);
347+
}
348+
349+
#[test]
350+
fn project_list_indices_ordered_by_index_not_expression() {
351+
let item = list_item();
352+
// Reversed expression order still comes out in ascending index order.
353+
let result = project("mylist[2], mylist[0]", &item, HashMap::new()).unwrap();
354+
assert_list(
355+
&result,
356+
"mylist",
357+
&[
358+
AttributeValue::S("zero".into()),
359+
AttributeValue::S("two".into()),
360+
],
361+
);
362+
}
363+
364+
#[test]
365+
fn project_list_index_gap_compacted() {
366+
let item = list_item();
367+
let result = project("mylist[1], mylist[3]", &item, HashMap::new()).unwrap();
368+
assert_list(
369+
&result,
370+
"mylist",
371+
&[
372+
AttributeValue::S("one".into()),
373+
AttributeValue::S("three".into()),
374+
],
375+
);
376+
}
377+
378+
#[test]
379+
fn project_null_element_by_index_preserved() {
380+
let item = list_item();
381+
let result = project("with_null[1]", &item, HashMap::new()).unwrap();
382+
assert_list(&result, "with_null", &[AttributeValue::Null]);
383+
}
384+
385+
#[test]
386+
fn project_unselected_null_dropped() {
387+
let item = list_item();
388+
let result = project("with_null[0], with_null[2]", &item, HashMap::new()).unwrap();
389+
assert_list(
390+
&result,
391+
"with_null",
392+
&[
393+
AttributeValue::S("keep0".into()),
394+
AttributeValue::S("keep2".into()),
395+
],
396+
);
397+
}
398+
399+
#[test]
400+
fn project_whole_list_preserves_null() {
401+
let item = list_item();
402+
let result = project("with_null", &item, HashMap::new()).unwrap();
403+
assert_list(
404+
&result,
405+
"with_null",
406+
&[
407+
AttributeValue::S("keep0".into()),
408+
AttributeValue::Null,
409+
AttributeValue::S("keep2".into()),
410+
],
411+
);
412+
}
413+
414+
#[test]
415+
fn project_list_of_maps_subfield_multi() {
416+
let mut item = BTreeMap::new();
417+
item.insert("pk".into(), AttributeValue::S("p".into()));
418+
let mk = |v: &str, x: &str| {
419+
let mut m = BTreeMap::new();
420+
m.insert("val".into(), AttributeValue::S(v.into()));
421+
m.insert("x".into(), AttributeValue::S(x.into()));
422+
AttributeValue::M(m)
423+
};
424+
item.insert(
425+
"lom".into(),
426+
AttributeValue::L(vec![mk("a0", "x0"), mk("a1", "x1"), mk("a2", "x2")]),
427+
);
428+
429+
let result = project("lom[0].val, lom[2].val", &item, HashMap::new()).unwrap();
430+
let only_val = |v: &str| {
431+
let mut m = BTreeMap::new();
432+
m.insert("val".into(), AttributeValue::S(v.into()));
433+
AttributeValue::M(m)
434+
};
435+
assert_list(&result, "lom", &[only_val("a0"), only_val("a2")]);
436+
}
437+
438+
#[test]
439+
fn project_same_index_merges_subfields() {
440+
let mut item = BTreeMap::new();
441+
item.insert("pk".into(), AttributeValue::S("p".into()));
442+
let mut m = BTreeMap::new();
443+
m.insert("a".into(), AttributeValue::S("av".into()));
444+
m.insert("b".into(), AttributeValue::S("bv".into()));
445+
m.insert("c".into(), AttributeValue::S("cv".into()));
446+
item.insert("l".into(), AttributeValue::L(vec![AttributeValue::M(m)]));
447+
448+
// Two paths selecting the same element merge their subfields.
449+
let result = project("l[0].a, l[0].c", &item, HashMap::new()).unwrap();
450+
let mut expected = BTreeMap::new();
451+
expected.insert("a".into(), AttributeValue::S("av".into()));
452+
expected.insert("c".into(), AttributeValue::S("cv".into()));
453+
assert_list(&result, "l", &[AttributeValue::M(expected)]);
454+
}
455+
301456
#[test]
302457
fn project_list_index_into_map_preserves_structure() {
303458
let mut item = BTreeMap::new();

0 commit comments

Comments
 (0)