@@ -1910,76 +1910,50 @@ async fn test_segment_ordering_zonemaps_after_data() -> VortexResult<()> {
19101910
19111911 check_zoned_ordering ( root. as_ref ( ) , segment_specs) ;
19121912
1913- // Additionally: all zone map segments across all columns should appear after
1914- // all data segments across all columns. Array tree segments (if present) appear
1915- // between data and zones.
1913+ // Additionally: every zone-map segment across all columns must appear after every data
1914+ // segment across all columns. This holds even with cross-column interleaving inside the
1915+ // metadata phase because the root writer splits the sequence universe — chunk data uses
1916+ // IDs derived from `ptr` and all metadata (zones, etc.) derives from `eof`, with
1917+ // `ptr < eof` so all data segments globally precede all metadata segments.
1918+ //
1919+ // Per-column `data < array_trees < zones` ordering for the new `ArrayTreeLayout` is
1920+ // covered separately by the array-tree-specific ordering tests.
19161921 let mut all_data_offsets = Vec :: new ( ) ;
1917- let mut all_array_tree_offsets = Vec :: new ( ) ;
19181922 let mut all_zones_offsets = Vec :: new ( ) ;
19191923
19201924 fn collect_all_zoned (
19211925 layout : & dyn Layout ,
19221926 segment_specs : & [ SegmentSpec ] ,
19231927 all_data : & mut Vec < u64 > ,
1924- all_array_trees : & mut Vec < u64 > ,
19251928 all_zones : & mut Vec < u64 > ,
19261929 ) {
19271930 if layout. encoding_id ( ) . as_ref ( ) == "vortex.stats" {
1928- // child 0 = data (may contain array_tree layouts), child 1 = zones
1929- let data_child = layout. child ( 0 ) . unwrap ( ) ;
1930- // If the data child is an array_tree layout, split its segments.
1931- if data_child. encoding_id ( ) . as_ref ( ) == "vortex.array_tree" {
1932- // child 0 = actual data, child 1 = array_trees auxiliary
1933- all_data. extend ( collect_segment_offsets (
1934- data_child. child ( 0 ) . unwrap ( ) . as_ref ( ) ,
1935- segment_specs,
1936- ) ) ;
1937- all_array_trees. extend ( collect_segment_offsets (
1938- data_child. child ( 1 ) . unwrap ( ) . as_ref ( ) ,
1939- segment_specs,
1940- ) ) ;
1941- } else {
1942- all_data. extend ( collect_segment_offsets ( data_child. as_ref ( ) , segment_specs) ) ;
1943- }
1931+ all_data. extend ( collect_segment_offsets (
1932+ layout. child ( 0 ) . unwrap ( ) . as_ref ( ) ,
1933+ segment_specs,
1934+ ) ) ;
19441935 all_zones. extend ( collect_segment_offsets (
19451936 layout. child ( 1 ) . unwrap ( ) . as_ref ( ) ,
19461937 segment_specs,
19471938 ) ) ;
19481939 return ;
19491940 }
19501941 for child in layout. children ( ) . unwrap ( ) {
1951- collect_all_zoned (
1952- child. as_ref ( ) ,
1953- segment_specs,
1954- all_data,
1955- all_array_trees,
1956- all_zones,
1957- ) ;
1942+ collect_all_zoned ( child. as_ref ( ) , segment_specs, all_data, all_zones) ;
19581943 }
19591944 }
19601945
19611946 collect_all_zoned (
19621947 root. as_ref ( ) ,
19631948 segment_specs,
19641949 & mut all_data_offsets,
1965- & mut all_array_tree_offsets,
19661950 & mut all_zones_offsets,
19671951 ) ;
19681952
1969- // The root writer splits the sequence universe into two: data chunks use IDs from `ptr`
1970- // and all metadata (array trees, zones) derive from `eof`. Since ptr < eof, all data
1971- // segments are globally before all metadata segments.
1972- //
1973- // Within the eof universe, per-column ordering guarantees array_trees < zones within
1974- // each column, but cross-column interleaving means we cannot assert
1975- // all_array_trees < all_zones globally.
1976- let mut all_metadata_offsets = all_array_tree_offsets;
1977- all_metadata_offsets. extend ( & all_zones_offsets) ;
1978-
19791953 assert_offsets_ordered (
19801954 & all_data_offsets,
1981- & all_metadata_offsets ,
1982- "global: all data segments should come before all metadata segments (array trees + zone maps) " ,
1955+ & all_zones_offsets ,
1956+ "global: all data segments should come before all zone-map segments " ,
19831957 ) ;
19841958
19851959 Ok ( ( ) )
@@ -2146,3 +2120,77 @@ async fn test_segment_ordering_array_trees_before_zones() -> VortexResult<()> {
21462120
21472121 Ok ( ( ) )
21482122}
2123+
2124+ #[ tokio:: test]
2125+ #[ cfg_attr( miri, ignore) ]
2126+ async fn test_roundtrip_array_tree_layout ( ) -> VortexResult < ( ) > {
2127+ // End-to-end coverage of the new ArrayTreeLayout: write a multi-column struct with
2128+ // `with_array_tree(true)`, then read it back through the override-registration path and
2129+ // assert the data matches. Exercises:
2130+ // - ArrayTreeCollectorStrategy collecting compact trees via the side-channel sink
2131+ // - ArrayTreeLayout::new_reader deriving a ctx with the source-injecting override
2132+ // - ArrayTreeFlatReader looking up its compact tree from ArrayTreesSource by segment_id
2133+ // - SerializedArray::from_flatbuffer_and_segment decoding the data segment
2134+ let mut ctx = SESSION . create_execution_ctx ( ) ;
2135+
2136+ let n = 10_000 ;
2137+ let strings_in: Vec < & str > = ( 0 ..n) . map ( |i| [ "alpha" , "beta" , "gamma" ] [ i % 3 ] ) . collect ( ) ;
2138+ let strings = VarBinArray :: from ( strings_in. clone ( ) ) . into_array ( ) ;
2139+ let numbers_in: Vec < i32 > = ( 0 ..n as i32 ) . collect ( ) ;
2140+ let numbers = PrimitiveArray :: from_iter ( numbers_in. iter ( ) . copied ( ) ) . into_array ( ) ;
2141+
2142+ let st = StructArray :: from_fields ( & [ ( "strings" , strings) , ( "numbers" , numbers) ] ) ?. into_array ( ) ;
2143+ let dtype = st. dtype ( ) . clone ( ) ;
2144+
2145+ let mut buf = ByteBufferMut :: empty ( ) ;
2146+ let strategy = crate :: WriteStrategyBuilder :: default ( )
2147+ . with_array_tree ( true )
2148+ . build ( ) ;
2149+ SESSION
2150+ . write_options ( )
2151+ . with_strategy ( strategy)
2152+ . write ( & mut buf, st. to_array_stream ( ) )
2153+ . await ?;
2154+
2155+ // Sanity-check that we actually wrote ArrayTreeLayout nodes — otherwise the test would
2156+ // silently pass on the default code path.
2157+ let file = SESSION . open_options ( ) . open_buffer ( buf) ?;
2158+ fn has_array_tree ( layout : & dyn Layout ) -> bool {
2159+ if layout. encoding_id ( ) . as_ref ( ) == "vortex.array_tree" {
2160+ return true ;
2161+ }
2162+ layout
2163+ . children ( )
2164+ . map ( |cs| cs. iter ( ) . any ( |c| has_array_tree ( c. as_ref ( ) ) ) )
2165+ . unwrap_or ( false )
2166+ }
2167+ assert ! (
2168+ has_array_tree( file. footer( ) . layout( ) . as_ref( ) ) ,
2169+ "test expected ArrayTreeLayout in the written file"
2170+ ) ;
2171+
2172+ // Read back and assert structure + data round-trip cleanly.
2173+ let result = file. scan ( ) ?. into_array_stream ( ) ?. read_all ( ) . await ?;
2174+ assert_eq ! ( result. len( ) , n) ;
2175+ assert_eq ! ( result. dtype( ) , & dtype) ;
2176+
2177+ let struct_array = result. execute :: < StructArray > ( & mut ctx) ?;
2178+
2179+ let read_numbers = struct_array. unmasked_field_by_name ( "numbers" ) . cloned ( ) ?;
2180+ let expected_numbers = PrimitiveArray :: from_iter ( numbers_in. iter ( ) . copied ( ) ) . into_array ( ) ;
2181+ assert_arrays_eq ! ( read_numbers, expected_numbers) ;
2182+
2183+ let read_strings = struct_array
2184+ . unmasked_field_by_name ( "strings" )
2185+ . cloned ( ) ?
2186+ . execute :: < VarBinViewArray > ( & mut ctx) ?
2187+ . with_iterator ( |iter| {
2188+ iter. map ( |s| s. map ( |st| unsafe { String :: from_utf8_unchecked ( st. to_vec ( ) ) } ) )
2189+ . collect :: < Vec < _ > > ( )
2190+ } ) ;
2191+ let expected_strings: Vec < Option < String > > =
2192+ strings_in. iter ( ) . map ( |s| Some ( ( * s) . to_string ( ) ) ) . collect ( ) ;
2193+ assert_eq ! ( read_strings, expected_strings) ;
2194+
2195+ Ok ( ( ) )
2196+ }
0 commit comments