2121
2222#include " openPMD/auxiliary/Filesystem.hpp"
2323#include " openPMD/auxiliary/Memory.hpp"
24+ #include " openPMD/auxiliary/Option.hpp"
2425#include " openPMD/auxiliary/StringManip.hpp"
2526#include " openPMD/backend/Writable.hpp"
2627#include " openPMD/Datatype.hpp"
@@ -273,11 +274,46 @@ namespace openPMD
273274
274275 namespace
275276 {
277+ // pre-declare since this one is recursive
276278 ChunkTable
277279 chunksInJSON ( nlohmann::json const & );
278280 ChunkTable
279281 chunksInJSON ( nlohmann::json const & j )
280282 {
283+ /*
284+ * Idea:
285+ * Iterate (n-1)-dimensional hyperslabs line by line and query
286+ * their chunks recursively.
287+ * If two or more successive (n-1)-dimensional slabs return the
288+ * same chunktable, they can be merged as one chunk.
289+ *
290+ * Note that this approach is simple and relatively easy to
291+ * implement, but not ideal, since chunks that overlap in some
292+ * dimensions may be ripped apart:
293+ *
294+ * 0123
295+ * 0 ____
296+ * 1 ____
297+ * 2 **__
298+ * 3 **__
299+ * 4 **__
300+ * 5 **__
301+ * 6 **__
302+ * 7 **_*
303+ * 8 ___*
304+ * 9 ___*
305+ *
306+ * Since both of the drawn chunks overlap on line 7, this approach
307+ * will return 4 chunks:
308+ * offset - extent
309+ * (2,0) - (5,2)
310+ * (7,0) - (1,2)
311+ * (7,3) - (1,1)
312+ * (8,3) - (2,1)
313+ *
314+ * Hence, in a second phase, the mergeChunks function below will
315+ * merge things back up.
316+ */
281317 if ( !j.is_array () )
282318 {
283319 return ChunkTable{ Chunk ( Offset{}, Extent{} ) };
@@ -296,8 +332,10 @@ namespace openPMD
296332 {
297333 break ;
298334 }
299- // get block at current position
300- size_t offset = it;
335+ // get chunking at current position
336+ // and, additionally, count the number of successive rows with the
337+ // same recursive results
338+ size_t const offset = it;
301339 ChunkTable referenceTable = chunksInJSON ( j[ it ] );
302340 ++it;
303341 for ( ; it < end; ++it )
@@ -312,7 +350,9 @@ namespace openPMD
312350 break ;
313351 }
314352 }
315- size_t extent = it - offset; // sic! no -1
353+ size_t const extent = it - offset; // sic! no -1
354+ // now that we know the number of successive rows with the same
355+ // recursive results, extend these results to include dimension 0
316356 for ( auto const & chunk : referenceTable )
317357 {
318358 Offset o = { offset };
@@ -332,15 +372,27 @@ namespace openPMD
332372 return res;
333373 }
334374
335- std::pair< bool , Chunk >
336- mergeChunks ( Chunk const & _c1, Chunk const & _c2 )
337- {
338- Chunk const *c1 ( &_c1 ), *c2 ( &_c2 );
339- unsigned dimensionality = _c1.extent .size ();
375+ /*
376+ * Check whether two chunks can be merged to form a larger one;
377+ * if so, return that larger chunk, otherwise return an empty Option
378+ */
379+ auxiliary::Option< Chunk >
380+ mergeChunks ( Chunk const & chunk1, Chunk const & chunk2 )
381+ {
382+ /*
383+ * Idea:
384+ * If two chunks can be merged into one, they agree on offsets and
385+ * extents in all but exactly one dimension dim.
386+ * At dimension dim, the offset of chunk 2 is equal to the offset
387+ * of chunk 1 plus its extent -- or vice versa.
388+ */
389+ unsigned dimensionality = chunk1.extent .size ();
340390 for ( unsigned dim = 0 ; dim < dimensionality; ++dim )
341391 {
392+ Chunk const *c1 ( &chunk1 ), *c2 ( &chunk2 );
342393 // check if one chunk is the extension of the other at
343394 // dimension dim
395+ // first, let's put things in order
344396 if ( c1->offset [ dim ] > c2->offset [ dim ] )
345397 {
346398 std::swap ( c1, c2 );
@@ -376,11 +428,15 @@ namespace openPMD
376428 Offset offset ( c1->offset );
377429 Extent extent ( c1->extent );
378430 extent[ dim ] += c2->extent [ dim ];
379- return std::make_pair ( true , Chunk ( offset, extent ) );
431+ return auxiliary::makeOption ( Chunk ( offset, extent ) );
380432 }
381- return std::make_pair ( false , Chunk () );
433+ return auxiliary::Option< Chunk >( );
382434 }
383435
436+ /*
437+ * Merge chunks in the chunktable until no chunks are left that can be
438+ * merged.
439+ */
384440 void
385441 mergeChunks ( ChunkTable & table )
386442 {
@@ -389,20 +445,26 @@ namespace openPMD
389445 {
390446 stillChanging = false ;
391447 auto innerLoops = [ &table ]() {
448+ /*
449+ * Iterate over pairs of chunks in the table.
450+ * If a pair is found that can be merged, merge it,
451+ * delete the original two chunks from the table,
452+ * put the new one in and return.
453+ */
392454 for ( auto i = table.begin (); i < table.end (); ++i )
393455 {
394456 for ( auto j = i + 1 ; j < table.end (); ++j )
395457 {
396- std::pair< bool , Chunk > merged =
458+ auxiliary::Option< Chunk > merged =
397459 mergeChunks ( *i, *j );
398- if ( merged. first )
460+ if ( merged )
399461 {
400462 // erase order is important due to iterator
401463 // invalidation
402464 table.erase ( j );
403465 table.erase ( i );
404466 table.emplace_back (
405- std::move ( merged.second ) );
467+ std::move ( merged.get () ) );
406468 return true ;
407469 }
408470 }
0 commit comments