Skip to content

Commit 56fe42e

Browse files
committed
Documentation and use of auxiliary::Option
1 parent cf81295 commit 56fe42e

File tree

2 files changed

+93
-16
lines changed

2 files changed

+93
-16
lines changed

include/openPMD/backend/BaseRecordComponent.hpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,25 @@ class BaseRecordComponent : public Attributable
5959
*/
6060
bool constant() const;
6161

62-
ChunkTable availableChunks();
62+
/**
63+
* Get data chunks that are available to be loaded from the backend.
64+
* Note that this is backend-dependent information and the returned
65+
* information may hence differ between different backends:
66+
* * The ADIOS backends (versions 1 and 2) will return those chunks that
67+
* the writer has originally written.
68+
* * The JSON backend will reconstruct the chunks by iterating the dataset.
69+
* * The HDF5 backend will return the whole dataset as one large chunk.
70+
*
71+
* The results depend solely on the backend and are independent of any
72+
* openPMD-related information. In particular, this call is explicitly unrelated
73+
* to openPMD's concept of particle patches, which users may additionally
74+
* wish to use to store user-defined, backend-independent chunking
75+
* information on particle datasets.
76+
*/
77+
ChunkTable
78+
availableChunks();
6379

64-
OPENPMD_protected:
65-
BaseRecordComponent();
80+
OPENPMD_protected : BaseRecordComponent();
6681

6782
std::shared_ptr< Dataset > m_dataset;
6883
std::shared_ptr< bool > m_isConstant;

src/IO/JSON/JSONIOHandlerImpl.cpp

Lines changed: 75 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
#include "openPMD/auxiliary/Filesystem.hpp"
2323
#include "openPMD/auxiliary/Memory.hpp"
24+
#include "openPMD/auxiliary/Option.hpp"
2425
#include "openPMD/auxiliary/StringManip.hpp"
2526
#include "openPMD/backend/Writable.hpp"
2627
#include "openPMD/Datatype.hpp"
@@ -273,11 +274,46 @@ namespace openPMD
273274

274275
namespace
275276
{
277+
// pre-declare since this one is recursive
276278
ChunkTable
277279
chunksInJSON( nlohmann::json const & );
278280
ChunkTable
279281
chunksInJSON( nlohmann::json const & j )
280282
{
283+
/*
284+
* Idea:
285+
* Iterate (n-1)-dimensional hyperslabs line by line and query
286+
* their chunks recursively.
287+
* If two or more successive (n-1)-dimensional slabs return the
288+
* same chunktable, they can be merged as one chunk.
289+
*
290+
* Notice that this approach is simple, relatively easily
291+
* implemented, but not ideal, since chunks that overlap in some
292+
* dimensions may be ripped apart:
293+
*
294+
* 0123
295+
* 0 ____
296+
* 1 ____
297+
* 2 **__
298+
* 3 **__
299+
* 4 **__
300+
* 5 **__
301+
* 6 **__
302+
* 7 **_*
303+
* 8 ___*
304+
* 9 ___*
305+
*
306+
* Since both of the drawn chunks overlap on line 7, this approach
307+
* will return 4 chunks:
308+
* offset - extent
309+
* (2,0) - (5,2)
310+
* (7,0) - (1,2)
311+
* (7,3) - (1,1)
312+
* (8,3) - (2,1)
313+
*
314+
* Hence, in a second phase, the mergeChunks function below will
315+
* merge things back up.
316+
*/
281317
if( !j.is_array() )
282318
{
283319
return ChunkTable{ Chunk( Offset{}, Extent{} ) };
@@ -296,8 +332,10 @@ namespace openPMD
296332
{
297333
break;
298334
}
299-
// get block at current position
300-
size_t offset = it;
335+
// get chunking at current position
336+
// and additionally, number of successive rows with the same
337+
// recursive results
338+
size_t const offset = it;
301339
ChunkTable referenceTable = chunksInJSON( j[ it ] );
302340
++it;
303341
for( ; it < end; ++it )
@@ -312,7 +350,9 @@ namespace openPMD
312350
break;
313351
}
314352
}
315-
size_t extent = it - offset; // sic! no -1
353+
size_t const extent = it - offset; // sic! no -1
354+
// now we know the number of successive rows with same rec.
355+
// results, let's extend these results to include dimension 0
316356
for( auto const & chunk : referenceTable )
317357
{
318358
Offset o = { offset };
@@ -332,15 +372,27 @@ namespace openPMD
332372
return res;
333373
}
334374

335-
std::pair< bool, Chunk >
336-
mergeChunks( Chunk const & _c1, Chunk const & _c2 )
337-
{
338-
Chunk const *c1( &_c1 ), *c2( &_c2 );
339-
unsigned dimensionality = _c1.extent.size();
375+
/*
376+
* Check whether two chunks can be merged to form a larger one
377+
* and optionally return that larger chunk
378+
*/
379+
auxiliary::Option< Chunk >
380+
mergeChunks( Chunk const & chunk1, Chunk const & chunk2 )
381+
{
382+
/*
383+
* Idea:
384+
* If two chunks can be merged into one, they agree on offsets and
385+
* extents in all but exactly one dimension dim.
386+
* At dimension dim, the offset of chunk 2 is equal to the offset
387+
* of chunk 1 plus its extent -- or vice versa.
388+
*/
389+
unsigned dimensionality = chunk1.extent.size();
340390
for( unsigned dim = 0; dim < dimensionality; ++dim )
341391
{
392+
Chunk const *c1( &chunk1 ), *c2( &chunk2 );
342393
// check if one chunk is the extension of the other at
343394
// dimension dim
395+
// first, let's put things in order
344396
if( c1->offset[ dim ] > c2->offset[ dim ] )
345397
{
346398
std::swap( c1, c2 );
@@ -376,11 +428,15 @@ namespace openPMD
376428
Offset offset( c1->offset );
377429
Extent extent( c1->extent );
378430
extent[ dim ] += c2->extent[ dim ];
379-
return std::make_pair( true, Chunk( offset, extent ) );
431+
return auxiliary::makeOption( Chunk( offset, extent ) );
380432
}
381-
return std::make_pair( false, Chunk() );
433+
return auxiliary::Option< Chunk >();
382434
}
383435

436+
/*
437+
* Merge chunks in the chunktable until no chunks are left that can be
438+
* merged.
439+
*/
384440
void
385441
mergeChunks( ChunkTable & table )
386442
{
@@ -389,20 +445,26 @@ namespace openPMD
389445
{
390446
stillChanging = false;
391447
auto innerLoops = [ &table ]() {
448+
/*
449+
* Iterate over pairs of chunks in the table.
450+
* If a pair that can be merged is found, merge it,
451+
* delete the original two chunks from the table,
452+
* put the new one in and return.
453+
*/
392454
for( auto i = table.begin(); i < table.end(); ++i )
393455
{
394456
for( auto j = i + 1; j < table.end(); ++j )
395457
{
396-
std::pair< bool, Chunk > merged =
458+
auxiliary::Option< Chunk > merged =
397459
mergeChunks( *i, *j );
398-
if( merged.first )
460+
if( merged )
399461
{
400462
// erase order is important due to iterator
401463
// invalidation
402464
table.erase( j );
403465
table.erase( i );
404466
table.emplace_back(
405-
std::move( merged.second ) );
467+
std::move( merged.get() ) );
406468
return true;
407469
}
408470
}

0 commit comments

Comments
 (0)