Skip to content

Commit 516dc44

Browse files
Further workaround for #1655: Avoid hangup automatically when using writeIterations() (#1728)
* Add failing test from bug report * Always flush currently active Iteration
1 parent ff52a8d commit 516dc44

5 files changed

Lines changed: 83 additions & 1 deletion

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -791,6 +791,7 @@ if(openPMD_BUILD_TESTING)
791791
list(APPEND ${out_list}
792792
test/Files_ParallelIO/read_variablebased_randomaccess.cpp
793793
test/Files_ParallelIO/iterate_nonstreaming_series.cpp
794+
test/Files_ParallelIO/bug_1655_bp5_writer_hangup.cpp
794795
)
795796
elseif(${test_name} STREQUAL "Core")
796797
list(APPEND ${out_list}

src/Series.cpp

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2702,7 +2702,8 @@ auto Series::openIterationIfDirty(IterationIndex_t index, Iteration &iteration)
27022702
{
27032703
return IterationOpened::RemainsClosed;
27042704
}
2705-
bool const dirtyRecursive = iteration.dirtyRecursive();
2705+
bool dirtyRecursive = iteration.dirtyRecursive();
2706+
27062707
if (data.m_closed == internal::CloseStatus::Closed)
27072708
{
27082709
// file corresponding with the iteration has previously been
@@ -2731,6 +2732,28 @@ auto Series::openIterationIfDirty(IterationIndex_t index, Iteration &iteration)
27312732
}
27322733
}
27332734

2735+
/*
2736+
* When using writeIterations(), the currently active Iteration should
2737+
* always be flushed (unless, as checked above, it is already closed).
2738+
* This block checks if an Iteration is currently collectively opened.
2739+
*/
2740+
[&]() {
2741+
auto &series = get();
2742+
if (!series.m_sharedStatefulIterator)
2743+
{
2744+
return;
2745+
}
2746+
auto &shared_iterator = *series.m_sharedStatefulIterator;
2747+
if (!shared_iterator.m_data || !shared_iterator.m_data->has_value())
2748+
{
2749+
return;
2750+
}
2751+
auto const current_iteration =
2752+
(*shared_iterator.m_data)->currentIteration();
2753+
dirtyRecursive |=
2754+
current_iteration.has_value() && *current_iteration == index;
2755+
}();
2756+
27342757
switch (iterationEncoding())
27352758
{
27362759
using IE = IterationEncoding;

test/Files_ParallelIO/ParallelIOTests.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,9 @@ namespace iterate_nonstreaming_series
7575
auto iterate_nonstreaming_series() -> void;
7676
}
7777

78+
namespace bug_1655_bp5_writer_hangup
79+
{
80+
auto bug_1655_bp5_writer_hangup() -> void;
81+
}
82+
7883
#endif
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#include "ParallelIOTests.hpp"
2+
3+
#include <mpi.h>
4+
#include <openPMD/openPMD.hpp>
5+
6+
namespace bug_1655_bp5_writer_hangup
7+
{
8+
auto worker(std::string const &ext) -> void
9+
{
10+
int mpi_size;
11+
int mpi_rank;
12+
13+
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
14+
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
15+
16+
auto const value = float(mpi_size * 100 + mpi_rank);
17+
std::vector<float> local_data(10 * 300, value);
18+
19+
std::string filename = "../samples/ptl_%T." + ext;
20+
21+
Series series = Series(filename, Access::CREATE, MPI_COMM_WORLD);
22+
23+
Datatype datatype = determineDatatype<float>();
24+
25+
auto myptl = series.writeIterations()[1].particles["ion"];
26+
Extent global_ptl = {10ul * mpi_size * 300};
27+
Dataset dataset_ptl = Dataset(datatype, global_ptl, "{}");
28+
myptl["charge"].resetDataset(dataset_ptl);
29+
30+
series.flush();
31+
32+
if (mpi_rank == 0) // only rank 0 adds data
33+
myptl["charge"].storeChunk(local_data, {0}, {3000});
34+
35+
series.flush(); // hangs here
36+
series.close();
37+
}
38+
39+
auto bug_1655_bp5_writer_hangup() -> void
40+
{
41+
for (auto const &ext : testedFileExtensions())
42+
{
43+
worker(ext);
44+
}
45+
}
46+
} // namespace bug_1655_bp5_writer_hangup

test/ParallelIOTest.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2183,3 +2183,10 @@ TEST_CASE("iterate_nonstreaming_series", "[serial][adios2]")
21832183
}
21842184

21852185
#endif // openPMD_HAVE_ADIOS2 && openPMD_HAVE_MPI
2186+
2187+
#if openPMD_HAVE_MPI
2188+
TEST_CASE("bug_1655_bp5_writer_hangup", "[parallel]")
2189+
{
2190+
bug_1655_bp5_writer_hangup::bug_1655_bp5_writer_hangup();
2191+
}
2192+
#endif

0 commit comments

Comments
 (0)