Skip to content

Commit 1fd34ba

Browse files
committed
Add compression example
1 parent 1e8143c commit 1fd34ba

3 files changed

Lines changed: 162 additions & 2 deletions

File tree

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,7 @@ set(openPMD_EXAMPLE_NAMES
710710
12_span_write
711711
13_write_dynamic_configuration
712712
14_toml_template
713+
15_compression
713714
)
714715
set(openPMD_PYTHON_EXAMPLE_NAMES
715716
2_read_serial

examples/15_compression.cpp

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <stdexcept>
#include <string>

#include <openPMD/openPMD.hpp>
3+
4+
#if __has_include(<blosc2_filter.h>)
5+
#include <blosc2_filter.h>
6+
#define OPENPMD_USE_BLOSC2_FILTER 1
7+
#else
8+
#define OPENPMD_USE_BLOSC2_FILTER 0
9+
#endif
10+
11+
/**
 * Register the Blosc2 filter plugin with HDF5, if its header was found at
 * compile time (OPENPMD_USE_BLOSC2_FILTER != 0). Otherwise this is a no-op.
 *
 * On success, the plugin's version and date are printed to std::cout.
 *
 * @throws std::runtime_error if register_blosc2() reports a status below 1.
 */
void init_blosc_for_hdf5()
{
#if OPENPMD_USE_BLOSC2_FILTER
    /*
     * This registers the Blosc2 plugin from
     * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to
     * activate and configure dynamic HDF5 filter plugins through openPMD.
     */

    // Start from nullptr so that the cleanup below is well-defined even if
    // register_blosc2() leaves the out-parameters untouched.
    char *version = nullptr;
    char *date = nullptr;
    int const status = register_blosc2(&version, &date);

    // NOTE(review): the plugin hands out strdup()-style heap copies that the
    // caller owns (see the HDF5-Blosc2 sources); release them on every path.
    auto const release_strings = [&version, &date]() {
        std::free(version);
        std::free(date);
        version = nullptr;
        date = nullptr;
    };

    if (status < 1)
    {
        release_strings();
        throw std::runtime_error("Unable to register Blosc2 plugin with HDF5.");
    }

    // Streaming a null char pointer would put std::cout into a failed state,
    // so fall back to a placeholder if the plugin did not report a value.
    std::cout << "Blosc2 plugin registered in version '"
              << (version != nullptr ? version : "unknown") << "' and date '"
              << (date != nullptr ? date : "unknown") << "'." << std::endl;
    release_strings();
#endif
}
33+
34+
void write(std::string const &filename, std::string const &config)
35+
{
36+
using namespace openPMD;
37+
Series series(
38+
"../samples/compression/" + filename, Access::CREATE_LINEAR, config);
39+
40+
for (size_t i = 0; i < 10; ++i)
41+
{
42+
auto &current_iteration = series.snapshots()[i];
43+
44+
// First, write an E mesh.
45+
auto &E = current_iteration.meshes["E"];
46+
E.setAxisLabels({"x", "y"});
47+
for (auto const &dim : {"x", "y", "z"})
48+
{
49+
auto &component = E[dim];
50+
component.resetDataset({Datatype::FLOAT, {10, 10}});
51+
auto buffer_view =
52+
component.storeChunk<float>({0, 0}, {10, 10}).currentBuffer();
53+
// Now fill the prepared buffer with some nonsense data.
54+
std::iota(buffer_view.begin(), buffer_view.end(), i * 100);
55+
}
56+
57+
// Now, write some e particles.
58+
auto &e = current_iteration.particles["e"];
59+
for (auto const &dim : {"x", "y", "z"})
60+
{
61+
// Do not bother with a positionOffset
62+
auto &position_offset = e["positionOffset"][dim];
63+
position_offset.makeConstant(0);
64+
65+
auto &position = e["position"][dim];
66+
position.resetDataset({Datatype::FLOAT, {10}});
67+
auto buffer_view =
68+
position.storeChunk<float>({0}, {10}).currentBuffer();
69+
// Now fill the prepared buffer with some nonsense data.
70+
std::iota(buffer_view.begin(), buffer_view.end(), i * 10);
71+
}
72+
}
73+
}
74+
75+
int main()
76+
{
77+
init_blosc_for_hdf5();
78+
79+
// Backend specific configuration can be given in either JSON or TOML.
80+
// We will stick with TOML in this example, since it allows inline comments
81+
// and remains more legible for larger configurations.
82+
// openPMD includes / will in future include separate tooling for converting
83+
// between JSON and TOML (openpmd-convert-toml-json), which you may use to
84+
// convert these configurations to JSON.
85+
86+
std::string const simple_adios2_config = R"(
87+
88+
# Backend can either be inferred from the filename ending, or specified
89+
# explicitly. In the latter case, the filename ending can be given as
90+
# a wildcard %E, openPMD will then pick a default ending.
91+
backend = "adios2"
92+
93+
# ADIOS2 supports adding multiple operators to a variable, hence we
94+
# specify a list of operators here (using TOML's double bracket syntax).
95+
# How much sense this makes depends on the specific operators in use.
96+
97+
[[adios2.dataset.operators]]
98+
type = "zlib"
99+
parameters.clevel = 9 # The available parameters depend
100+
# on the operator.
101+
# Here, we specify zlib's compression level.
102+
)";
103+
write("adios2_with_zlib.%E", simple_adios2_config);
104+
105+
std::string const extended_adios2_config = R"(
106+
backend = "adios2"
107+
108+
# The compression can also be specified per-dataset.
109+
# For more details, also check:
110+
# https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration
111+
112+
# This example will demonstrate the use of pattern matching.
113+
# adios2.dataset is now a list of dataset configurations. The specific
114+
# configuration to be used for a dataset will be determined by matching
115+
# the dataset name against the patterns specified by the 'select' keys.
116+
[[adios2.dataset]]
117+
# This uses egrep-type regular expressions.
118+
select = "meshes/.*"
119+
# Now, specify the operators list again. Let's use Blosc for this.
120+
[[adios2.dataset.cfg.operators]]
121+
type = "blosc"
122+
parameters.doshuffle = "BLOSC_BITSHUFFLE"
123+
parameters.clevel = 1
124+
125+
# Now, configure the particles.
126+
[[adios2.dataset]]
127+
# The match can either be against the path within the containing
128+
# Iteration (e.g. 'meshes/E/x', as above) or (as in this example),
129+
# against the full path (e.g. '/data/0/particles/e/position/x').
130+
# the containing Iteration (e.g. 'meshes/E/x').
131+
# In this example, completely deactivate compression specifically for
132+
# particles in Iterations 2, 3 and 4. All other particle datasets will
133+
# fall back to the default configuration specified below.
134+
select = "/data/(2|3|4)/particles/.*"
135+
cfg.operators = []
136+
137+
# Now, the default configuration.
138+
# In general, the dataset configurations are matched top-down, going for
139+
# the first matching configuration. So, a default configuration could
140+
# theoretically be specified by emplacing a catch-all pattern
141+
# (regex: ".*") as the last option.
142+
# However, we also define an explicit shorthand for specifying default
143+
# configurations: Just omit the 'select' key. This special syntax is
144+
# understood as the default configuration no matter where in the list it
145+
# is emplaced, and it allows the backends to initialize the default
146+
# configuration globally, instead of applying it selectively to each
147+
# dataset that matches a catch-all pattern.
148+
[[adios2.dataset]]
149+
[[adios2.dataset.cfg.operators]]
150+
type = "zlib"
151+
parameters.clevel = 2
152+
)";
153+
write(
154+
"adios2_with_dataset_specific_configurations.%E",
155+
extended_adios2_config);
156+
}

src/IO/ADIOS/ADIOS2IOHandler.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2449,8 +2449,11 @@ namespace detail
24492449
{
24502450
var.SetSelection({start, count});
24512451
}
2452-
// don't add compression operators multiple times
2453-
return;
2452+
// Compression operators can be specified per-block in ADIOS2.
2453+
// While we don't support that, we do support changing the
2454+
// compression across multiple steps.
2455+
// Hence remove and reapply operators, they might have changed.
2456+
var.RemoveOperations();
24542457
}
24552458

24562459
if (!var)

0 commit comments

Comments
 (0)