Skip to content

Commit fcb00db

Browse files
committed
Add compression example
1 parent 1e8143c commit fcb00db

File tree

4 files changed

+294
-37
lines changed

4 files changed

+294
-37
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -710,6 +710,7 @@ set(openPMD_EXAMPLE_NAMES
710710
12_span_write
711711
13_write_dynamic_configuration
712712
14_toml_template
713+
15_compression
713714
)
714715
set(openPMD_PYTHON_EXAMPLE_NAMES
715716
2_read_serial

examples/15_compression.cpp

Lines changed: 276 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,276 @@
1+
/* Copyright 2025 Franz Poeschel
2+
*
3+
* This file is part of openPMD-api.
4+
*
5+
* openPMD-api is free software: you can redistribute it and/or modify
6+
 * it under the terms of either the GNU General Public License or
7+
* the GNU Lesser General Public License as published by
8+
* the Free Software Foundation, either version 3 of the License, or
9+
* (at your option) any later version.
10+
*
11+
* openPMD-api is distributed in the hope that it will be useful,
12+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
* GNU General Public License and the GNU Lesser General Public License
15+
* for more details.
16+
*
17+
* You should have received a copy of the GNU General Public License
18+
* and the GNU Lesser General Public License along with openPMD-api.
19+
* If not, see <http://www.gnu.org/licenses/>.
20+
*/
21+
22+
#include <openPMD/openPMD.hpp>
23+
24+
#if openPMD_HAVE_HDF5 && __has_include(<blosc2_filter.h>)
25+
#include <blosc2_filter.h>
26+
#define OPENPMD_USE_BLOSC2_FILTER 1
27+
#else
28+
#define OPENPMD_USE_BLOSC2_FILTER 0
29+
#endif
30+
31+
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
34+
35+
/**
 * Register the Blosc2 filter plugin with HDF5, if it is available.
 *
 * The body is compiled only when OPENPMD_USE_BLOSC2_FILTER is non-zero, i.e.
 * when openPMD has HDF5 support and <blosc2_filter.h> could be found.
 * Otherwise this function does nothing.
 *
 * @throws std::runtime_error if register_blosc2() reports a failure.
 */
void init_blosc_for_hdf5()
{
#if OPENPMD_USE_BLOSC2_FILTER
    /*
     * This registers the Blosc2 plugin from
     * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to
     * activate and configure dynamic HDF5 filter plugins through openPMD.
     */

    char *version = nullptr;
    char *date = nullptr;
    int const status = register_blosc2(&version, &date);

    // The plugin hands back heap-allocated copies (strdup) of its version and
    // date strings, owned by the caller. Copy them into std::string and
    // release the C strings right away, so that neither the error path nor
    // the success path leaks them.
    // NOTE(review): ownership as implemented by HDF5-Blosc2; confirm against
    // the plugin version in use.
    std::string const version_string = version ? version : "<unknown>";
    std::string const date_string = date ? date : "<unknown>";
    std::free(version);
    std::free(date);

    if (status < 1)
    {
        throw std::runtime_error("Unable to register Blosc2 plugin with HDF5.");
    }

    std::cout << "Blosc2 plugin registered in version '" << version_string
              << "' and date '" << date_string << "'." << std::endl;
#endif
}
57+
58+
void write(std::string const &filename, std::string const &config)
59+
{
60+
using namespace openPMD;
61+
std::cout << "Config for '" << filename << "' as JSON:\n"
62+
<< json::merge(config, "{}") << "\n\n";
63+
Series series(
64+
"../samples/compression/" + filename, Access::CREATE_LINEAR, config);
65+
66+
for (size_t i = 0; i < 10; ++i)
67+
{
68+
auto &current_iteration = series.snapshots()[i];
69+
70+
// First, write an E mesh.
71+
auto &E = current_iteration.meshes["E"];
72+
E.setAxisLabels({"x", "y"});
73+
for (auto const &dim : {"x", "y"})
74+
{
75+
auto &component = E[dim];
76+
component.resetDataset({Datatype::FLOAT, {10, 10}});
77+
auto buffer_view =
78+
component.storeChunk<float>({0, 0}, {10, 10}).currentBuffer();
79+
// Now fill the prepared buffer with some nonsense data.
80+
std::iota(buffer_view.begin(), buffer_view.end(), i * 100);
81+
}
82+
83+
// Now, write some e particles.
84+
auto &e = current_iteration.particles["e"];
85+
for (auto const &dim : {"x", "y"})
86+
{
87+
// Do not bother with a positionOffset
88+
auto &position_offset = e["positionOffset"][dim];
89+
position_offset.makeConstant(0);
90+
91+
auto &position = e["position"][dim];
92+
position.resetDataset({Datatype::FLOAT, {100}});
93+
auto buffer_view =
94+
position.storeChunk<float>({0}, {100}).currentBuffer();
95+
// Now fill the prepared buffer with some nonsense data.
96+
std::iota(buffer_view.begin(), buffer_view.end(), i * 100);
97+
}
98+
}
99+
}
100+
101+
int main()
102+
{
103+
init_blosc_for_hdf5();
104+
105+
// Backend specific configuration can be given in either JSON or TOML.
106+
// We will stick with TOML in this example, since it allows inline comments
107+
// and remains more legible for larger configurations.
108+
// If you are interested in the configurations as JSON, run the example and
109+
// their JSON equivalents will be printed to stdout.
110+
111+
#if openPMD_HAVE_ADIOS2
112+
// We start with two examples for ADIOS2.
113+
std::string const simple_adios2_config = R"(
114+
115+
# Backend can either be inferred from the filename ending, or specified
116+
# explicitly. In the latter case, the filename ending can be given as
117+
# a wildcard %E, openPMD will then pick a default ending.
118+
backend = "adios2"
119+
120+
# ADIOS2 supports adding multiple operators to a variable, hence we
121+
# specify a list of operators here (using TOML's double bracket syntax).
122+
# How much sense this makes depends on the specific operators in use.
123+
124+
[[adios2.dataset.operators]]
125+
type = "bzip2"
126+
parameters.clevel = 9 # The available parameters depend
127+
# on the operator.
128+
# Here, we specify zlib's compression level.
129+
)";
130+
write("adios2_with_zlib.%E", simple_adios2_config);
131+
132+
// The compression can also be specified per-dataset.
133+
// For more details, also check:
134+
// https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration
135+
136+
// This example will demonstrate the use of pattern matching.
137+
// adios2.dataset is now a list of dataset configurations. The specific
138+
// configuration to be used for a dataset will be determined by matching
139+
// the dataset name against the patterns specified by the 'select' keys.
140+
std::string const extended_adios2_config = R"(
141+
backend = "adios2"
142+
143+
[[adios2.dataset]]
144+
# This uses egrep-type regular expressions.
145+
select = "meshes/.*"
146+
# Now, specify the operators list again. Let's use Blosc for this.
147+
[[adios2.dataset.cfg.operators]]
148+
type = "blosc"
149+
parameters.doshuffle = "BLOSC_BITSHUFFLE"
150+
parameters.clevel = 1
151+
152+
# Now, configure the particles.
153+
[[adios2.dataset]]
154+
# The match can either be against the path within the containing
155+
# Iteration (e.g. 'meshes/E/x', as above) or (as in this example),
156+
# against the full path (e.g. '/data/0/particles/e/position/x').
157+
# In this example, completely deactivate compression specifically for
158+
# 'particles/e/position/x'. All other particle datasets will
159+
# fall back to the default configuration specified below.
160+
# Be careful when specifying compression per-Iteration. While this
161+
# syntax fundamentally allows doing that, compressions once specified
162+
# on an ADIOS2 variable will not be removed again.
163+
# Since variable-encoding reuses ADIOS2 variables from previous
164+
# Iterations, the compression configuration of the first Iteration will
165+
# leak into all subsequent Iterations.
166+
select = "/data/[0-9]*/particles/e/position/x"
167+
cfg.operators = []
168+
169+
# Now, the default configuration.
170+
# In general, the dataset configurations are matched top-down, going for
171+
# the first matching configuration. So, a default configuration could
172+
# theoretically be specified by emplacing a catch-all pattern
173+
# (regex: ".*") as the last option.
174+
# However, we also define an explicit shorthand for specifying default
175+
# configurations: Just omit the 'select' key. This special syntax is
176+
# understood as the default configuration no matter where in the list it
177+
# is emplaced, and it allows the backends to initialize the default
178+
# configuration globally, instead of applying it selectively to each
179+
# dataset that matches a catch-all pattern.
180+
[[adios2.dataset]]
181+
[[adios2.dataset.cfg.operators]]
182+
type = "bzip2"
183+
parameters.clevel = 2
184+
)";
185+
write(
186+
"adios2_with_dataset_specific_configurations.%E",
187+
extended_adios2_config);
188+
#endif // openPMD_HAVE_ADIOS2
189+
190+
#if openPMD_HAVE_HDF5
191+
// Now, let's continue with HDF5.
192+
// HDF5 supports compression via so-called filters. These can be permanent
193+
// (applied to an entire dataset) and transient (applied to individual I/O
194+
// operations). The openPMD-api currently supports permanent filters. Refer
195+
// also to https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html.
196+
197+
// Filters are additionally distinguished by how tightly they integrate with
198+
// HDF5. The most tightly-integrated filter is Zlib, which has its own API
199+
// calls and hence also a special JSON/TOML configuration in openPMD:
200+
201+
std::string const hdf5_zlib_config = R"(
202+
backend = "hdf5"
203+
204+
[hdf5.dataset.permanent_filters]
205+
type = "zlib" # mandatory parameter
206+
aggression = 5 # optional, defaults to 1
207+
)";
208+
write("hdf5_zlib.%E", hdf5_zlib_config);
209+
210+
// All other filters have a common API and are identified by global IDs
211+
// registered with the HDF Group. More details can be found in the
212+
// H5Zpublic.h header. That header predefines a small number of filter IDs.
213+
// These are directly supported by the openPMD-api: deflate, shuffle,
214+
// fletcher32, szip, nbit, scaleoffset.
215+
216+
std::string const hdf5_predefined_filter_ids = R"(
217+
backend = "hdf5"
218+
219+
[hdf5.dataset.permanent_filters]
220+
id = "fletcher32" # mandatory parameter
221+
# A filter can be applied as mandatory (execution should abort if the
222+
# filter cannot be applied) or as optional (execution should ignore when
223+
# the filter cannot be applied).
224+
flags = "mandatory" # optional parameter
225+
type = "by_id" # optional parameter for filters identified by ID,
226+
# mandatory only for zlib (see above)
227+
)";
228+
write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids);
229+
230+
// Just like ADIOS2 with their operations, also HDF5 supports adding
231+
// multiple filters into a filter pipeline. The permanent_filters key can
232+
// hence also be given as a list.
233+
234+
std::string const hdf5_filter_pipeline = R"(
235+
backend = "hdf5"
236+
237+
# pipeline consisting of two filters
238+
239+
[[hdf5.dataset.permanent_filters]]
240+
type = "zlib"
241+
aggression = 5
242+
243+
[[hdf5.dataset.permanent_filters]]
244+
id = "shuffle"
245+
flags = "mandatory"
246+
)";
247+
write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline);
248+
249+
// For non-predefined IDs, the ID must be given as a number. This example
250+
// uses the Blosc2 filter available from
251+
// https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026
252+
// (defined in blosc2_filter.h as FILTER_BLOSC2). Generic filters referenced
253+
// by ID can be configured via the cd_values field. This field is an array
254+
// of unsigned integers and plugin-specific interpretation. For the Blosc2
255+
// plugin, indexes 0, 1, 2 and 3 are reserved. index 4 is the compression
256+
// level, index 5 is a boolean for activating shuffling and index 6 denotes
257+
// the compression method.
258+
#if OPENPMD_USE_BLOSC2_FILTER
259+
std::stringstream hdf5_blosc_filter;
260+
hdf5_blosc_filter << R"(
261+
backend = "hdf5"
262+
263+
[hdf5.dataset]
264+
chunks = "auto"
265+
266+
[hdf5.dataset.permanent_filters]
267+
id = )" << FILTER_BLOSC2
268+
<< R"(
269+
flags = "mandatory"
270+
cd_values = [0, 0, 0, 0, 4, 1, )"
271+
<< BLOSC_ZSTD << R"(]
272+
)";
273+
write("hdf5_blosc_filter.%E", hdf5_blosc_filter.str());
274+
#endif // OPENPMD_USE_BLOSC2_FILTER
275+
#endif // openPMD_HAVE_HDF5
276+
}

examples/7_extended_write_serial.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,11 @@ int main()
165165
#if OPENPMD_USE_BLOSC2_FILTER
166166
/*
167167
* FILTER_BLOSC2 resolves to 32026, the permanent plugin ID registered
168-
* with the HDF Group. Plugin-specific options are given via c_values,
168+
* with the HDF Group. Plugin-specific options are given via cd_values,
169169
* refer to the specific plugin's documentation. For the Blosc2 plugin,
170-
* parameters 0, 1, 2 and 3 are reserved. Parameter 4 is the compression
171-
* level, parameter 5 is a boolean for activating shuffling and
172-
* parameter 6 denotes the compression method.
170+
* indexes 0, 1, 2 and 3 are reserved. Index 4 is the compression
171+
* level, index 5 is a boolean for activating shuffling and
172+
* index 6 denotes the compression method.
173173
*/
174174
d.options = R"END(
175175
{
@@ -192,7 +192,7 @@ int main()
192192
"id": )END" +
193193
std::to_string(FILTER_BLOSC2) + R"END(,
194194
"flags": "mandatory",
195-
"c_values": [0, 0, 0, 0, 4, 1, )END" +
195+
"cd_values": [0, 0, 0, 0, 4, 1, )END" +
196196
std::to_string(BLOSC_ZSTD) + R"END(]
197197
}
198198
}

0 commit comments

Comments
 (0)