|
| 1 | +/* Copyright 2025 Franz Poeschel |
| 2 | + * |
| 3 | + * This file is part of openPMD-api. |
| 4 | + * |
| 5 | + * openPMD-api is free software: you can redistribute it and/or modify |
| 6 | + * it under the terms of either the GNU General Public License or |
| 7 | + * the GNU Lesser General Public License as published by |
| 8 | + * the Free Software Foundation, either version 3 of the License, or |
| 9 | + * (at your option) any later version. |
| 10 | + * |
| 11 | + * openPMD-api is distributed in the hope that it will be useful, |
| 12 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | + * GNU General Public License and the GNU Lesser General Public License |
| 15 | + * for more details. |
| 16 | + * |
| 17 | + * You should have received a copy of the GNU General Public License |
| 18 | + * and the GNU Lesser General Public License along with openPMD-api. |
| 19 | + * If not, see <http://www.gnu.org/licenses/>. |
| 20 | + */ |
| 21 | + |
#include <openPMD/openPMD.hpp>

#if openPMD_HAVE_HDF5 && __has_include(<blosc2_filter.h>)
#include <blosc2_filter.h>
#define OPENPMD_USE_BLOSC2_FILTER 1
#else
#define OPENPMD_USE_BLOSC2_FILTER 0
#endif

#include <cstddef>
#include <cstdlib>
#include <iostream>
#include <numeric>
#include <sstream>
#include <stdexcept>
#include <string>
| 34 | + |
/**
 * Register the Blosc2 HDF5 filter plugin, if it was found at compile time.
 *
 * The body is only compiled when OPENPMD_USE_BLOSC2_FILTER is non-zero, i.e.
 * when HDF5 support is enabled and <blosc2_filter.h> could be included.
 * Otherwise this function does nothing.
 *
 * On success, the plugin's version and date are printed to stdout.
 *
 * @throws std::runtime_error if register_blosc2() reports a status below 1.
 */
void init_blosc_for_hdf5()
{
#if OPENPMD_USE_BLOSC2_FILTER
    /*
     * This registers the Blosc2 plugin from
     * https://github.com/Blosc/HDF5-Blosc2 as a demonstration on how to
     * activate and configure dynamic HDF5 filter plugins through openPMD.
     */

    // Start from nullptr so that a call which leaves the out-parameters
    // untouched can neither make us print nor free an indeterminate pointer.
    char *version = nullptr;
    char *date = nullptr;
    int const status = register_blosc2(&version, &date);

    if (status >= 1)
    {
        std::cout << "Blosc2 plugin registered in version '"
                  << (version ? version : "unknown") << "' and date '"
                  << (date ? date : "unknown") << "'." << std::endl;
    }

    // NOTE(review): upstream HDF5-Blosc2 hands out strdup()'d copies of its
    // version strings, so ownership is with the caller and free() is the
    // matching deallocator -- confirm against the plugin version in use.
    // free(nullptr) is a no-op, so this is safe on the failure path as well.
    std::free(version);
    std::free(date);

    if (status < 1)
    {
        throw std::runtime_error("Unable to register Blosc2 plugin with HDF5.");
    }
#endif
}
| 57 | + |
| 58 | +void write(std::string const &filename, std::string const &config) |
| 59 | +{ |
| 60 | + using namespace openPMD; |
| 61 | + std::cout << "Config for '" << filename << "' as JSON:\n" |
| 62 | + << json::merge(config, "{}") << "\n\n"; |
| 63 | + Series series( |
| 64 | + "../samples/compression/" + filename, Access::CREATE_LINEAR, config); |
| 65 | + |
| 66 | + for (size_t i = 0; i < 10; ++i) |
| 67 | + { |
| 68 | + auto ¤t_iteration = series.snapshots()[i]; |
| 69 | + |
| 70 | + // First, write an E mesh. |
| 71 | + auto &E = current_iteration.meshes["E"]; |
| 72 | + E.setAxisLabels({"x", "y"}); |
| 73 | + for (auto const &dim : {"x", "y"}) |
| 74 | + { |
| 75 | + auto &component = E[dim]; |
| 76 | + component.resetDataset({Datatype::FLOAT, {10, 10}}); |
| 77 | + auto buffer_view = |
| 78 | + component.storeChunk<float>({0, 0}, {10, 10}).currentBuffer(); |
| 79 | + // Now fill the prepared buffer with some nonsense data. |
| 80 | + std::iota(buffer_view.begin(), buffer_view.end(), i * 100); |
| 81 | + } |
| 82 | + |
| 83 | + // Now, write some e particles. |
| 84 | + auto &e = current_iteration.particles["e"]; |
| 85 | + for (auto const &dim : {"x", "y"}) |
| 86 | + { |
| 87 | + // Do not bother with a positionOffset |
| 88 | + auto &position_offset = e["positionOffset"][dim]; |
| 89 | + position_offset.makeConstant(0); |
| 90 | + |
| 91 | + auto &position = e["position"][dim]; |
| 92 | + position.resetDataset({Datatype::FLOAT, {100}}); |
| 93 | + auto buffer_view = |
| 94 | + position.storeChunk<float>({0}, {100}).currentBuffer(); |
| 95 | + // Now fill the prepared buffer with some nonsense data. |
| 96 | + std::iota(buffer_view.begin(), buffer_view.end(), i * 100); |
| 97 | + } |
| 98 | + } |
| 99 | +} |
| 100 | + |
| 101 | +int main() |
| 102 | +{ |
| 103 | + init_blosc_for_hdf5(); |
| 104 | + |
| 105 | + // Backend specific configuration can be given in either JSON or TOML. |
| 106 | + // We will stick with TOML in this example, since it allows inline comments |
| 107 | + // and remains more legible for larger configurations. |
| 108 | + // If you are interested in the configurations as JSON, run the example and |
| 109 | + // their JSON equivalents will be printed to stdout. |
| 110 | + |
| 111 | +#if openPMD_HAVE_ADIOS2 |
| 112 | + // We start with two examples for ADIOS2. |
| 113 | + std::string const simple_adios2_config = R"( |
| 114 | +
|
| 115 | + # Backend can either be inferred from the filename ending, or specified |
| 116 | + # explicitly. In the latter case, the filename ending can be given as |
| 117 | + # a wildcard %E, openPMD will then pick a default ending. |
| 118 | + backend = "adios2" |
| 119 | +
|
| 120 | + # ADIOS2 supports adding multiple operators to a variable, hence we |
| 121 | + # specify a list of operators here (using TOML's double bracket syntax). |
| 122 | + # How much sense this makes depends on the specific operators in use. |
| 123 | +
|
| 124 | + [[adios2.dataset.operators]] |
| 125 | + type = "bzip2" |
| 126 | + parameters.clevel = 9 # The available parameters depend |
| 127 | + # on the operator. |
| 128 | + # Here, we specify zlib's compression level. |
| 129 | + )"; |
| 130 | + write("adios2_with_zlib.%E", simple_adios2_config); |
| 131 | + |
| 132 | + // The compression can also be specified per-dataset. |
| 133 | + // For more details, also check: |
| 134 | + // https://openpmd-api.readthedocs.io/en/latest/details/backendconfig.html#dataset-specific-configuration |
| 135 | + |
| 136 | + // This example will demonstrate the use of pattern matching. |
| 137 | + // adios2.dataset is now a list of dataset configurations. The specific |
| 138 | + // configuration to be used for a dataset will be determined by matching |
| 139 | + // the dataset name against the patterns specified by the 'select' keys. |
| 140 | + std::string const extended_adios2_config = R"( |
| 141 | + backend = "adios2" |
| 142 | +
|
| 143 | + [[adios2.dataset]] |
| 144 | + # This uses egrep-type regular expressions. |
| 145 | + select = "meshes/.*" |
| 146 | + # Now, specify the operators list again. Let's use Blosc for this. |
| 147 | + [[adios2.dataset.cfg.operators]] |
| 148 | + type = "blosc" |
| 149 | + parameters.doshuffle = "BLOSC_BITSHUFFLE" |
| 150 | + parameters.clevel = 1 |
| 151 | +
|
| 152 | + # Now, configure the particles. |
| 153 | + [[adios2.dataset]] |
| 154 | + # The match can either be against the path within the containing |
| 155 | + # Iteration (e.g. 'meshes/E/x', as above) or (as in this example), |
| 156 | + # against the full path (e.g. '/data/0/particles/e/position/x'). |
| 157 | + # In this example, completely deactivate compression specifically for |
| 158 | + # 'particles/e/position/x'. All other particle datasets will |
| 159 | + # fall back to the default configuration specified below. |
| 160 | + # Be careful when specifying compression per-Iteration. While this |
| 161 | + # syntax fundamentally allows doing that, compressions once specified |
| 162 | + # on an ADIOS2 variable will not be removed again. |
| 163 | + # Since variable-encoding reuses ADIOS2 variables from previous |
| 164 | + # Iterations, the compression configuration of the first Iteration will |
| 165 | + # leak into all subsequent Iterations. |
| 166 | + select = "/data/[0-9]*/particles/e/position/x" |
| 167 | + cfg.operators = [] |
| 168 | +
|
| 169 | + # Now, the default configuration. |
| 170 | + # In general, the dataset configurations are matched top-down, going for |
| 171 | + # the first matching configuration. So, a default configuration could |
| 172 | + # theoretically be specified by emplacing a catch-all pattern |
| 173 | + # (regex: ".*") as the last option. |
| 174 | + # However, we also define an explicit shorthand for specifying default |
| 175 | + # configurations: Just omit the 'select' key. This special syntax is |
| 176 | + # understood as the default configuration no matter where in the list it |
| 177 | + # is emplaced, and it allows the backends to initialize the default |
| 178 | + # configuration globally, instead of applying it selectively to each |
| 179 | + # dataset that matches a catch-all pattern. |
| 180 | + [[adios2.dataset]] |
| 181 | + [[adios2.dataset.cfg.operators]] |
| 182 | + type = "bzip2" |
| 183 | + parameters.clevel = 2 |
| 184 | + )"; |
| 185 | + write( |
| 186 | + "adios2_with_dataset_specific_configurations.%E", |
| 187 | + extended_adios2_config); |
| 188 | +#endif // openPMD_HAVE_ADIOS2 |
| 189 | + |
| 190 | +#if openPMD_HAVE_HDF5 |
| 191 | + // Now, let's continue with HDF5. |
| 192 | + // HDF5 supports compression via so-called filters. These can be permanent |
| 193 | + // (applied to an entire dataset) and transient (applied to individual I/O |
| 194 | + // operations). The openPMD-api currently supports permanent filters. Refer |
| 195 | + // also to https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html. |
| 196 | + |
| 197 | + // Filters are additionally distinguished by how tightly they integrate with |
| 198 | + // HDF5. The most tightly-integrated filter is Zlib, which has its own API |
| 199 | + // calls and hence also a special JSON/TOML configuration in openPMD: |
| 200 | + |
| 201 | + std::string const hdf5_zlib_config = R"( |
| 202 | + backend = "hdf5" |
| 203 | +
|
| 204 | + [hdf5.dataset.permanent_filters] |
| 205 | + type = "zlib" # mandatory parameter |
| 206 | + aggression = 5 # optional, defaults to 1 |
| 207 | + )"; |
| 208 | + write("hdf5_zlib.%E", hdf5_zlib_config); |
| 209 | + |
| 210 | + // All other filters have a common API and are identified by global IDs |
| 211 | + // registered with the HDF Group. More details can be found in the |
| 212 | + // H5Zpublic.h header. That header predefines a small number of filter IDs. |
| 213 | + // These are directly supported by the openPMD-api: deflate, shuffle, |
| 214 | + // fletcher32, szip, nbit, scaleoffset. |
| 215 | + |
| 216 | + std::string const hdf5_predefined_filter_ids = R"( |
| 217 | + backend = "hdf5" |
| 218 | +
|
| 219 | + [hdf5.dataset.permanent_filters] |
| 220 | + id = "fletcher32" # mandatory parameter |
| 221 | + # A filter can be applied as mandatory (execution should abort if the |
| 222 | + # filter cannot be applied) or as optional (execution should ignore when |
| 223 | + # the filter cannot be applied). |
| 224 | + flags = "mandatory" # optional parameter |
| 225 | + type = "by_id" # optional parameter for filters identified by ID, |
| 226 | + # mandatory only for zlib (see above) |
| 227 | + )"; |
| 228 | + write("hdf5_predefined_filter_id.%E", hdf5_predefined_filter_ids); |
| 229 | + |
| 230 | + // Just like ADIOS2 with their operations, also HDF5 supports adding |
| 231 | + // multiple filters into a filter pipeline. The permanent_filters key can |
| 232 | + // hence also be given as a list. |
| 233 | + |
| 234 | + std::string const hdf5_filter_pipeline = R"( |
| 235 | + backend = "hdf5" |
| 236 | +
|
| 237 | + # pipeline consisting of two filters |
| 238 | +
|
| 239 | + [[hdf5.dataset.permanent_filters]] |
| 240 | + type = "zlib" |
| 241 | + aggression = 5 |
| 242 | +
|
| 243 | + [[hdf5.dataset.permanent_filters]] |
| 244 | + id = "shuffle" |
| 245 | + flags = "mandatory" |
| 246 | + )"; |
| 247 | + write("hdf5_filter_pipeline.%E", hdf5_filter_pipeline); |
| 248 | + |
| 249 | + // For non-predefined IDs, the ID must be given as a number. This example |
| 250 | + // uses the Blosc2 filter available from |
| 251 | + // https://github.com/Blosc/HDF5-Blosc2, with the permanent plugin ID 32026 |
| 252 | + // (defined in blosc2_filter.h as FILTER_BLOSC2). Generic filters referenced |
| 253 | + // by ID can be configured via the cd_values field. This field is an array |
| 254 | + // of unsigned integers and plugin-specific interpretation. For the Blosc2 |
| 255 | + // plugin, indexes 0, 1, 2 and 3 are reserved. index 4 is the compression |
| 256 | + // level, index 5 is a boolean for activating shuffling and index 6 denotes |
| 257 | + // the compression method. |
| 258 | +#if OPENPMD_USE_BLOSC2_FILTER |
| 259 | + std::stringstream hdf5_blosc_filter; |
| 260 | + hdf5_blosc_filter << R"( |
| 261 | + backend = "hdf5" |
| 262 | +
|
| 263 | + [hdf5.dataset] |
| 264 | + chunks = "auto" |
| 265 | +
|
| 266 | + [hdf5.dataset.permanent_filters] |
| 267 | + id = )" << FILTER_BLOSC2 |
| 268 | + << R"( |
| 269 | + flags = "mandatory" |
| 270 | + cd_values = [0, 0, 0, 0, 4, 1, )" |
| 271 | + << BLOSC_ZSTD << R"(] |
| 272 | + )"; |
| 273 | + write("hdf5_blosc_filter.%E", hdf5_blosc_filter.str()); |
| 274 | +#endif // OPENPMD_USE_BLOSC2_FILTER |
| 275 | +#endif // openPMD_HAVE_HDF5 |
| 276 | +} |
0 commit comments