
Commit 52cc164

HDF5 filters (compression) (#1644)
* Preparation work: Refactoring
* Basic compression/filtering in HDF5
* Configure generic filters via JSON object
* Full support for the set_filter API
* Fix: captured structured bindings are a C++20 extension
* Refactoring to satisfy the Github bot
* Fix includes
* Switch to JSON config for NVidia compiler's benefit
* Verbose CI debugging lets goo
* Revert "Verbose CI debugging lets goo". This reverts commit abefc3a.
* Use Blosc2 filter not yet integrated into CI
* Add compression example
* Add HDF5-Blosc2 to some Linux workflow
* Update .github/workflows/dependencies/install_hdf5_blosc2
* Add Python example
* Some documentation fixes
* Fix install_hdf5_blosc2 script
* Complete examples
* ADIOS2 shorthand: dataset.operators may also be a single element
* Fix indentation
* Fix patch URL
* Update documentation and tests for ADIOS2
* Deactivate tests for HDF5-Blosc2
* Add documentation
* Some more consistency in examples
* Install with sudo rights
* Erase unnecessary line from example
* Fix datatypes in Python example
* Use CMake flag directly...
* Reset extended write example to dev. Compression example is moved to 15_compression now.
* Do we need -L/usr/local/lib ??
* Try if HDF5 finds the filter on its own...
* Ok that works, so cleanup
* Explicitly set chunks = "auto"
* CI fixes
* Add HDF5-Blosc2 to further CI runs
* Add hdf5plugin to some Python runs
* Skip patch in Clang runs
* Fix includes
* Fixes
* Further fixes
* Remove blosc filter from some runs again. This is too bothersome to set up and the runs that we have are enough.
* Add missing dataset definition
* Pull the Blosc2 stuff down in the example file
* Ditch self-compiled Blosc2 plugin, use hdf5plugin package
* CI fixes
* Try installing the deb package for h5pl...
* tmp: check if python example for hdf5+blosc2 runs
* fixes
* Move hdf5plugin Python tests to other runs
* Revert "tmp: check if python example for hdf5+blosc2 runs". This reverts commit b81437b.
* ....
* ...
* Install hdf5plugin into venv
* Remove CI debugging
* Cleanup
1 parent b567766 commit 52cc164

13 files changed

Lines changed: 1118 additions & 152 deletions


Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+version_major=1.14
+version_minor=6
+build_var=ubuntu-2404_gcc
+
+cd /opt
+wget "https://github.com/HDFGroup/hdf5_plugins/releases/download/hdf5-${version_major}.${version_minor}/hdf5_plugins-${version_major}-${build_var}.deb" >&2
+sudo dpkg -i "hdf5_plugins-${version_major}-${build_var}.deb" >&2
+rm "hdf5_plugins-${version_major}-${build_var}.deb"
+echo "/HDF_Group/HDF5/${version_major}.${version_minor}/lib/plugin/"
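The script prints the plugin directory on stdout so that callers can capture and export it. A minimal Python sketch of the same mechanism (the path below is what the script would echo for HDF5 1.14.6 and is an assumption; substitute whatever it prints on your system):

```python
import os

# Path echoed by the install script above for HDF5 1.14.6 (an assumption;
# adjust for your actual HDF5 version and install prefix).
plugin_dir = "/HDF_Group/HDF5/1.14.6/lib/plugin/"

# HDF5 consults HDF5_PLUGIN_PATH when dynamically loading filter plugins,
# so it must be set before the first HDF5 call in the process.
os.environ["HDF5_PLUGIN_PATH"] = plugin_dir
print(os.environ["HDF5_PLUGIN_PATH"])
```

This mirrors the `export HDF5_PLUGIN_PATH="$(... install_hdf5_plugins)"` lines added to the workflows below.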

.github/workflows/linux.yml

Lines changed: 18 additions & 4 deletions
@@ -97,9 +97,12 @@ jobs:
           sudo apt-get update
           sudo apt-get install clang-11 gfortran libopenmpi-dev python3
           sudo .github/workflows/dependencies/install_spack
+
       - name: Build
         env: {CC: clang-11, CXX: clang++-11, CXXFLAGS: -Werror}
         run: |
+          # Use this to make the HDF5 plugins available from the C/C++ API.
+          export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)"
           sudo ln -s "$(which cmake)" /usr/bin/cmake
           eval $(spack env activate --sh .github/ci/spack-envs/clang11_nopy_ompi_h5_ad2/)
           spack install
@@ -172,16 +175,20 @@
         run: |
           sudo apt-get update
           sudo apt-get remove openmpi* libopenmpi* *hdf5* || true
-          sudo apt-get install g++ gfortran python3
+          sudo apt-get install g++ gfortran python3 python3-venv
+
           sudo .github/workflows/dependencies/install_spack

+
           # Need to build this manually due to broken MPICH package in Ubuntu 24.04
           # https://bugs.launchpad.net/ubuntu/+source/mpich/+bug/2072338
           sudo .github/workflows/dependencies/install_mpich

       - name: Build
         env: {CC: gcc, CXX: g++, MPICH_CC: gcc, MPICH_CXX: g++, CXXFLAGS: -Werror}
         run: |
+          # Use this to make the HDF5 plugins available from the C/C++ API.
+          export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)"
           cmake --version
           mpiexec --version
           mpicxx --version
@@ -190,9 +197,13 @@
           eval $(spack env activate --sh .github/ci/spack-envs/gcc13_py312_mpich_h5_ad2/)
           spack install

+          python -m venv venv
+          source venv/bin/activate
+          pip install mpi4py numpy hdf5plugin
+
           share/openPMD/download_samples.sh build
           cmake -S . -B build \
-            -DopenPMD_USE_PYTHON=OFF \
+            -DopenPMD_USE_PYTHON=ON \
             -DopenPMD_USE_MPI=ON \
             -DopenPMD_USE_HDF5=ON \
             -DopenPMD_USE_ADIOS2=ON \
@@ -238,6 +249,8 @@ jobs:
       - name: Build
         env: {CC: gcc-12, CXX: g++-12, CXXFLAGS: -Werror}
         run: |
+          # Use this to make the HDF5 plugins available from the C/C++ API.
+          export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)"
           sudo ln -s "$(which cmake)" /usr/bin/cmake
           eval $(spack env activate --sh .github/ci/spack-envs/gcc12_py36_ompi_h5_ad2/)
           spack install
@@ -248,7 +261,8 @@
             -DopenPMD_USE_MPI=ON \
             -DopenPMD_USE_HDF5=ON \
             -DopenPMD_USE_ADIOS2=ON \
-            -DopenPMD_USE_INVASIVE_TESTS=ON
+            -DopenPMD_USE_INVASIVE_TESTS=ON \
+            -DCMAKE_VERBOSE_MAKEFILE=ON
           cmake --build build --parallel 4
           ctest --test-dir build --output-on-failure

@@ -261,6 +275,7 @@
         run: |
           sudo apt-get update
           sudo apt-get install g++ libopenmpi-dev libhdf5-openmpi-dev python3 python3-numpy python3-mpi4py python3-pandas python3-h5py-mpi python3-pip
+          python3 -m pip install jsonschema==4.* referencing
           # TODO ADIOS2
       - name: Build
         env: {CXXFLAGS: -Werror, PKG_CONFIG_PATH: /usr/lib/x86_64-linux-gnu/pkgconfig}
@@ -278,7 +293,6 @@
           cmake --build build --parallel 4
           ctest --test-dir build --output-on-failure

-          python3 -m pip install jsonschema==4.* referencing
           cd share/openPMD/json_schema
           PATH="../../../build/bin:$PATH" make -j 2
           # We need to exclude the thetaMode example since that has a different
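One run above installs the `hdf5plugin` wheel into a venv instead of building a Blosc2 plugin by hand. Importing `hdf5plugin` registers its bundled compression filters with libhdf5 for the current process, which is what lets the Python compression example run without a self-compiled plugin. A guarded availability check (the `hdf5plugin` package itself is an assumption of the CI venv, not of this sketch):

```python
def hdf5plugin_available() -> bool:
    """Return True if the hdf5plugin package can be imported.

    Importing hdf5plugin has the side effect of registering its bundled
    HDF5 filter plugins (Blosc2, Zstd, ...) for this process.
    """
    try:
        import hdf5plugin  # noqa: F401
        return True
    except ImportError:
        return False

print(hdf5plugin_available())
```

In the CI run this import happens implicitly inside the Python compression example.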

.github/workflows/tooling.yml

Lines changed: 10 additions & 0 deletions
@@ -22,6 +22,11 @@ jobs:
           sudo apt-get install clang clang-tidy gfortran libopenmpi-dev python-is-python3
           SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack
           echo "SPACK VERSION: $(spack --version)"
+
+          # Use this to make the HDF5 plugins available from the C/C++ API.
+          export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)"
+          echo "$HDF5_PLUGIN_PATH"
+          ls "$HDF5_PLUGIN_PATH"
       - name: Build
         env: {CC: clang, CXX: clang++}
         run: |
@@ -52,6 +57,11 @@
           sudo apt-get install clang-19 libc++-dev libc++abi-dev python3 gfortran libopenmpi-dev python3-numpy
           SPACK_VER=1.0.1 sudo -E .github/workflows/dependencies/install_spack
           echo "SPACK VERSION: $(spack --version)"
+
+          # Use this to make the HDF5 plugins available from the C/C++ API.
+          export HDF5_PLUGIN_PATH="$(sudo -E .github/workflows/dependencies/install_hdf5_plugins)"
+          echo "$HDF5_PLUGIN_PATH"
+          ls "$HDF5_PLUGIN_PATH"
       - name: Build
         env: {CC: mpicc, CXX: mpic++, OMPI_CC: clang-19, OMPI_CXX: clang++-19, CXXFLAGS: -Werror, OPENPMD_HDF5_CHUNKS: none, OPENPMD_TEST_NFILES_MAX: 100}
         run: |

CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -718,6 +718,7 @@ set(openPMD_EXAMPLE_NAMES
     12_span_write
     13_write_dynamic_configuration
     14_toml_template
+    15_compression
 )
 set(openPMD_PYTHON_EXAMPLE_NAMES
     2_read_serial
@@ -734,6 +735,7 @@ set(openPMD_PYTHON_EXAMPLE_NAMES
     11_particle_dataframe
     12_span_write
     13_write_dynamic_configuration
+    15_compression
 )

 if(openPMD_USE_INVASIVE_TESTS)

docs/source/backends/hdf5.rst

Lines changed: 13 additions & 0 deletions
@@ -25,6 +25,19 @@ Virtual file drivers are configured via JSON/TOML.
 Refer to the page on :ref:`JSON/TOML configuration <backendconfig-hdf5>` for further details.


+Filters (compression)
+*********************
+
+HDF5 supports so-called filters for transformations such as compression on datasets.
+These can be permanent (applied to an entire dataset) or transient (applied to individual I/O operations).
+The openPMD-api currently supports permanent filters.
+Pipelines of multiple subsequent filters are supported.
+Refer also to `this documentation <https://web.ics.purdue.edu/~aai/HDF5/html/Filters.html>`_.
+
+Filters are applied via :ref:`JSON/TOML configuration <backendconfig-hdf5>`; see there for detailed instructions on how to apply filters.
+There are also extended examples on how to apply compression options to ADIOS2 and HDF5 in the examples: `Python <https://github.com/openPMD/openPMD-api/blob/dev/examples/15_compression.py>`_ / `C++ <https://github.com/openPMD/openPMD-api/blob/dev/examples/15_compression.cpp>`_.
+
+
 Backend-Specific Controls
 -------------------------
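To make the documented filter configuration concrete, here is a hedged sketch of a dataset-level JSON configuration requesting a permanent filter pipeline (the builtin shuffle filter followed by zlib), using the key layout spelled out in the JSON/TOML configuration page; how the resulting string is handed to openPMD-api (e.g. as a dataset options argument) depends on the binding and is not shown here:

```python
import json

# Hedged sketch: a two-stage permanent filter pipeline. The key path
# follows the "hdf5.datasets.permanent_filters" spelling documented in
# this commit; the zlib entry uses its distinct type/aggression form.
config = {
    "hdf5": {
        "datasets": {
            "permanent_filters": [
                {"id": "shuffle"},                  # builtin filter, by name
                {"type": "zlib", "aggression": 5},  # zlib has its own API
            ]
        }
    }
}
options = json.dumps(config)
print(options)
```

A shuffle-before-compression pipeline is a common choice because byte shuffling typically improves the compression ratio of a subsequent deflate stage.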

docs/source/details/backendconfig.rst

Lines changed: 20 additions & 2 deletions
@@ -185,8 +185,8 @@ Explanation of the single keys:
 Additionally, specifying ``"disk_override"``, ``"buffer_override"`` or ``"new_step_override"`` will take precedence over options specified without the ``_override`` suffix, allowing to invert the normal precedence order.
 This way, a data producing code can hardcode the preferred flush target per ``flush()`` call, but users can e.g. still entirely deactivate flushing to disk in the ``Series`` constructor by specifying ``preferred_flush_target = buffer_override``.
 This is useful when applying the asynchronous IO capabilities of the BP5 engine.
-* ``adios2.dataset.operators``: This key contains a list of ADIOS2 `operators <https://adios2.readthedocs.io/en/latest/components/components.html#operator>`_, used to enable compression or dataset transformations.
-  Each object in the list has two keys:
+* ``adios2.dataset.operators``: This key contains either a single ADIOS2 `operator <https://adios2.readthedocs.io/en/latest/components/components.html#operator>`_ or a list of operators, used to enable compression or dataset transformations.
+  Each operator is an object with two keys:

   * ``type`` supported ADIOS operator type, e.g. zfp, sz
   * ``parameters`` is an associative map of string parameters for the operator (e.g. compression levels)
@@ -247,6 +247,24 @@ Explanation of the single keys:
   An explicit chunk size can be specified as a list of positive integers, e.g. ``hdf5.dataset.chunks = [10, 100]``. Note that this specification should only be used per-dataset, e.g. in ``resetDataset()``/``reset_dataset()``.

   Chunking generally improves performance and only needs to be disabled in corner-cases, e.g. when heavily relying on independent, parallel I/O that non-collectively declares data records.
+* ``hdf5.datasets.permanent_filters``: Either a single HDF5 permanent filter specification or a list of HDF5 permanent filter specifications.
+  Each filter specification is a JSON/TOML object, but there are multiple options:
+
+  * Zlib: The Zlib filter has a distinct API in HDF5, and the configuration for Zlib in openPMD is hence also different. It is activated by the mandatory key ``type = "zlib"`` and configured by the optional integer key ``aggression``.
+    Example: ``{"type": "zlib", "aggression": 5}``.
+  * Filters identified by their global ID `registered with the HDF group <https://github.com/HDFGroup/hdf5_plugins/blob/master/docs/RegisteredFilterPlugins.md>`_.
+    They are activated by the mandatory integer key ``id`` containing this global ID.
+    All other keys are optional:
+
+    * ``type = "by_id"`` may optionally be specified for clarity and consistency.
+    * The string key ``flags`` can take the values ``"mandatory"`` or ``"optional"``, indicating whether HDF5 should abort execution if the filter cannot be applied for some reason.
+    * The key ``cd_values`` points to a list of nonnegative integers.
+      These are filter-specific configuration options.
+      Refer to the specific filter's documentation.
+
+    As an alternative to an integer ID, the key ``id`` may also be of string type, identifying one of the six builtin filters of HDF5: ``"deflate"``, ``"shuffle"``, ``"fletcher32"``, ``"szip"``, ``"nbit"``, ``"scaleoffset"``.
+
+
 * ``hdf5.vfd.type`` selects the HDF5 virtual file driver.
   Currently available are:
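A hedged sketch of the ``by_id`` filter form described above, as a plain dictionary. The numeric ID 307 is used purely as an illustration of a registered filter ID (consult the HDF Group registry linked above for the filter you actually want), and the ``cd_values`` entry is a made-up example value:

```python
# Illustrative "by_id" permanent filter specification. Both the filter ID
# 307 and the cd_values below are example values, not recommendations.
filter_by_id = {
    "type": "by_id",      # optional, for clarity and consistency
    "id": 307,            # global filter ID from the HDF Group registry
    "flags": "optional",  # HDF5 will not abort if the filter is unavailable
    "cd_values": [9],     # filter-specific configuration integers
}

# A builtin filter can instead be selected by name via a string-typed id:
filter_builtin = {"id": "fletcher32"}
print(filter_by_id["id"], filter_builtin["id"])
```

Either object (or a list mixing several of them) would go under the ``hdf5.datasets.permanent_filters`` key.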

examples/13_write_dynamic_configuration.cpp

Lines changed: 6 additions & 7 deletions
@@ -47,6 +47,7 @@ type = "bp4"

 # ADIOS2 allows adding several operators
 # Lists are given in TOML by using double brackets
+# For specifying a single operator only, the list may be skipped.
 [[adios2.dataset.operators]]
 type = "zlib"

@@ -192,14 +193,12 @@ CFG.CHUNKS = [10]
         "resizable": true,
         "adios2": {
             "dataset": {
-                "operators": [
-                    {
-                        "type": "zlib",
-                        "parameters": {
-                            "clevel": 9
-                        }
+                "operators": {
+                    "type": "zlib",
+                    "parameters": {
+                        "clevel": 9
                     }
-                ]
+                }
             }
         }
     })END";

examples/13_write_dynamic_configuration.py

Lines changed: 3 additions & 2 deletions
@@ -31,6 +31,7 @@

 # ADIOS2 allows adding several operators
 # Lists are given in TOML by using double brackets
+# For specifying a single operator only, the list may be skipped.
 [[adios2.dataset.operators]]
 type = "zlib"

@@ -106,12 +107,12 @@ def main():
         }
     }
     config['adios2']['dataset'] = {
-        'operators': [{
+        'operators': {
             'type': 'zlib',
             'parameters': {
                 'clevel': 9
             }
-        }]
+        }
     }

     temperature = iteration.meshes["temperature"]
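Both example changes rely on the new ADIOS2 shorthand from this commit: ``adios2.dataset.operators`` may be a single operator object instead of a one-element list. A sketch of how the two spellings relate (the normalization shown is illustrative, not openPMD-api's actual parser):

```python
# Both configurations request the same zlib compression; the second uses
# the single-element shorthand introduced by this commit.
as_list = {
    "adios2": {"dataset": {"operators": [
        {"type": "zlib", "parameters": {"clevel": 9}},
    ]}}
}
as_single = {
    "adios2": {"dataset": {"operators":
        {"type": "zlib", "parameters": {"clevel": 9}},
    }}
}

def normalized(cfg):
    # Illustrative normalization: wrap a lone operator object into a list.
    ops = cfg["adios2"]["dataset"]["operators"]
    return ops if isinstance(ops, list) else [ops]

print(normalized(as_single) == normalized(as_list))
```

The list form remains necessary whenever more than one operator is chained.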
