diff --git a/feectools b/feectools index 8c88dec79..c322ee361 160000 --- a/feectools +++ b/feectools @@ -1 +1 @@ -Subproject commit 8c88dec79510b315024d4b7e0ccc28e76ad8c9e7 +Subproject commit c322ee361f14ab8b77f616a21829ec5d693c18a6 diff --git a/params_LinearMHDDriftkineticCC.py b/params_LinearMHDDriftkineticCC.py new file mode 100644 index 000000000..f1f97a3ee --- /dev/null +++ b/params_LinearMHDDriftkineticCC.py @@ -0,0 +1,116 @@ +from struphy import main +from struphy.fields_background import equils +from struphy.geometry import domains +from struphy.initial import perturbations +from struphy.io.options import BaseUnits, DerhamOptions, EnvironmentOptions, FieldsBackground, Time +from struphy.kinetic_background import maxwellians + +# import model, set verbosity +from struphy.models.hybrid import LinearMHDDriftkineticCC +from struphy.pic.utilities import ( + BinningPlot, + BoundaryParameters, + KernelDensityPlot, + LoadingParameters, + WeightsParameters, +) +from struphy.topology import grids + +# environment options +env = EnvironmentOptions() + +# units +base_units = BaseUnits() + +# time stepping +time_opts = Time() + +# geometry +domain = domains.Cuboid() + +# fluid equilibrium (can be used as part of initial conditions) +equil = equils.HomogenSlab() + +# grid +grid = grids.TensorProductGrid(Nel=(16, 16, 16)) + +# derham options +derham_opts = DerhamOptions() + +# light-weight model instance +model = LinearMHDDriftkineticCC() + +# species parameters +model.mhd.set_phys_params() +model.energetic_ions.set_phys_params() + +loading_params = LoadingParameters(ppc=1000) +weights_params = WeightsParameters() +boundary_params = BoundaryParameters() +model.energetic_ions.set_markers( + loading_params=loading_params, + weights_params=weights_params, + boundary_params=boundary_params, +) +model.energetic_ions.set_sorting_boxes() +model.energetic_ions.set_save_data() + +# propagator options +model.propagators.push_bxe.options = model.propagators.push_bxe.Options( + b_tilde=model.em_fields.b_field, +) +model.propagators.push_parallel.options = model.propagators.push_parallel.Options( + b_tilde=model.em_fields.b_field, +) +model.propagators.shearalfen_cc5d.options = model.propagators.shearalfen_cc5d.Options( + energetic_ions=model.energetic_ions.var, +) +model.propagators.magnetosonic.options = model.propagators.magnetosonic.Options( + b_field=model.em_fields.b_field, +) +model.propagators.cc5d_density.options = model.propagators.cc5d_density.Options( + energetic_ions=model.energetic_ions.var, + b_tilde=model.em_fields.b_field, +) +model.propagators.cc5d_gradb.options = model.propagators.cc5d_gradb.Options( + b_tilde=model.em_fields.b_field, +) +model.propagators.cc5d_curlb.options = model.propagators.cc5d_curlb.Options( + b_tilde=model.em_fields.b_field, +) + +# background, perturbations and initial conditions +model.mhd.velocity.add_background(FieldsBackground()) +model.mhd.velocity.add_perturbation(perturbations.TorusModesCos(given_in_basis="v", comp=0)) +model.mhd.velocity.add_perturbation(perturbations.TorusModesCos(given_in_basis="v", comp=1)) +model.mhd.velocity.add_perturbation(perturbations.TorusModesCos(given_in_basis="v", comp=2)) +maxwellian_1 = maxwellians.GyroMaxwellian2D(n=(1.0, None), equil=equil) +maxwellian_2 = maxwellians.GyroMaxwellian2D(n=(0.1, None), equil=equil) +background = maxwellian_1 + maxwellian_2 +model.energetic_ions.var.add_background(background) + +# if .add_initial_condition is not called, the background is the kinetic initial condition +perturbation = 
perturbations.TorusModesCos() +maxwellian_1pt = maxwellians.GyroMaxwellian2D(n=(1.0, perturbation), equil=equil) +init = maxwellian_1pt + maxwellian_2 +model.energetic_ions.var.add_initial_condition(init) + +# optional: exclude variables from saving +# model.energetic_ions.var.save_data = False + +if __name__ == "__main__": + # start run + verbose = True + + main.run( + model, + params_path=__file__, + env=env, + base_units=base_units, + time_opts=time_opts, + domain=domain, + equil=equil, + grid=grid, + derham_opts=derham_opts, + verbose=verbose, + ) diff --git a/src/struphy/bsplines/bsplines.py b/src/struphy/bsplines/bsplines.py index 09fc2a7de..77690ab90 100644 --- a/src/struphy/bsplines/bsplines.py +++ b/src/struphy/bsplines/bsplines.py @@ -611,8 +611,8 @@ def make_knots(breaks, degree, periodic): if periodic: period = breaks[-1] - breaks[0] - T[0:p] = [xi - period for xi in breaks[-p - 1 : -1]] - T[-p:] = [xi + period for xi in breaks[1 : p + 1]] + T[0:p] = xp.asarray([xi - period for xi in breaks[-p - 1 : -1]]) + T[-p:] = xp.asarray([xi + period for xi in breaks[1 : p + 1]]) else: T[0:p] = breaks[0] T[-p:] = breaks[-1] diff --git a/src/struphy/console/format.py b/src/struphy/console/format.py index 3a38ed7ea..005e5d007 100644 --- a/src/struphy/console/format.py +++ b/src/struphy/console/format.py @@ -415,7 +415,7 @@ def parse_path(directory, verbose: bool = False): for filename in files: if re.search(r"__\w+__", root): continue - if (filename.endswith(".py") or filename.endswith(".ipynb")) and not re.search(r"__\w+__", filename): + if filename.endswith(".py") and not re.search(r"__\w+__", filename): file_path = os.path.join(root, filename) python_files.append(file_path) return python_files @@ -489,9 +489,7 @@ def get_python_files(input_type, path=None): # python_files = [f for f in files if f.endswith(".py") and os.path.isfile(f)] python_files = [ - os.path.join(repopath, f) - for f in files - if (f.endswith(".py") or f.endswith(".ipynb")) and os.path.isfile(os.path.join(repopath, f)) + os.path.join(repopath, f) for f in files if f.endswith(".py") and os.path.isfile(os.path.join(repopath, f)) ] if not python_files: diff --git a/src/struphy/diagnostics/diagnostics_pic.ipynb b/src/struphy/diagnostics/diagnostics_pic.ipynb index f41425141..d4b2f2e0f 100644 --- a/src/struphy/diagnostics/diagnostics_pic.ipynb +++ b/src/struphy/diagnostics/diagnostics_pic.ipynb @@ -7,13 +7,11 @@ "outputs": [], "source": [ "import os\n", - "\n", + "import struphy\n", "import numpy as np\n", "from matplotlib import pyplot as plt\n", "\n", - "import struphy\n", - "\n", - "path_out = os.path.join(struphy.__path__[0], \"io/out\", \"sim_1\")\n", + "path_out = os.path.join(struphy.__path__[0], 'io/out', 'sim_1')\n", "\n", "print(path_out)\n", "os.listdir(path_out)" @@ -30,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "data_path = os.path.join(path_out, \"post_processing\")\n", + "data_path = os.path.join(path_out, 'post_processing')\n", "\n", "os.listdir(data_path)" ] @@ -41,7 +39,7 @@ "metadata": {}, "outputs": [], "source": [ - "t_grid = np.load(os.path.join(data_path, \"t_grid.npy\"))\n", + "t_grid = np.load(os.path.join(data_path, 't_grid.npy'))\n", "t_grid" ] }, @@ -51,7 +49,7 @@ "metadata": {}, "outputs": [], "source": [ - "f_path = os.path.join(data_path, \"kinetic_data\", \"ions\", \"distribution_function\")\n", + "f_path = os.path.join(data_path, 'kinetic_data', 'ions', 'distribution_function')\n", "\n", "print(os.listdir(f_path))" ] @@ -62,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ 
- "path = os.path.join(f_path, \"e1\")\n", + "path = os.path.join(f_path, 'e1')\n", "print(os.listdir(path))" ] }, @@ -72,9 +70,9 @@ "metadata": {}, "outputs": [], "source": [ - "grid = np.load(os.path.join(f_path, \"e1/\", \"grid_e1.npy\"))\n", - "f_binned = np.load(os.path.join(f_path, \"e1/\", \"f_binned.npy\"))\n", - "delta_f_e1_binned = np.load(os.path.join(f_path, \"e1/\", \"delta_f_binned.npy\"))\n", + "grid = np.load(os.path.join(f_path, 'e1/', 'grid_e1.npy'))\n", + "f_binned = np.load(os.path.join(f_path, 'e1/', 'f_binned.npy'))\n", + "delta_f_e1_binned = np.load(os.path.join(f_path, 'e1/', 'delta_f_binned.npy'))\n", "\n", "print(grid.shape)\n", "print(f_binned.shape)\n", @@ -89,18 +87,18 @@ "source": [ "steps = list(np.arange(10))\n", "\n", - "plt.figure(figsize=(12, 5 * len(steps)))\n", + "plt.figure(figsize=(12, 5*len(steps)))\n", "for n, step in enumerate(steps):\n", - " plt.subplot(len(steps), 2, 2 * n + 1)\n", - " plt.plot(grid, f_binned[step], label=f\"time = {t_grid[step]}\")\n", - " plt.xlabel(\"e1\")\n", - " # plt.ylim([.5, 1.5])\n", - " plt.title(\"full-f\")\n", - " plt.subplot(len(steps), 2, 2 * n + 2)\n", - " plt.plot(grid, delta_f_e1_binned[step], label=f\"time = {t_grid[step]}\")\n", - " plt.xlabel(\"e1\")\n", - " # plt.ylim([-3e-3, 3e-3])\n", - " plt.title(r\"$\\delta f$\")\n", + " plt.subplot(len(steps), 2, 2*n + 1)\n", + " plt.plot(grid, f_binned[step], label=f'time = {t_grid[step]}')\n", + " plt.xlabel('e1')\n", + " #plt.ylim([.5, 1.5])\n", + " plt.title('full-f')\n", + " plt.subplot(len(steps), 2, 2*n + 2)\n", + " plt.plot(grid, delta_f_e1_binned[step], label=f'time = {t_grid[step]}')\n", + " plt.xlabel('e1')\n", + " #plt.ylim([-3e-3, 3e-3])\n", + " plt.title(r'$\\delta f$')\n", " plt.legend()" ] }, @@ -110,7 +108,7 @@ "metadata": {}, "outputs": [], "source": [ - "path = os.path.join(f_path, \"e1_v1\")\n", + "path = os.path.join(f_path, 'e1_v1')\n", "print(os.listdir(path))" ] }, @@ -120,10 +118,10 @@ "metadata": {}, "outputs": [], "source": [ - "grid_e1 = np.load(os.path.join(f_path, \"e1_v1/\", \"grid_e1.npy\"))\n", - "grid_v1 = np.load(os.path.join(f_path, \"e1_v1/\", \"grid_v1.npy\"))\n", - "f_binned = np.load(os.path.join(f_path, \"e1_v1/\", \"f_binned.npy\"))\n", - "delta_f_binned = np.load(os.path.join(f_path, \"e1_v1/\", \"delta_f_binned.npy\"))\n", + "grid_e1 = np.load(os.path.join(f_path, 'e1_v1/', 'grid_e1.npy'))\n", + "grid_v1 = np.load(os.path.join(f_path, 'e1_v1/', 'grid_v1.npy'))\n", + "f_binned = np.load(os.path.join(f_path, 'e1_v1/', 'f_binned.npy'))\n", + "delta_f_binned = np.load(os.path.join(f_path, 'e1_v1/', 'delta_f_binned.npy'))\n", "\n", "print(grid_e1.shape)\n", "print(grid_v1.shape)\n", @@ -139,20 +137,20 @@ "source": [ "steps = list(np.arange(10))\n", "\n", - "plt.figure(figsize=(12, 5 * len(steps)))\n", + "plt.figure(figsize=(12, 5*len(steps)))\n", "for n, step in enumerate(steps):\n", - " plt.subplot(len(steps), 2, 2 * n + 1)\n", - " plt.pcolor(grid_e1, grid_v1, f_binned[step].T, label=f\"time = {t_grid[step]}\")\n", - " plt.xlabel(\"$e1$\")\n", - " plt.ylabel(r\"$v_\\parallel$\")\n", - " plt.title(\"full-f\")\n", + " plt.subplot(len(steps), 2, 2*n + 1)\n", + " plt.pcolor(grid_e1, grid_v1, f_binned[step].T, label=f'time = {t_grid[step]}')\n", + " plt.xlabel('$e1$')\n", + " plt.ylabel(r'$v_\\parallel$')\n", + " plt.title('full-f')\n", " plt.legend()\n", " plt.colorbar()\n", - " plt.subplot(len(steps), 2, 2 * n + 2)\n", - " plt.pcolor(grid_e1, grid_v1, delta_f_binned[step].T, label=f\"time = {t_grid[step]}\")\n", - " 
plt.xlabel(\"$e1$\")\n", - " plt.ylabel(r\"$v_\\parallel$\")\n", - " plt.title(r\"$\\delta f$\")\n", + " plt.subplot(len(steps), 2, 2*n + 2)\n", + " plt.pcolor(grid_e1, grid_v1, delta_f_binned[step].T, label=f'time = {t_grid[step]}')\n", + " plt.xlabel('$e1$')\n", + " plt.ylabel(r'$v_\\parallel$')\n", + " plt.title(r'$\\delta f$')\n", " plt.legend()\n", " plt.colorbar()" ] @@ -163,7 +161,7 @@ "metadata": {}, "outputs": [], "source": [ - "fields_path = os.path.join(data_path, \"fields_data\")\n", + "fields_path = os.path.join(data_path, 'fields_data')\n", "\n", "print(os.listdir(fields_path))" ] @@ -176,7 +174,7 @@ "source": [ "import pickle\n", "\n", - "with open(os.path.join(fields_path, \"grids_phy.bin\"), \"rb\") as file:\n", + "with open(os.path.join(fields_path, 'grids_phy.bin'), 'rb') as file:\n", " x_grid, y_grid, z_grid = pickle.load(file)\n", "\n", "print(type(x_grid))\n", @@ -189,7 +187,7 @@ "metadata": {}, "outputs": [], "source": [ - "with open(os.path.join(fields_path, \"em_fields\", \"phi_phy.bin\"), \"rb\") as file:\n", + "with open(os.path.join(fields_path, 'em_fields', 'phi_phy.bin'), 'rb') as file:\n", " phi = pickle.load(file)\n", "\n", "plt.figure(figsize=(12, 12))\n", @@ -199,9 +197,9 @@ " t = t_grid[step]\n", " print(phi[t][0].shape)\n", " plt.subplot(2, 2, n + 1)\n", - " plt.plot(x_grid[:, 0, 0], phi[t][0][:, 0, 0], label=f\"time = {t}\")\n", - " plt.xlabel(\"x\")\n", - " plt.ylabel(r\"$\\phi$(x)\")\n", + " plt.plot(x_grid[:, 0, 0], phi[t][0][:, 0, 0], label=f'time = {t}')\n", + " plt.xlabel('x')\n", + " plt.ylabel(r'$\\phi$(x)')\n", " plt.legend()" ] }, diff --git a/src/struphy/feec/basis_projection_ops.py b/src/struphy/feec/basis_projection_ops.py index 20b494232..616340e39 100644 --- a/src/struphy/feec/basis_projection_ops.py +++ b/src/struphy/feec/basis_projection_ops.py @@ -57,9 +57,7 @@ def __init__(self, derham, domain, verbose=True, **weights): self._rank = derham.comm.Get_rank() if derham.comm is not None else 0 - if xp.any( - [degree == 1 and num_elements > 1 for degree, num_elements in zip(derham.degree, derham.num_elements)] - ): + if any(degree == 1 and num_elements > 1 for degree, num_elements in zip(derham.degree, derham.num_elements)): if MPI.COMM_WORLD.Get_rank() == 0: logger.info( f'\nWARNING: Class "BasisProjectionOperators" called with degree={derham.degree} (interpolation of piece-wise constants should be avoided).', diff --git a/src/struphy/feec/preconditioner.py b/src/struphy/feec/preconditioner.py index b6201f8e7..d7683216e 100644 --- a/src/struphy/feec/preconditioner.py +++ b/src/struphy/feec/preconditioner.py @@ -363,6 +363,11 @@ def solver(self): """KroneckerLinearSolver or BlockDiagonalSolver for exactly inverting the approximate mass matrix self.matrix.""" return self._solver + @property + def domain(self): + """The domain of the linear operator - an element of Vectorspace""" + return self._space + @property def codomain(self): """The codomain of the linear operator - an element of Vectorspace""" @@ -742,6 +747,9 @@ def matrix(self): def solver(self): """KroneckerLinearSolver or BlockDiagonalSolver for exactly inverting the approximate mass matrix self.matrix.""" return self._solver + + @property + def domain(self): """The domain of the linear operator - an element of Vectorspace""" return self._space diff --git a/src/struphy/feec/psydac_derham.py b/src/struphy/feec/psydac_derham.py index 8a8b0ad01..e757443b6 100644 --- a/src/struphy/feec/psydac_derham.py +++ b/src/struphy/feec/psydac_derham.py @@ -4,6 +4,7 @@ import cunumpy as xp import 
feectools.core.bsplines as bsp +import numpy as np from feectools.ddm.cart import DomainDecomposition from feectools.ddm.mpi import MockComm from feectools.ddm.mpi import mpi as MPI @@ -3456,12 +3457,17 @@ def get_pts_and_wts(space_1d, start, end, n_quad=None, polar_shift=False): union_breaks = space_1d.breaks[:-1] # Make union of Greville and break points - tmp = set(xp.round(space_1d.histopolation_grid, decimals=14)).union( - xp.round(union_breaks, decimals=14), + # tmp = set(xp.round(space_1d.histopolation_grid, decimals=14)).union( + # xp.round(union_breaks, decimals=14), + # ) + # tmp = list(tmp) + # tmp.sort() + # tmp_a = xp.array(tmp) + + tmp = set(xp.round(space_1d.histopolation_grid, decimals=14).tolist()).union( + xp.round(union_breaks, decimals=14).tolist() ) - - tmp = list(tmp) - tmp.sort() + tmp = sorted(tmp) tmp_a = xp.array(tmp) x_grid = tmp_a[ @@ -3489,7 +3495,13 @@ def get_pts_and_wts(space_1d, start, end, n_quad=None, polar_shift=False): # products of basis functions are integrated exactly n_quad = space_1d.degree + 1 - pts_loc, wts_loc = xp.polynomial.legendre.leggauss(n_quad) + pts_loc, wts_loc = np.polynomial.legendre.leggauss(n_quad) + + if "cupy" in xp.__name__: + import cupy as cp + + pts_loc = cp.array(pts_loc) + wts_loc = cp.array(wts_loc) x, wts = bsp.quadrature_grid(x_grid, pts_loc, wts_loc) diff --git a/src/struphy/geometry/base.py b/src/struphy/geometry/base.py index f08a41bc8..e13696bc0 100644 --- a/src/struphy/geometry/base.py +++ b/src/struphy/geometry/base.py @@ -15,6 +15,7 @@ from struphy.geometry import evaluation_kernels, transform_kernels from struphy.kernel_arguments.pusher_args_kernels import DomainArguments from struphy.linear_algebra import linalg_kron +from struphy.utils.pyccel import Pyccelkernel from struphy.utils.utils import __class_with_params_repr_no_defaults__, all_class_params_are_default, all_subclasses logger = logging.getLogger("struphy") @@ -878,8 +879,8 @@ def _evaluate_metric_coefficient(self, *etas, which=0, **kwargs): # to keep C-ordering the (3, 3)-part is in the last indices out = xp.empty((markers.shape[0], 3, 3), dtype=float) - - n_inside = evaluation_kernels.kernel_evaluate_pic( + kernel = Pyccelkernel(evaluation_kernels.kernel_evaluate_pic) + n_inside = kernel( markers, which, self.args_domain, @@ -922,7 +923,8 @@ def _evaluate_metric_coefficient(self, *etas, which=0, **kwargs): (E1.shape[0], E2.shape[1], E3.shape[2], 3, 3), dtype=float, ) - evaluation_kernels.kernel_evaluate( + kernel = Pyccelkernel(evaluation_kernels.kernel_evaluate) + kernel( E1, E2, E3, diff --git a/src/struphy/geometry/transform_kernels.py b/src/struphy/geometry/transform_kernels.py index f9e6d8077..667c2528a 100644 --- a/src/struphy/geometry/transform_kernels.py +++ b/src/struphy/geometry/transform_kernels.py @@ -43,7 +43,14 @@ - 2-form --> vector : (a_1, a_2, a_3) = (a^2_1, a^2_2, a^2_3) / |det(DF)| """ -from numpy import empty, shape, sqrt, zeros +import cunumpy as np +from cunumpy.xp import array_backend + +if array_backend.backend == "cupy": + from cupy import empty, shape, sqrt, zeros +else: + from numpy import empty, shape, sqrt, zeros + from pyccel.decorators import stack_array import struphy.geometry.evaluation_kernels as evaluation_kernels diff --git a/src/struphy/io/output_handling.py b/src/struphy/io/output_handling.py index c923b6dc8..d906ad100 100644 --- a/src/struphy/io/output_handling.py +++ b/src/struphy/io/output_handling.py @@ -2,8 +2,8 @@ import logging import os -import cunumpy as xp import h5py +import numpy as np logger = 
logging.getLogger("struphy") @@ -76,6 +76,18 @@ def dset_dict(self): """Dictionary with dataset keys and object IDs.""" return self._dset_dict + @staticmethod + def _as_numpy_array(val): + """Return a NumPy view/copy suitable for h5py writes.""" + if isinstance(val, np.ndarray): + return val + + get = getattr(val, "get", None) + if callable(get) and "cupy" in val.__class__.__module__: + return get() + + return np.asarray(val) + def add_data(self, data_dict): """ Add data object to be saved during simulation. @@ -83,11 +95,12 @@ def add_data(self, data_dict): Parameters ---------- data_dict : dict - Name-object pairs to save during time stepping, e.g. {key : val}. key must be a string and val must be a xp.array of fixed shape. Scalar values (floats) must therefore be passed as 1d arrays of size 1. + Name-object pairs to save during time stepping, e.g. {key : val}. key must be a string and val must be an array of fixed shape. Scalar values (floats) must therefore be passed as 1d arrays of size 1. """ for key, val in data_dict.items(): - assert isinstance(val, xp.ndarray) + val_np = self._as_numpy_array(val) + assert isinstance(val_np, np.ndarray) # if dataset already exists, check for compatibility with given array if key in self._dset_dict: @@ -96,30 +109,30 @@ def add_data(self, data_dict): # scalar values are saved as 1d arrays of size 1 if len(dataset_shape) == 1: - assert val.ndim == 1, "for scalar quantities, a 1d array with a single entry must used!" - assert val.size == 1, "for scalar quantities, a 1d array with a single entry must used!" + assert val_np.ndim == 1, "for scalar quantities, a 1d array with a single entry must used!" + assert val_np.size == 1, "for scalar quantities, a 1d array with a single entry must used!" # other values else: - assert dataset_shape[1:] == val.shape + assert dataset_shape[1:] == val_np.shape # create new dataset otherwise and save array else: with h5py.File(self.file_path, "a") as file: # scalar values are saved as 1d arrays of size 1 - if val.size == 1: - assert val.ndim == 1 - file.create_dataset(key, (1,), maxshape=(None,), dtype=val.dtype, chunks=True) - file[key][0] = val[0] + if val_np.size == 1: + assert val_np.ndim == 1 + file.create_dataset(key, (1,), maxshape=(None,), dtype=val_np.dtype, chunks=True) + file[key][0] = val_np[0] else: file.create_dataset( key, - (1,) + val.shape, - maxshape=(None,) + val.shape, - dtype=val.dtype, + (1,) + val_np.shape, + maxshape=(None,) + val_np.shape, + dtype=val_np.dtype, chunks=True, ) - file[key][0] = val + file[key][0] = val_np # set object ID self._dset_dict[key] = id(val) @@ -138,13 +151,13 @@ def save_data(self, keys=None): if keys is None: for key in self._dset_dict: file[key].resize(file[key].shape[0] + 1, axis=0) - file[key][-1] = ctypes.cast(self._dset_dict[key], ctypes.py_object).value + file[key][-1] = self._as_numpy_array(ctypes.cast(self._dset_dict[key], ctypes.py_object).value) # only loop over given keys else: for key in keys: file[key].resize(file[key].shape[0] + 1, axis=0) - file[key][-1] = ctypes.cast(self._dset_dict[key], ctypes.py_object).value + file[key][-1] = self._as_numpy_array(ctypes.cast(self._dset_dict[key], ctypes.py_object).value) def info(self): """Print info of data sets to screen.""" diff --git a/src/struphy/kernel_arguments/pusher_args_kernels.py b/src/struphy/kernel_arguments/pusher_args_kernels.py index 5be6879b4..f5d951c6e 100644 --- a/src/struphy/kernel_arguments/pusher_args_kernels.py +++ b/src/struphy/kernel_arguments/pusher_args_kernels.py @@ -1,5 +1,5 @@ # from 
numpy import copy -from numpy import empty +import cunumpy as xp class MarkerArguments: @@ -99,12 +99,12 @@ def __init__( self.tn3 = tn3 self.starts = starts - self.bn1 = empty(pn[0] + 1, dtype=float) - self.bn2 = empty(pn[1] + 1, dtype=float) - self.bn3 = empty(pn[2] + 1, dtype=float) - self.bd1 = empty(pn[0], dtype=float) - self.bd2 = empty(pn[1], dtype=float) - self.bd3 = empty(pn[2], dtype=float) + self.bn1 = xp.empty(int(pn[0] + 1), dtype=float) + self.bn2 = xp.empty(int(pn[1] + 1), dtype=float) + self.bn3 = xp.empty(int(pn[2] + 1), dtype=float) + self.bd1 = xp.empty(int(pn[0]), dtype=float) + self.bd2 = xp.empty(int(pn[1]), dtype=float) + self.bd3 = xp.empty(int(pn[2]), dtype=float) class DomainArguments: diff --git a/src/struphy/models/base.py b/src/struphy/models/base.py index 5ea5583cf..db1bd13b1 100644 --- a/src/struphy/models/base.py +++ b/src/struphy/models/base.py @@ -456,7 +456,7 @@ def update_distr_functions(self): h2 = 1 / obj.boxes_per_dim[1] h3 = 1 / obj.boxes_per_dim[2] - ndim = xp.count_nonzero([d > 1 for d in obj.boxes_per_dim]) + ndim = xp.count_nonzero(xp.array([d > 1 for d in obj.boxes_per_dim])) if ndim == 0: kernel_type = "gaussian_3d" else: diff --git a/src/struphy/models/variables.py b/src/struphy/models/variables.py index 6b722b081..8586b776c 100644 --- a/src/struphy/models/variables.py +++ b/src/struphy/models/variables.py @@ -585,7 +585,7 @@ def allocate( self.particles.draw_markers(sort=sort, verbose=verbose) # set zero velocity according to loading_params - zero_index = xp.nonzero(self.particles.loading_params.set_zero_velocity)[0].flatten() + zero_index = tuple(i for i, is_zero in enumerate(self.particles.loading_params.set_zero_velocity) if is_zero) self.particles.set_velocities_comp(velocity=0.0, comp=zero_index) self.particles.initialize_weights() diff --git a/src/struphy/ode/utils.py b/src/struphy/ode/utils.py index e157d7070..82b7ec022 100644 --- a/src/struphy/ode/utils.py +++ b/src/struphy/ode/utils.py @@ -109,7 +109,8 @@ def __post_init__(self): self._a = xp.tri(self.n_stages, k=-1) for l, st in enumerate(a): assert len(st) == l + 1 - self._a[l + 1, : l + 1] = st + + self._a[l + 1, : l + 1] = xp.array(st) self._conv_rate = conv_rate diff --git a/src/struphy/pic/base.py b/src/struphy/pic/base.py index 573ce8cca..a2640fe8f 100644 --- a/src/struphy/pic/base.py +++ b/src/struphy/pic/base.py @@ -245,7 +245,7 @@ def __init__( assert all([nboxes % nproc == 0 for nboxes, nproc in zip(self.boxes_per_dim, self.nprocs)]), ( f"Number of boxes {self.boxes_per_dim =} must be divisible by number of processes {self.nprocs =} in each direction." ) - n_boxes = xp.prod(self.boxes_per_dim, dtype=int) * self.num_clones + n_boxes = xp.prod(xp.array(self.boxes_per_dim), dtype=int) * self.num_clones # if verbose: # logger.info(f"\n{self.mpi_rank = }, {n_boxes = }") @@ -1141,7 +1141,7 @@ def _allocate_marker_array(self): bufsize = self.bufsize + 1.0 / xp.sqrt(n_mks_load_loc) # allocate markers array (3 x positions, vdim x velocities, weight, s0, w0, ..., ID) with buffer - self._n_rows = round(n_mks_load_loc * (1 + bufsize)) + self._n_rows = round(float(n_mks_load_loc * (1 + bufsize))) self._markers = xp.zeros((self.n_rows, self.n_cols), dtype=float) # allocate auxiliary arrays @@ -1972,7 +1972,7 @@ def binning( The reconstructed delta-f distribution function. 
""" - assert xp.count_nonzero(components) == len(bin_edges) + assert xp.count_nonzero(xp.array(components)) == len(bin_edges) # volume of a bin bin_vol = 1.0 @@ -2495,7 +2495,7 @@ def _set_boxes(self): n_particles = self._markers_shape[0] n_mkr = int(n_particles / n_box_in) + 1 n_cols = round( - n_mkr * (1 + 1 / xp.sqrt(n_mkr) + self._box_bufsize), + float(n_mkr) * (1 + 1 / float(xp.sqrt(n_mkr)) + self._box_bufsize), ) # cartesian boxes @@ -2714,7 +2714,17 @@ def check_and_assign_particles_to_boxes(self): """Check whether the box array has enough columns (detect load imbalance wrt to sorting boxes), and then assigne the particles to boxes.""" - bcount = xp.bincount(xp.int64(self.markers_wo_holes[:, -2])) + from cunumpy.xp import array_backend + + if array_backend.backend == "numpy": + bcount = xp.bincount(xp.int64(self.markers_wo_holes[:, -2])) + else: + import cupy as cp + + indices = self.markers_wo_holes[:, -2] + indices = indices.astype(cp.int64) + bcount = cp.bincount(indices) + max_in_box = xp.max(bcount) if max_in_box > self._sorting_boxes.boxes.shape[1]: warnings.warn( @@ -4444,6 +4454,7 @@ def _gather_scalar_in_subcomm_array(self, scalar: int, out: xp.ndarray = None): _tmp[self.mpi_rank] = scalar if self.mpi_comm is not None: + print(f"{self.mpi_comm = }") self.mpi_comm.Allgather( _tmp[self.mpi_rank], _tmp, diff --git a/src/struphy/pic/tests/test_pushers.py b/src/struphy/pic/tests/test_pushers.py index 3772b4f9d..ffd6c1ed1 100644 --- a/src/struphy/pic/tests/test_pushers.py +++ b/src/struphy/pic/tests/test_pushers.py @@ -686,7 +686,7 @@ def test_push_eta_rk4(num_elements, degree, bcs, mapping, show_plots=False): butcher = ButcherTableau("rk4") # temp fix due to refactoring of ButcherTableau: butcher._a = xp.diag(butcher.a, k=-1) - butcher._a = xp.array(list(butcher._a) + [0.0]) + butcher._a = xp.concatenate((butcher._a, xp.zeros(1, dtype=butcher._a.dtype))) pusher_psy = Pusher_psy( particles, diff --git a/src/struphy/pic/tests/test_sorting.py b/src/struphy/pic/tests/test_sorting.py index 4a689852b..2853dbcd5 100644 --- a/src/struphy/pic/tests/test_sorting.py +++ b/src/struphy/pic/tests/test_sorting.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize("ny", [16, 80]) @pytest.mark.parametrize("nz", [32, 90]) @pytest.mark.parametrize("algo", ["fortran_ordering", "c_ordering"]) -def test_flattening_1(nx, ny, nz, algo): +def test_flattening(nx, ny, nz, algo): from struphy.pic.sorting_kernels import flatten_index, unflatten_index n1s = xp.array(xp.random.rand(10) * (nx + 1), dtype=int) @@ -38,7 +38,7 @@ def test_flattening_1(nx, ny, nz, algo): @pytest.mark.parametrize("ny", [16, 80]) @pytest.mark.parametrize("nz", [32, 90]) @pytest.mark.parametrize("algo", ["fortran_ordering", "c_ordering"]) -def test_flattening_2(nx, ny, nz, algo): +def test_flattening(nx, ny, nz, algo): from struphy.pic.sorting_kernels import flatten_index, unflatten_index n1s = xp.array(xp.random.rand(10) * (nx + 1), dtype=int) @@ -58,7 +58,7 @@ def test_flattening_2(nx, ny, nz, algo): @pytest.mark.parametrize("ny", [16, 80]) @pytest.mark.parametrize("nz", [32, 90]) @pytest.mark.parametrize("algo", ["fortran_ordering", "c_ordering"]) -def test_flattening_3(nx, ny, nz, algo): +def test_flattening(nx, ny, nz, algo): from struphy.pic.sorting_kernels import flatten_index, unflatten_index n1s = xp.array(xp.random.rand(10) * (nx + 1), dtype=int) @@ -148,7 +148,7 @@ def test_sorting(num_elements, degree, bcs, mapping, Np, verbose=False): if __name__ == "__main__": - test_flattening_1(8, 8, 8, "c_orderwding") + test_flattening(8, 8, 
8, "c_orderwding") # test_sorting( # [8, 9, 10], # [2, 3, 4], diff --git a/src/struphy/post_processing/likwid/plot_time_traces.py b/src/struphy/post_processing/likwid/plot_time_traces.py index 4899f61a8..d87e68f21 100644 --- a/src/struphy/post_processing/likwid/plot_time_traces.py +++ b/src/struphy/post_processing/likwid/plot_time_traces.py @@ -5,7 +5,6 @@ import cunumpy as xp import matplotlib.pyplot as plt -import plotly.graph_objects as go import plotly.io as pio from scope_profiler.h5reader import ProfilingH5Reader @@ -22,14 +21,18 @@ def glob_to_regex(pat: str) -> str: def plot_region(region_name, groups_include=["*"], groups_skip=[]): - from fnmatch import fnmatch - - for pattern in groups_skip: - if fnmatch(region_name, pattern): + # skips first + for pat in groups_skip: + rx = glob_to_regex(pat) + if re.fullmatch(rx, region_name): return False - for pattern in groups_include: - if fnmatch(region_name, pattern): + + # includes next + for pat in groups_include: + rx = glob_to_regex(pat) + if re.fullmatch(rx, region_name): return True + return False @@ -168,6 +171,21 @@ def plot_avg_duration_bar_chart( logger.info(f"Saved average duration bar chart to: {figure_path}") +import plotly.graph_objects as go + + +def plot_region(region_name, groups_include=["*"], groups_skip=[]): + from fnmatch import fnmatch + + for pattern in groups_skip: + if fnmatch(region_name, pattern): + return False + for pattern in groups_include: + if fnmatch(region_name, pattern): + return True + return False + + def plot_gantt_chart_plotly( reader: ProfilingH5Reader, output_path: str, diff --git a/src/struphy/propagators/current_coupling_5d_gradb.py b/src/struphy/propagators/current_coupling_5d_gradb.py index 65ced5649..ccdb78d52 100644 --- a/src/struphy/propagators/current_coupling_5d_gradb.py +++ b/src/struphy/propagators/current_coupling_5d_gradb.py @@ -290,8 +290,7 @@ def allocate(self, verbose: bool = False): butcher = self.options.butcher import numpy as np - butcher._a = xp.diag(butcher.a, k=-1) - butcher._a = xp.array(list(butcher.a) + [0.0]) + butcher._a = xp.concatenate((xp.diag(butcher.a, k=-1), xp.zeros(1, dtype=butcher.a.dtype))) self._args_pusher_kernel = ( self.domain.args_domain, diff --git a/src/struphy/propagators/push_deterministic_diffusion.py b/src/struphy/propagators/push_deterministic_diffusion.py index 4f22d26c0..7fb89f9d0 100644 --- a/src/struphy/propagators/push_deterministic_diffusion.py +++ b/src/struphy/propagators/push_deterministic_diffusion.py @@ -110,8 +110,7 @@ def allocate(self, verbose: bool = False): # temp fix due to refactoring of ButcherTableau: import cunumpy as xp - self._butcher._a = xp.diag(self._butcher.a, k=-1) - self._butcher._a = xp.array(list(self._butcher.a) + [0.0]) + self._butcher._a = xp.concatenate((xp.diag(self._butcher.a, k=-1), xp.zeros(1, dtype=self._butcher.a.dtype))) particles = self.variables.var.particles diff --git a/src/struphy/propagators/push_eta.py b/src/struphy/propagators/push_eta.py index ba8eaaaf8..8ae676f8f 100644 --- a/src/struphy/propagators/push_eta.py +++ b/src/struphy/propagators/push_eta.py @@ -94,8 +94,7 @@ def allocate(self, verbose: bool = False): try: import cunumpy as xp - butcher._a = xp.diag(butcher.a, k=-1) - butcher._a = xp.array(list(butcher.a) + [0.0]) + butcher._a = xp.concatenate((xp.diag(butcher.a, k=-1), xp.zeros(1, dtype=butcher.a.dtype))) except ValueError: pass diff --git a/src/struphy/propagators/push_eta_pc.py b/src/struphy/propagators/push_eta_pc.py index b7851a9ee..0d9065f36 100644 --- 
a/src/struphy/propagators/push_eta_pc.py +++ b/src/struphy/propagators/push_eta_pc.py @@ -133,8 +133,7 @@ def allocate(self, verbose: bool = False): # temp fix due to refactoring of ButcherTableau: import cunumpy as xp - butcher._a = xp.diag(butcher.a, k=-1) - butcher._a = xp.array(list(butcher.a) + [0.0]) + butcher._a = xp.concatenate((xp.diag(butcher.a, k=-1), xp.zeros(1, dtype=butcher.a.dtype))) args_kernel = ( self.derham.args_derham, diff --git a/src/struphy/propagators/push_guiding_center_bx_estar.py b/src/struphy/propagators/push_guiding_center_bx_estar.py index 350c6a2b2..cde07078f 100644 --- a/src/struphy/propagators/push_guiding_center_bx_estar.py +++ b/src/struphy/propagators/push_guiding_center_bx_estar.py @@ -423,8 +423,7 @@ def allocate(self, verbose: bool = False): # temp fix due to refactoring of ButcherTableau: import cunumpy as xp - butcher._a = xp.diag(butcher.a, k=-1) - butcher._a = xp.array(list(butcher.a) + [0.0]) + butcher._a = xp.concatenate((xp.diag(butcher.a, k=-1), xp.zeros(1, dtype=butcher.a.dtype))) kernel = Pyccelkernel(pusher_kernels_gc.push_gc_bxEstar_explicit_multistage) diff --git a/src/struphy/propagators/push_guiding_center_parallel.py b/src/struphy/propagators/push_guiding_center_parallel.py index 70144b0a6..5ea3495a9 100644 --- a/src/struphy/propagators/push_guiding_center_parallel.py +++ b/src/struphy/propagators/push_guiding_center_parallel.py @@ -442,8 +442,7 @@ def allocate(self, verbose: bool = False): # temp fix due to refactoring of ButcherTableau: import cunumpy as xp - butcher._a = xp.diag(butcher.a, k=-1) - butcher._a = xp.array(list(butcher.a) + [0.0]) + butcher._a = xp.concatenate((xp.diag(butcher.a, k=-1), xp.zeros(1, dtype=butcher.a.dtype))) kernel = Pyccelkernel(pusher_kernels_gc.push_gc_Bstar_explicit_multistage) diff --git a/src/struphy/propagators/push_random_diffusion.py b/src/struphy/propagators/push_random_diffusion.py index 1f6ddac80..481c8b5ca 100644 --- a/src/struphy/propagators/push_random_diffusion.py +++ b/src/struphy/propagators/push_random_diffusion.py @@ -110,8 +110,7 @@ def allocate(self, verbose: bool = False): # temp fix due to refactoring of ButcherTableau: import cunumpy as xp - self._butcher._a = xp.diag(self._butcher.a, k=-1) - self._butcher._a = xp.array(list(self._butcher.a) + [0.0]) + self._butcher._a = xp.concatenate((xp.diag(self._butcher.a, k=-1), xp.zeros(1, dtype=self._butcher.a.dtype))) # instantiate Pusher args_kernel = ( diff --git a/src/struphy/simulation/sim.py b/src/struphy/simulation/sim.py index 5e2ff11ec..727c7ff1f 100644 --- a/src/struphy/simulation/sim.py +++ b/src/struphy/simulation/sim.py @@ -327,18 +327,22 @@ def save_geometry_and_equil_vtk(self, verbose: bool = False): ] tmp = self.domain(*grids_log) - grids_phy = [tmp[0], tmp[1], tmp[2]] + grids_phy = [ + DataContainer._as_numpy_array(tmp[0]), + DataContainer._as_numpy_array(tmp[1]), + DataContainer._as_numpy_array(tmp[2]), + ] pointData = {} det_df = self.domain.jacobian_det(*grids_log) - pointData["det_df"] = det_df + pointData["det_df"] = DataContainer._as_numpy_array(det_df) if self.equil is not None: p0 = self.equil.p0(*grids_log) - pointData["p0"] = p0 + pointData["p0"] = DataContainer._as_numpy_array(p0) if isinstance(self.equil, FluidEquilibriumWithB): absB0 = self.equil.absB0(*grids_log) - pointData["absB0"] = absB0 + pointData["absB0"] = DataContainer._as_numpy_array(absB0) gridToVTK(os.path.join(self.env.path_out, "geometry"), *grids_phy, pointData=pointData) @@ -366,21 +370,25 @@ def create_geometry_mesh( ] tmp = 
self.domain(*grids_log) - grids_phy = [tmp[0], tmp[1], tmp[2]] + grids_phy = [ + DataContainer._as_numpy_array(tmp[0]), + DataContainer._as_numpy_array(tmp[1]), + DataContainer._as_numpy_array(tmp[2]), + ] # Create PyVista structured grid mesh = pv.StructuredGrid(grids_phy[0], grids_phy[1], grids_phy[2]) # Add point data det_df = self.domain.jacobian_det(*grids_log) - mesh["det_df"] = det_df.ravel(order="F") + mesh["det_df"] = DataContainer._as_numpy_array(det_df).ravel(order="F") if self.equil is not None: p0 = self.equil.p0(*grids_log) - mesh["p0"] = p0.ravel(order="F") + mesh["p0"] = DataContainer._as_numpy_array(p0).ravel(order="F") if isinstance(self.equil, FluidEquilibriumWithB): absB0 = self.equil.absB0(*grids_log) - mesh["absB0"] = absB0.ravel(order="F") + mesh["absB0"] = DataContainer._as_numpy_array(absB0).ravel(order="F") return mesh @@ -544,12 +552,12 @@ def run(self, one_time_step: bool = False, verbose: bool = False): self.time_state["value_sec"][0] = file["restart/time/value_sec"][-1] self.time_state["index"][0] = file["restart/time/index"][-1] - total_steps = str(int(round((Tend - self.time_state["value"][0]) / dt))) + total_steps = str(int(round((Tend - float(self.time_state["value"][0])) / dt))) logger.info(f"""\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! RESTARTing from: -{self.time_state["value"][0]=} -{self.time_state["value_sec"][0]=} -{self.time_state["index"][0]=} +self.time_state["value"][0]={float(self.time_state["value"][0])} +self.time_state["value_sec"][0]={float(self.time_state["value_sec"][0])} +self.time_state["index"][0]={int(self.time_state["index"][0])} !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! """) else: @@ -579,19 +587,19 @@ def run(self, one_time_step: bool = False, verbose: bool = False): self.Barrier() # stop time loop? - break_cond_1 = self.time_state["value"][0] >= Tend + break_cond_1 = float(self.time_state["value"][0]) >= Tend break_cond_2 = run_time_now > self.env.max_runtime if break_cond_1 or break_cond_2: # save restart data (other data already saved below) self.data.save_data(keys=save_keys_end) end_time = time.time() - logger.info(f"\nTime steps done: {self.time_state['index'][0]}") + logger.info(f"\nTime steps done: {int(self.time_state['index'][0])}") logger.info(f"wall-clock time of simulation [sec]: {end_time - self.start_time}") logger.info("") break - if self.env.sort_step and self.time_state["index"][0] % self.env.sort_step == 0: + if self.env.sort_step and int(self.time_state["index"][0]) % self.env.sort_step == 0: t0 = time.time() for key, val in self.model.pointer.items(): if isinstance(val, Particles): @@ -605,8 +613,10 @@ def run(self, one_time_step: bool = False, verbose: bool = False): logger.info("") # update time and index (round time to 10 decimals for a clean time grid!) 
- self.time_state["value"][0] = round(self.time_state["value"][0] + dt, 14) - self.time_state["value_sec"][0] = round(self.time_state["value_sec"][0] + dt * self.model.units.t, 14) + self.time_state["value"][0] = round(float(self.time_state["value"][0]) + dt, 14) + self.time_state["value_sec"][0] = round( + float(self.time_state["value_sec"][0]) + dt * self.model.units.t, 14 + ) self.time_state["index"][0] += 1 # perform one time step dt @@ -618,7 +628,7 @@ def run(self, one_time_step: bool = False, verbose: bool = False): run_time_now = (time.time() - self.start_time) / 60 # update diagnostics data and save data - if self.time_state["index"][0] % self.env.save_step == 0: + if int(self.time_state["index"][0]) % self.env.save_step == 0: # compute scalars and kinetic data self.model.update_scalar_quantities() self.model.update_markers_to_be_saved() @@ -638,19 +648,19 @@ def run(self, one_time_step: bool = False, verbose: bool = False): self.data.save_data(keys=save_keys_all) # print current time and scalar quantities to screen - step = str(self.time_state["index"][0]).zfill(len(total_steps)) + step = str(int(self.time_state["index"][0])).zfill(len(total_steps)) message = "time step:".ljust(25) + f"{step}/{total_steps}".rjust(25) message += ( "\n" + "normalized time:".ljust(25) - + "{0:4.2e} / {1:4.2e}".format(self.time_state["value"][0], Tend).rjust(25) + + "{0:4.2e} / {1:4.2e}".format(float(self.time_state["value"][0]), Tend).rjust(25) ) message += ( "\n" + "physical time [s]:".ljust(25) + "{0:4.2e} / {1:4.2e}".format( - self.time_state["value_sec"][0], + float(self.time_state["value_sec"][0]), Tend * self.model.units.t, ).rjust(25) ) @@ -1191,9 +1201,9 @@ def _initialize_hdf5_datasets(self, data: DataContainer, size, verbose: bool = F # store grid_info only for runs with 512 ranks or smaller if self.model.scalars.dct and self.derham is not None: if size <= 512: - file["scalar"].attrs["grid_info"] = self.derham.domain_array + file["scalar"].attrs["grid_info"] = DataContainer._as_numpy_array(self.derham.domain_array) else: - file["scalar"].attrs["grid_info"] = self.derham.domain_array[0] + file["scalar"].attrs["grid_info"] = DataContainer._as_numpy_array(self.derham.domain_array[0]) else: pass @@ -1230,9 +1240,9 @@ def _initialize_hdf5_datasets(self, data: DataContainer, size, verbose: bool = F # save field meta data file[key_field].attrs["space_id"] = spline.space_id - file[key_field].attrs["starts"] = spline.starts - file[key_field].attrs["ends"] = spline.ends - file[key_field].attrs["pads"] = spline.pads + file[key_field].attrs["starts"] = DataContainer._as_numpy_array(spline.starts) + file[key_field].attrs["ends"] = DataContainer._as_numpy_array(spline.ends) + file[key_field].attrs["pads"] = DataContainer._as_numpy_array(spline.pads) # save numpy array to be updated only at the end of the simulation for restart. 
key_field_restart = os.path.join(species_path_restart, variable) @@ -1280,7 +1290,9 @@ def _initialize_hdf5_datasets(self, data: DataContainer, size, verbose: bool = F data.add_data({key_df: bin_plot.df}) for dim, be in enumerate(bin_plot.bin_edges): - file[key_f].attrs["bin_centers" + "_" + str(dim + 1)] = be[:-1] + (be[1] - be[0]) / 2 + file[key_f].attrs["bin_centers" + "_" + str(dim + 1)] = DataContainer._as_numpy_array( + be[:-1] + (be[1] - be[0]) / 2 + ) for i, kd_plot in enumerate(species.kernel_density_plots): key_n = os.path.join(key_spec, "n_sph", f"view_{i}") @@ -1290,9 +1302,9 @@ def _initialize_hdf5_datasets(self, data: DataContainer, size, verbose: bool = F eta1 = kd_plot.plot_pts[0][:, 0, 0] eta2 = kd_plot.plot_pts[1][0, :, 0] eta3 = kd_plot.plot_pts[2][0, 0, :] - file[key_n].attrs["eta1"] = eta1 - file[key_n].attrs["eta2"] = eta2 - file[key_n].attrs["eta3"] = eta3 + file[key_n].attrs["eta1"] = DataContainer._as_numpy_array(eta1) + file[key_n].attrs["eta2"] = DataContainer._as_numpy_array(eta2) + file[key_n].attrs["eta3"] = DataContainer._as_numpy_array(eta3) # TODO: maybe add other data # else: diff --git a/src/struphy/utils/pyccel.py b/src/struphy/utils/pyccel.py index 5e62426a3..6c7aabb8a 100644 --- a/src/struphy/utils/pyccel.py +++ b/src/struphy/utils/pyccel.py @@ -1,19 +1,71 @@ +import copy from typing import Any, Callable +import cunumpy as xp +import numpy as np + class Pyccelkernel: def __init__(self, kernel: Callable[..., Any], use_cupy: bool = False) -> None: self._kernel = kernel self._use_cupy = use_cupy + if "cupy" in xp.__name__ or "cupy" in xp.ndarray.__module__: + self._use_cupy = True + + @staticmethod + def _convert_to_numpy(value: Any, converted_arrays: list[tuple[Any, np.ndarray]]) -> Any: + if isinstance(value, xp.ndarray): + value_np = xp.asnumpy(value) + converted_arrays.append((value, value_np)) + return value_np + + if isinstance(value, tuple): + return tuple(Pyccelkernel._convert_to_numpy(item, converted_arrays) for item in value) + + if isinstance(value, list): + return [Pyccelkernel._convert_to_numpy(item, converted_arrays) for item in value] + + if hasattr(value, "__dict__") and value.__class__.__module__.startswith(("struphy.", "feectools.")): + value_np = copy.copy(value) + for name, attr in vars(value).items(): + setattr(value_np, name, Pyccelkernel._convert_to_numpy(attr, converted_arrays)) + return value_np + + return value def __call__(self, *args: Any, **kwargs: Any) -> Any: if self.use_cupy: - raise NotImplementedError + # Convert all args from CuPy to NumPy + converted_args = [] + args_np = [self._convert_to_numpy(x, converted_args) for x in args] + + # Convert all kwargs from CuPy to NumPy + converted_kwargs = [] + kwargs_np = {k: self._convert_to_numpy(v, converted_kwargs) for k, v in kwargs.items()} + + # Call kernel + result = self._kernel(*args_np, **kwargs_np) + + # Copy in-place kernel updates back to CuPy arrays. + for x, x_np in converted_args: + x[...] = xp.asarray(x_np) + for v, v_np in converted_kwargs: + v[...] 
= xp.asarray(v_np) + + # Convert NumPy arrays back to CuPy + if result is None: + return None + if isinstance(result, tuple): + return tuple(xp.asarray(r) if isinstance(r, np.ndarray) else r for r in result) + if isinstance(result, np.ndarray): + return xp.asarray(result) + return result + else: return self._kernel(*args, **kwargs) @property - def name(self): + def name(self) -> str: return self.kernel.__name__ @property @@ -21,5 +73,5 @@ def kernel(self) -> Callable[..., Any]: return self._kernel @property - def use_cupy(self): + def use_cupy(self) -> bool: return self._use_cupy
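
Note on the GPU/CPU handling introduced above: DataContainer._as_numpy_array and the extended Pyccelkernel.__call__ follow the same round-trip pattern — device (CuPy) arrays are copied to host NumPy before a host-only consumer runs (h5py writes, pyccel-compiled kernels), and in-place results are copied back afterwards. The standalone sketch below illustrates that pattern under the assumption of a NumPy-only environment; to_host, from_host, dummy_kernel and call_with_round_trip are hypothetical names used only for this illustration and are not part of the diff.

import numpy as np


def to_host(arr):
    """Return a NumPy view/copy of arr; CuPy arrays expose .get() for this."""
    if isinstance(arr, np.ndarray):
        return arr
    get = getattr(arr, "get", None)
    if callable(get) and "cupy" in arr.__class__.__module__:
        return get()
    return np.asarray(arr)


def from_host(dst, src_np):
    """Copy host results back into the (possibly device-resident) destination."""
    # the diff converts explicitly with xp.asarray(...) before this copy-back
    dst[...] = src_np


def dummy_kernel(markers, scale):
    """Stand-in for a pyccel-compiled kernel that updates markers in place."""
    markers *= scale
    return markers.shape[0]


def call_with_round_trip(kernel, *arrays, **scalars):
    """Sketch of Pyccelkernel.__call__: host copies go in, in-place results come back."""
    host_copies = [to_host(a) for a in arrays]
    result = kernel(*host_copies, **scalars)
    for dst, src in zip(arrays, host_copies):
        from_host(dst, src)
    return result


if __name__ == "__main__":
    markers = np.ones((4, 7))
    n_rows = call_with_round_trip(dummy_kernel, markers, scale=2.0)
    print(n_rows, markers[0, 0])  # prints: 4 2.0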