diff --git a/src/qonnx/util/exec_qonnx.py b/src/qonnx/util/exec_qonnx.py
index 5c059281..0d90a640 100644
--- a/src/qonnx/util/exec_qonnx.py
+++ b/src/qonnx/util/exec_qonnx.py
@@ -60,32 +60,48 @@ def exec_qonnx(
     expose_intermediates: str = None,
     output_prefix: str = "out_",
     output_mode: output_mode_options = OUTPUT_MODE_NAME,
-    argmax_verify_npy: str = None,
+    verify_npy: str = None,
+    verify_argmax=False,
+    auto_reshape=True,
     save_modified_model: str = None,
+    input_to_nchw=False,
+    input_to_nhwc=False,
+    input_cast2float=False,
     input_pix2float=False,
     input_zerocenter=False,
     maxiters: int = None,
-    output_nosave=False
+    output_nosave=False,
+    early_exit_acc_ratio=None,
+    override_exec_onnx=None
 ):
     """Execute a given QONNX model by initializing its inputs from .npy files, and write outputs
     as .npy files.
     The input model have been previously cleaned by the cleanup transformation or commandline tool.
 
     :param qonnx_model_file: Filename for the input ONNX model
-    :param in_npy: List of .npy files to supply as inputs. If not specified, inputs will be set to zero.
+    :param in_npy: List of .npy files (or single .npz file) to supply as inputs.
+        If .npz is used, it must contain a 'data' entry for inputs; an optional 'target' entry supplies the labels.
+        If not specified, inputs will be set to zero.
     :param override_batchsize: If specified, override the batch size for the ONNX graph
     :param override_opset: If specified, override the imported ONNX opset to this version.
     :param expose_intermediates: Comma-separated list of tensor name patterns.
         Matched patterns will expose intermediate outputs as top-level outputs.
     :param output_prefix: Prefix for the generated output files.
     :param output_mode: Naming mode for generated output files.
-    :param argmax_verify_npy: If specified, take argmax of output and compare to this file for top-1 accuracy measurement
+    :param verify_npy: If specified, compare output to this file for top-1 accuracy measurement
+    :param verify_argmax: If specified, take argmax of output before comparing to verify_npy
+    :param auto_reshape: If True (default), reshape each input to the model's input shape, keeping its first dimension.
     :param save_modified_model: If specified, save the modified model
         (after batchsize changes or exposed intermediate tensors) with this filename
+    :param input_to_nchw: If specified, convert input tensors to NCHW format
+    :param input_to_nhwc: If specified, convert input tensors to NHWC format
+    :param input_cast2float: If specified, apply simple cast to float32 for input
     :param input_pix2float: If specified, do uint8 [0,255] -> fp32 [0,1] mapping for input
     :param input_zerocenter: If specified together with pix2float, do uint8 [0,255] -> fp32 [-1,+1] mapping for input
     :param maxiters: If specified, limit maximum number of iterations (batches) to be processed
     :param output_nosave: If specified, do not save output tensors to files
+    :param early_exit_acc_ratio: If specified (float between 0 and 1), exit early once any batch accuracy falls below it
+    :param override_exec_onnx: If specified, use this function to execute the model instead of qonnx
     """
     assert output_mode in output_modes, "Unrecognized output mode"
 
@@ -122,8 +138,26 @@ def exec_qonnx(
     inp_data = []
     labels = None
     if len(in_npy) > 0:
-        # load provided npy files and arrange in batches
-        inp_data = [np.load(x) for x in in_npy]
+        if len(in_npy) == 1 and in_npy[0].endswith(".npz"):
+            npz = np.load(in_npy[0])
+            inp_data = [npz["data"]]
+            if "target" in npz:
+                labels = npz["target"]
+                verify_npy = in_npy[0] + ":target"
+        else:
+            # load provided npy files and arrange in batches
+            inp_data = [np.load(x) for x in in_npy]
+            if verify_npy is not None:
+                labels = np.load(verify_npy)
+
+        if auto_reshape:
+            for i in range(len(inp_data)):
+                target_shape = model.get_tensor_shape(model.graph.input[i].name)
+                dset_size = inp_data[i].shape[0]
+                new_shape = (dset_size, *target_shape[1:])
+                if inp_data[i].shape != new_shape:
+                    inp_data[i] = inp_data[i].reshape(new_shape)
+
         inp_data_reshaped = []
         for inp in inp_data:
             dset_size = inp.shape[0]
@@ -132,8 +166,7 @@ def exec_qonnx(
             inp = inp.reshape(n_dset_iters, bsize, *inp.shape[1:])
             inp_data_reshaped.append(inp)
         inp_data = inp_data_reshaped
-        if argmax_verify_npy is not None:
-            labels = np.load(argmax_verify_npy)
+        if verify_npy is not None:
             assert labels.shape[0] == dset_size, "Label size must match dataset size"
             labels = labels.reshape(n_dset_iters, bsize, *labels.shape[1:])
     else:
@@ -150,7 +183,7 @@ def exec_qonnx(
     for iter in pbar:
         iter_suffix = "_batch%d" % iter
         idict = {}
-        if not argmax_verify_npy:
+        if not verify_npy:
             pbar.set_description("Batch [%d/%d]: running" % (iter + 1, n_dset_iters))
         # supply inputs and execute
         for inp_ind, inp in enumerate(model.graph.input):
@@ -158,16 +191,26 @@ def exec_qonnx(
                 idict[inp.name] = (inp_data[inp_ind][iter] / 255.0).astype(np.float32)
                 if input_zerocenter:
                     idict[inp.name] = (2 * idict[inp.name] - 1.0).astype(np.float32)
+            elif input_cast2float:
+                idict[inp.name] = inp_data[inp_ind][iter].astype(np.float32)
             else:
                 idict[inp.name] = inp_data[inp_ind][iter]
-        if n_custom_nodes > 0:
-            # run node-by-node in qonnx
-            odict = execute_onnx(model, idict)
+            if input_to_nhwc:
+                idict[inp.name] = np.transpose(idict[inp.name], (0, 2, 3, 1))
+            if input_to_nchw:
+                idict[inp.name] = np.transpose(idict[inp.name], (0, 3, 1, 2))
+        if override_exec_onnx is not None:
+            # run using specified custom execution function
+            odict = override_exec_onnx(model, idict)
         else:
-            # run using onnxruntime
-            sess = rt.InferenceSession(model.model.SerializeToString())
-            output_list = sess.run(None, idict)
-            odict = {outp.name: output_list[oind] for oind, outp in enumerate(model.graph.output)}
+            if n_custom_nodes > 0:
+                # run node-by-node in qonnx
+                odict = execute_onnx(model, idict)
+            else:
+                # run using onnxruntime
+                sess = rt.InferenceSession(model.model.SerializeToString())
+                output_list = sess.run(None, idict)
+                odict = {outp.name: output_list[oind] for oind, outp in enumerate(model.graph.output)}
         if not output_nosave:
             for out_ind, outp in enumerate(model.graph.output):
                 # save generated outputs
@@ -176,10 +219,11 @@ def exec_qonnx(
                 elif output_mode == OUTPUT_MODE_NAME:
                     oname = outp.name
                 np.save(output_prefix + oname + iter_suffix + ".npy", odict[outp.name])
-        if argmax_verify_npy:
+        if verify_npy:
             # measure accuracy for output
             ret = odict[model.graph.output[0].name]
-            ret = np.argmax(ret, axis=-1)
+            if verify_argmax:
+                ret = np.argmax(ret, axis=-1)
             ok_batch = np.count_nonzero(ret == labels[iter])
             nok_batch = bsize - ok_batch
             ok += ok_batch
@@ -190,6 +234,9 @@ def exec_qonnx(
                 "Batch [%d/%d]: ok %d nok %d accuracy %f (overall ok %d nok %d accuracy %f)"
                 % (iter + 1, n_dset_iters, ok_batch, nok_batch, accuracy_batch, ok, nok, accuracy_overall)
             )
+            if early_exit_acc_ratio is not None and accuracy_batch < early_exit_acc_ratio:
+                return (ok, nok)
+    return (ok, nok)
 
 
 def main():