diff --git a/method_comparison/MetaMathQA/run.py b/method_comparison/MetaMathQA/run.py index 00cdc2e4d4..42e019f931 100644 --- a/method_comparison/MetaMathQA/run.py +++ b/method_comparison/MetaMathQA/run.py @@ -101,7 +101,7 @@ def evaluate(model, tokenizer, ds, batch_size, generate_kwargs, use_tqdm: bool = return predictions, responses -@torch.inference_mode # type: ignore +@torch.inference_mode() def calculate_mean_per_token_loss(model, tokenizer, rows: list[str], batch_size: int, max_length: int) -> float: """Calculate the mean loss per token on the given dataset. diff --git a/method_comparison/app.py b/method_comparison/app.py index 9929d24bb4..1bf51c53a7 100644 --- a/method_comparison/app.py +++ b/method_comparison/app.py @@ -18,25 +18,46 @@ import tempfile import gradio as gr +import pandas as pd import plotly.express as px import plotly.graph_objects as go from processing import load_df from sanitizer import parse_and_filter -metric_preferences = { +_COMMON_METRIC_PREFERENCES = { "accelerator_memory_reserved_avg": "lower", "accelerator_memory_max": "lower", "accelerator_memory_reserved_99th": "lower", "total_time": "lower", "train_time": "lower", "file_size": "lower", - "test_accuracy": "higher", "train_loss": "lower", "num_trainable_params": "lower", - "forgetting*": "lower", } +_TASK_METRIC_PREFERENCES = { + "MetaMathQA": { + "test_accuracy": "higher", + "forgetting*": "lower", + }, + "image-gen": { + "test_dino_similarity": "higher", + "drift*": "lower", + }, +} + +_TASK_PARETO_DEFAULTS = { + "MetaMathQA": ("accelerator_memory_max", "test_accuracy"), + "image-gen": ("accelerator_memory_max", "test_dino_similarity"), +} + + +def get_metric_preferences(task_name): + prefs = dict(_COMMON_METRIC_PREFERENCES) + prefs.update(_TASK_METRIC_PREFERENCES.get(task_name, {})) + return prefs + def get_model_ids(task_name, df): filtered = df[df["task_name"] == task_name] @@ -49,7 +70,7 @@ def filter_data(task_name, model_id, df): # Compute the Pareto frontier for two 
selected metrics. -def compute_pareto_frontier(df, metric_x, metric_y): +def compute_pareto_frontier(df, metric_x, metric_y, metric_preferences): if df.empty: return df @@ -87,12 +108,12 @@ def dominates(a, b, metric_x, metric_y): return pareto_df -def generate_pareto_plot(df, metric_x, metric_y): +def generate_pareto_plot(df, metric_x, metric_y, metric_preferences): if df.empty: return {} # Compute Pareto frontier and non-frontier points. - pareto_df = compute_pareto_frontier(df, metric_x, metric_y) + pareto_df = compute_pareto_frontier(df, metric_x, metric_y, metric_preferences) non_pareto_df = df.drop(pareto_df.index) # Create an empty figure. @@ -188,6 +209,11 @@ def format_df(df): def build_app(df): + task_names = sorted(df["task_name"].unique()) + initial_task = "MetaMathQA" if "MetaMathQA" in task_names else task_names[0] + initial_prefs = get_metric_preferences(initial_task) + initial_x, initial_y = _TASK_PARETO_DEFAULTS.get(initial_task, (list(initial_prefs)[0], list(initial_prefs)[1])) + with gr.Blocks() as demo: gr.Markdown("# PEFT method comparison") gr.Markdown( @@ -201,22 +227,21 @@ def build_app(df): with gr.Row(): task_dropdown = gr.Dropdown( label="Select Task", - choices=sorted(df["task_name"].unique()), - value=sorted(df["task_name"].unique())[0], - ) - model_dropdown = gr.Dropdown( - label="Select Model ID", choices=get_model_ids(sorted(df["task_name"].unique())[0], df) + choices=task_names, + value=initial_task, ) + model_dropdown = gr.Dropdown(label="Select Model ID", choices=get_model_ids(initial_task, df)) # Make dataframe columns all equal in width so that they are good enough for numbers but don't # get hugely extended by columns like `train_config`. 
- column_widths = ["150px" for _ in df.columns] - column2index = dict(zip(df.columns, range(len(df.columns)))) - column_widths[column2index['experiment_name']] = '300px' + initial_filtered = filter_data(initial_task, get_model_ids(initial_task, df)[0], df) + column_widths = ["150px" for _ in initial_filtered.columns] + column2index = dict(zip(initial_filtered.columns, range(len(initial_filtered.columns)))) + column_widths[column2index["experiment_name"]] = "300px" data_table = gr.DataFrame( label="Results", - value=format_df(df), + value=format_df(initial_filtered), interactive=False, max_chars=100, wrap=False, @@ -232,9 +257,8 @@ def build_app(df): apply_filter_button = gr.Button("Apply Filter") reset_filter_button = gr.Button("Reset Filter") - gr.Markdown( - "*forgetting: This is the reduction in CE loss on a sample of Wikipedia data and reflects how much the " - "model 'forgot' during training. The lower the number, the better." + metric_explanation = gr.Markdown( + _get_metric_explanation(initial_task), ) gr.Markdown("## Pareto plot") @@ -245,23 +269,15 @@ def build_app(df): ) with gr.Row(): - x_default = ( - "accelerator_memory_max" - if "accelerator_memory_max" in metric_preferences - else list(metric_preferences.keys())[0] - ) - y_default = ( - "test_accuracy" if "test_accuracy" in metric_preferences else list(metric_preferences.keys())[1] - ) metric_x_dropdown = gr.Dropdown( label="1st metric for Pareto plot", - choices=list(metric_preferences.keys()), - value=x_default, + choices=list(initial_prefs.keys()), + value=initial_x, ) metric_y_dropdown = gr.Dropdown( label="2nd metric for Pareto plot", - choices=list(metric_preferences.keys()), - value=y_default, + choices=list(initial_prefs.keys()), + value=initial_y, ) pareto_plot = gr.Plot(label="Pareto Frontier Plot") @@ -280,10 +296,24 @@ def update_on_task(task_name, current_filter): except Exception: # invalid filter query pass - return gr.update(choices=new_models, value=new_models[0] if new_models else 
None), format_df(filtered) + + prefs = get_metric_preferences(task_name) + x_default, y_default = _TASK_PARETO_DEFAULTS.get(task_name, (list(prefs)[0], list(prefs)[1])) + metric_choices = list(prefs.keys()) + explanation = _get_metric_explanation(task_name) + + return ( + gr.update(choices=new_models, value=new_models[0] if new_models else None), + format_df(filtered), + gr.update(choices=metric_choices, value=x_default), + gr.update(choices=metric_choices, value=y_default), + explanation, + ) task_dropdown.change( - fn=update_on_task, inputs=[task_dropdown, filter_state], outputs=[model_dropdown, data_table] + fn=update_on_task, + inputs=[task_dropdown, filter_state], + outputs=[model_dropdown, data_table, metric_x_dropdown, metric_y_dropdown, metric_explanation], ) def update_on_model(task_name, model_id, current_filter): @@ -301,16 +331,17 @@ def update_on_model(task_name, model_id, current_filter): ) def update_pareto_plot_and_summary(task_name, model_id, metric_x, metric_y, current_filter): + prefs = get_metric_preferences(task_name) filtered = filter_data(task_name, model_id, df) if current_filter.strip(): try: mask = parse_and_filter(filtered, current_filter) filtered = filtered[mask] except Exception as e: - return generate_pareto_plot(filtered, metric_x, metric_y), f"Filter error: {e}" + return generate_pareto_plot(filtered, metric_x, metric_y, prefs), f"Filter error: {e}" - pareto_df = compute_pareto_frontier(filtered, metric_x, metric_y) - fig = generate_pareto_plot(filtered, metric_x, metric_y) + pareto_df = compute_pareto_frontier(filtered, metric_x, metric_y, prefs) + fig = generate_pareto_plot(filtered, metric_x, metric_y, prefs) summary = compute_pareto_summary(filtered, pareto_df, metric_x, metric_y) return fig, summary @@ -322,6 +353,7 @@ def update_pareto_plot_and_summary(task_name, model_id, metric_x, metric_y, curr ) def apply_filter(filter_query, task_name, model_id, metric_x, metric_y): + prefs = get_metric_preferences(task_name) filtered = 
filter_data(task_name, model_id, df) if filter_query.strip(): try: @@ -332,12 +364,12 @@ def apply_filter(filter_query, task_name, model_id, metric_x, metric_y): return ( filter_query, filtered, - generate_pareto_plot(filtered, metric_x, metric_y), + generate_pareto_plot(filtered, metric_x, metric_y, prefs), f"Filter error: {e}", ) - pareto_df = compute_pareto_frontier(filtered, metric_x, metric_y) - fig = generate_pareto_plot(filtered, metric_x, metric_y) + pareto_df = compute_pareto_frontier(filtered, metric_x, metric_y, prefs) + fig = generate_pareto_plot(filtered, metric_x, metric_y, prefs) summary = compute_pareto_summary(filtered, pareto_df, metric_x, metric_y) return filter_query, format_df(filtered), fig, summary @@ -348,9 +380,10 @@ def apply_filter(filter_query, task_name, model_id, metric_x, metric_y): ) def reset_filter(task_name, model_id, metric_x, metric_y): + prefs = get_metric_preferences(task_name) filtered = filter_data(task_name, model_id, df) - pareto_df = compute_pareto_frontier(filtered, metric_x, metric_y) - fig = generate_pareto_plot(filtered, metric_x, metric_y) + pareto_df = compute_pareto_frontier(filtered, metric_x, metric_y, prefs) + fig = generate_pareto_plot(filtered, metric_x, metric_y, prefs) summary = compute_pareto_summary(filtered, pareto_df, metric_x, metric_y) # Return empty strings to clear the filter state and textbox. return "", "", format_df(filtered), fig, summary @@ -379,7 +412,34 @@ def reset_filter(task_name, model_id, metric_x, metric_y): return demo -path = os.path.join(os.path.dirname(__file__), "MetaMathQA", "results") -df = load_df(path, task_name="MetaMathQA") +_METRIC_EXPLANATIONS = { + "MetaMathQA": ( + "*forgetting: This is the reduction in CE loss on a sample of Wikipedia data and reflects how much the " + "model 'forgot' during training. The lower the number, the better." 
+ ), + "image-gen": ( + "*drift: This measures how much the generated images drift from the base model's outputs on unrelated " + "prompts, reflecting how much the model 'forgot' during training. The lower the number, the better." + ), +} + + +def _get_metric_explanation(task_name): + return _METRIC_EXPLANATIONS.get(task_name, "") + + +base_dir = os.path.dirname(__file__) +_TASK_CONFIGS = { + "MetaMathQA": os.path.join(base_dir, "MetaMathQA", "results"), + "image-gen": os.path.join(base_dir, "image-gen", "results"), +} + +dfs = [] +for task_name, path in _TASK_CONFIGS.items(): + if os.path.isdir(path): + task_df = load_df(path, task_name=task_name) + if not task_df.empty: + dfs.append(task_df) +df = pd.concat(dfs, ignore_index=True) demo = build_app(df) demo.launch(theme=gr.themes.Soft()) diff --git a/method_comparison/image-gen/Makefile b/method_comparison/image-gen/Makefile new file mode 100644 index 0000000000..91156a7c12 --- /dev/null +++ b/method_comparison/image-gen/Makefile @@ -0,0 +1,96 @@ +# Makefile for listing and running the image generation experiments. + +# --- Configuration --- +PYTHON := python +RUN_SCRIPT := run.py +EXPERIMENTS_DIR := experiments +RESULTS_DIR := results + +OPTIONAL_FLAGS = + +ifdef UPLOAD_BUCKET + OPTIONAL_FLAGS += --bucket_name "${UPLOAD_BUCKET}" +endif + +# --- Automatic Experiment and Result Discovery --- + +# 1. Find all experiment directories by looking for adapter_config.json files. +# This gives us a list like: experiments/lora/llama-3.2-3B-rank32 ... +EXPERIMENT_PATHS := $(shell find $(EXPERIMENTS_DIR) \ + -name "adapter_config.json" -or \ + -name "training_params.json" | xargs dirname | sort -u) + +# 2. Define a function to replace all occurrences of a character in a string. +# This is needed to replicate the result naming logic from run.py (e.g., "lora/foo" -> "lora-foo"). 
+# Usage: $(call replace-all, string, char_to_replace, replacement_char) +replace-all = $(if $(findstring $(2),$(1)),$(call replace-all,$(subst $(2),$(3),$(1)),$(2),$(3)),$(1)) + +# 3. Define a function to convert an experiment path to its flat result file path. +# e.g., "experiments/lora/llama-3.2-3B-rank32" -> "results/lora-llama-3.2-3B-rank32.json" +exp_to_res = $(RESULTS_DIR)/$(call replace-all,$(patsubst $(EXPERIMENTS_DIR)/%,%,$(1)),/,--).json + +# 4. Generate the list of all target result files we want to build. +RESULT_FILES := $(foreach exp,$(EXPERIMENT_PATHS),$(call exp_to_res,$(exp))) + + +# --- Main Rules --- + +# The default 'all' target depends on all possible result files. +# Running `make` or `make all` will check and run any outdated or missing experiments. +all: $(RESULT_FILES) + + +# --- Dynamic Rule Generation --- + +# This is the core logic. We dynamically generate a specific Makefile rule for each experiment found. +# This avoids a complex pattern rule and makes the logic clearer. +define EXPERIMENT_template +# Input $1: The full experiment path (e.g., experiments/lora/llama-3.2-3B-rank32) + +# Define the rule: +# The target is the result file (e.g., results/lora-llama-3.2-3B-rank32.json). +# The dependencies are its config files, code changes need to be audited manually since they can +# vary in degree of importance. Note that we explicitly ignore when the script fails to run +# so that the other experiments still have a chance to run. +$(call exp_to_res,$(1)): $(wildcard $(1)/adapter_config.json) $(wildcard $(1)/training_params.json) + @echo "---" + @echo "Running experiment: $(1)" + -$(PYTHON) $(RUN_SCRIPT) $(OPTIONAL_FLAGS) -v $(1) + @echo "Finished: $$@" + @echo "---" + +endef + +# This command iterates through every found experiment path and evaluates the template, +# effectively stamping out a unique, explicit rule for each one. 
+$(foreach exp_path,$(EXPERIMENT_PATHS),$(eval $(call EXPERIMENT_template,$(exp_path)))) + + +# --- Utility Rules --- + +.PHONY: all clean list dump_rules + +# The 'clean' rule removes all generated results. +clean: + @echo "Cleaning results directory..." + @([ -n "$(wildcard $(RESULTS_DIR)/*.json)" ] && rm $(RESULTS_DIR)/*.json) || exit 0 + +# The 'list' rule is for debugging. It shows the discovered experiments +# and the result files the Makefile expects to create for them. +list: + @echo "Discovered experiment configurations:" + @$(foreach exp,$(EXPERIMENT_PATHS),echo " - $(exp)/adapter_config.json";) + @echo "\nTarget result files:" + @$(foreach res,$(RESULT_FILES),echo " - $(res)";) + +# The 'dump_rules' rule is for debugging. It dumps all dynamically defined rules. +define newline + + +endef +define DUMPED_RULES + $(foreach exp_path,$(EXPERIMENT_PATHS),$(call EXPERIMENT_template,$(exp_path))) +endef + +dump_rules: + @echo -e "$(subst $(newline),\n,${DUMPED_RULES})" diff --git a/method_comparison/image-gen/README.md b/method_comparison/image-gen/README.md new file mode 100644 index 0000000000..c2563860f0 --- /dev/null +++ b/method_comparison/image-gen/README.md @@ -0,0 +1,89 @@ +# PEFT method comparison on a DreamBooth-style image generation task + +## Goal + +This benchmark mirrors the structure of [`method_comparison/MetaMathQA`](https://github.com/huggingface/peft/tree/main/method_comparison/MetaMathQA) but targets DreamBooth-style fine-tuning for image generation. It is designed to compare PEFT methods along multiple dimensions like: + +- objective quality ([`DINOv2`](https://huggingface.co/facebook/dinov2-base) cosine similarity) +- runtime +- memory usage +- checkpoint size + +Note that for max memory reserved, this benchmark measures the memory only for the training part, not the evaluation. This is because evaluation requires extra memory (for running the DINO model) which should not be attributed to the corresponding PEFT method. 
+ +## Setup choices + +- Base model: [`black-forest-labs/FLUX.2-klein-base-4B`](https://huggingface.co/black-forest-labs/FLUX.2-klein-base-4B) +- Dataset (default): [`cat pillow`](https://huggingface.co/datasets/peft-internal-testing/cat-image-dataset) + +## Running + +### Experiment settings + +Create an experiment under `experiments///` or use one of the experiments there. + +Each experiment directory may contain: + +- `adapter_config.json` (optional; if missing, full fine-tuning is used) +- `training_params.json` (optional; overrides `default_training_params.json`) + +### Running a single experiment + +Run one experiment: + +```sh +python run.py -v experiments/lora/flux2-klein-rank16/ +``` + +By default, the adapter will be saved in a temporary file for further inspection if needed. To prevent this, add the `--clean` flag to the call. To upload the model checkpoint and sample images to a Hugging Face Hub Bucket, pass the `--bucket_name your_user/my_bucket_name` argument. + +### Running all pending experiments + +The Makefile checks which experiments are missing a corresponding results file and runs those experiments. Note that running a whole sweep can easily take many hours. + +```sh +make +``` + +If you set `UPLOAD_BUCKET="your_user/bucket_name"` as an environment variable prior to starting experiments via `make`, all experiments will be called with the `--bucket_name $UPLOAD_BUCKET` parameter and therefore store the checkpoints and sample images in that bucket. + +List experiments to run: + +```sh +make list +``` + +## Training configs + +### `adapter_config.json` + +This must be a valid PEFT configuration. It is easiest to create it programmatically, e.g.: + +```python +from peft import LoraConfig + +config = LoraConfig(...) +config.save_pretrained() +``` + +### `training_params.json` + +There is a default file for the non-PEFT parameters: `default_training_params.json`. This contains all the other parameters that are relevant for training, e.g. 
the base model id, number of steps, batch size, learning rate, etc. If parameters that differ from the defaults are needed for a specific experiment, place a `training_params.json` into the experiment directory and adjust the parameters that need changing. The other parameters are taken from the aforementioned default config. + +For an overview of all possible arguments, you can also check the `TrainConfig` `dataclass` in `utils.py`. + +## Dependencies + +Install additional dependencies from: + +```sh +python -m pip install -r requirements.txt +``` + +Python 3.12+ is required. + +## TODO + +- Add further experiments (more PEFT methods) and explore better hyper-parameters. +- Test images are already created but they're not uploaded anywhere. +- The method comparison Gradio app needs to be updated to show the generated images. diff --git a/method_comparison/image-gen/cancelled_results/.gitkeep b/method_comparison/image-gen/cancelled_results/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/method_comparison/image-gen/data.py b/method_comparison/image-gen/data.py new file mode 100644 index 0000000000..70f2d8e013 --- /dev/null +++ b/method_comparison/image-gen/data.py @@ -0,0 +1,102 @@ +# Copyright 2026-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Data handling for the image generation benchmark.""" + +import numpy as np +import torchvision.transforms as T +from datasets import load_dataset +from PIL import Image +from PIL.ImageOps import exif_transpose + + +def _to_rgb(image) -> Image.Image: + if isinstance(image, Image.Image): + return image.convert("RGB") + return Image.fromarray(image).convert("RGB") + + +def _build_train_pixel_values(images: list[Image.Image], resolution: int): + size = resolution, resolution # hard-code square + train_augmentations = T.Compose( + [ + T.Resize(size, interpolation=T.InterpolationMode.BILINEAR), + T.ToTensor(), + T.Normalize([0.5], [0.5]), + ] + ) + return [train_augmentations(exif_transpose(image)) for image in images] + + +def get_train_valid_test_datasets(*, train_config, print_fn=print): + ds = load_dataset(train_config.dataset_id, split=train_config.dataset_split) + image_column = train_config.image_column + + train_size = len(ds) - train_config.valid_size - train_config.test_size + + prompts = train_config.instance_prompts + if isinstance(prompts, str): + prompts = [prompts] * len(ds) + else: + if len(ds) != len(prompts): + raise ValueError(f"Need 1 instance prompt per sample image, found {len(prompts)} and {len(ds)} instead.") + + train_size = len(ds) - train_config.valid_size - train_config.test_size + if train_size < 1: + raise ValueError( + f"Dataset too small: need at least {1 + train_config.valid_size + train_config.test_size} rows, " + f"found {len(ds)}" + ) + + np.random.seed(0) + indices = np.arange(len(ds)) + np.random.shuffle(indices) + + idx_train = indices[:train_size] + idx_valid = indices[train_size : train_size + train_config.valid_size] + idx_test = indices[ + train_size + train_config.valid_size : train_size + train_config.valid_size + train_config.test_size + ] + + ds_train = ds.select(idx_train) + ds_valid = ds.select(idx_valid) + ds_test = ds.select(idx_test) + + train_images = [_to_rgb(img) for img in ds_train[image_column]] + 
valid_images = [_to_rgb(img) for img in ds_valid[image_column]] + test_images = [_to_rgb(img) for img in ds_test[image_column]] + + train_prompts = [prompts[i] for i in idx_train] + valid_prompts = [prompts[i] for i in idx_valid] + test_prompts = [prompts[i] for i in idx_test] + + train_dataset = { + "pixel_values": _build_train_pixel_values(train_images, train_config.resolution), + "prompts": train_prompts, + "repeats": train_config.repeats, + } + valid_dataset = [ + {"raw_image": exif_transpose(image), "prompt": prompt} for image, prompt in zip(valid_images, valid_prompts) + ] + test_dataset = [ + {"raw_image": exif_transpose(image), "prompt": prompt} for image, prompt in zip(test_images, test_prompts) + ] + + print_fn(f"Dataset: {train_config.dataset_id}") + print_fn(f"Raw rows: {len(ds)}") + print_fn(f"Train rows: {len(train_dataset['prompts']) * train_dataset['repeats']}") + print_fn(f"Valid rows: {len(valid_dataset)}") + print_fn(f"Test rows: {len(test_dataset)}") + + return train_dataset, valid_dataset, test_dataset diff --git a/method_comparison/image-gen/default_training_params.json b/method_comparison/image-gen/default_training_params.json new file mode 100644 index 0000000000..c1146a2094 --- /dev/null +++ b/method_comparison/image-gen/default_training_params.json @@ -0,0 +1,78 @@ +{ + "model_id": "black-forest-labs/FLUX.2-klein-base-4B", + "dataset_id": "peft-internal-testing/cat-image-dataset", + "dataset_split": "train", + "dtype": "bfloat16", + "resolution": 512, + "batch_size": 2, + "batch_size_eval": 1, + "repeats": 1000, + "max_steps": 750, + "eval_steps": 100, + "compile": false, + "use_gc": true, + "seed": 0, + "grad_norm_clip": 1.0, + "optimizer_type": "AdamW", + "optimizer_kwargs": { + "lr": 0.0001, + "weight_decay": 0.0001 + }, + "lr_scheduler": null, + "use_amp": false, + "autocast_adapter_dtype": true, + "image_column": "image", + "valid_size": 2, + "test_size": 4, + "num_inference_steps": 20, + "guidance_scale": 3.5, + 
"max_sequence_length": 512, + "text_encoder_out_layers": [10, 20, 30], + "weighting_scheme": "none", + "logit_mean": 0.0, + "logit_std": 1.0, + "mode_scale": 1.29, + "dino_model_id": "facebook/dinov2-base", + "dino_image_size": 224, + "instance_prompts": [ + "sks cat sitting on a chair in front of a box of chocolates", + "sks cat playing on the Steam Deck", + "sks cat reading the newspaper with the title \"DIE BOX\"", + "sks cat being fed sushi", + "sks cat in front of a box of tomatoes", + "a gingerbread house in front of sks cat", + "sks cat is wearing a Wednesday Addams costume", + "sks cat looks skeptical as he reads a magazine depicting a black cat", + "sks cat wearing a striped scarf", + "sleepy sks cat lying in bed covered by a blue blanket", + "sks cat wearing a necklace while sitting in a box on a sofa", + "a box with four donuts in front of sks cat", + "sks cat wearing a pink veil with flowers on it and a dagger made out of yellow cardboard", + "sks cat between two pillows, with one pillow showing a polar bear and the other a fox", + "sks cat with an espresso reading the newspaper", + "a close up of a hand petting sks cat on the head", + "sks cat opens a white door", + "sks cat with a VIP \"all access\" pass", + "sks cat in front of a monitor showing the video game \"RimWorld\"", + "sks cat reads a newspaper about cannabis" + ], + "sample_image_prompts": [ + "a photo of sks cat", + "a color drawing of sks cat", + "sks cat at the beach", + "a photo of sks dog", + "a photo of an orange cat" + ], + "drift_image_prompts": [ + "a photo of cat", + "a color drawing of cat", + "cat sitting in a bathtub", + "cat looks into a mirror", + "a photo of cat next to a tree", + "the Mona Lisa but she looks angry", + "a dog holding a sign that says hello world", + "a photo of the Eiffel tower at night", + "an astronaut riding a horse", + "3d design concept of a chair that looks like an avocado" + ] +} diff --git 
a/method_comparison/image-gen/experiments/adalora/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/adalora/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..3b08b63a54 --- /dev/null +++ b/method_comparison/image-gen/experiments/adalora/flux2-klein-default/adapter_config.json @@ -0,0 +1,61 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": null, + "beta1": 0.85, + "beta2": 0.85, + "bias": "none", + "corda_config": null, + "deltaT": 1, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "init_r": 32, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "orth_reg_weight": 0.5, + "peft_type": "ADALORA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "qalora_group_size": 16, + "r": 8, + "rank_pattern": null, + "revision": null, + "target_modules": [ + "linear_in", + "add_q_proj", + "add_k_proj", + "to_qkv_mlp_proj", + "to_add_out", + "linear_out", + "to_k", + "to_out.0", + "to_q", + "to_v", + "add_v_proj" + ], + "target_parameters": null, + "target_r": 8, + "task_type": null, + "tfinal": 200, + "tinit": 100, + "total_step": 750, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} diff --git a/method_comparison/image-gen/experiments/boft/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/boft/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..e2a987567e --- /dev/null +++ 
b/method_comparison/image-gen/experiments/boft/flux2-klein-default/adapter_config.json @@ -0,0 +1,33 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "boft_block_num": 0, + "boft_block_size": 4, + "boft_dropout": 0.0, + "boft_n_butterfly_factor": 2, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "BOFT", + "peft_version": "0.18.2.dev0@UNKNOWN", + "revision": null, + "target_modules": [ + "linear_in", + "add_q_proj", + "add_k_proj", + "to_qkv_mlp_proj", + "to_add_out", + "linear_out", + "to_k", + "to_out.0", + "to_q", + "to_v", + "add_v_proj" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/c3a/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/c3a/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..7c7e69d12e --- /dev/null +++ b/method_comparison/image-gen/experiments/c3a/flux2-klein-default/adapter_config.json @@ -0,0 +1,29 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "block_size": 256, + "block_size_pattern": {}, + "inference_mode": false, + "init_weights": "xavier_uniform", + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "C3A", + "peft_version": "0.18.2.dev0@UNKNOWN", + "revision": null, + "target_modules": [ + "to_q", + "add_q_proj", + "add_k_proj", + "add_v_proj", + "to_k", + "to_v", + "to_add_out", + "linear_in", + "linear_out", + "to_qkv_mlp_proj", + "to_out.0" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/c3a/flux2-klein-default/training_params.json b/method_comparison/image-gen/experiments/c3a/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..a39b9dc8a8 --- /dev/null +++ 
b/method_comparison/image-gen/experiments/c3a/flux2-klein-default/training_params.json @@ -0,0 +1,6 @@ +{ + "optimizer_kwargs": { + "lr": 3e-1, + "weight_decay": 1e-5 + } +} diff --git a/method_comparison/image-gen/experiments/delora/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/delora/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..2c09acb965 --- /dev/null +++ b/method_comparison/image-gen/experiments/delora/flux2-klein-default/adapter_config.json @@ -0,0 +1,34 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "delora_lambda": 15, + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "lambda_pattern": {}, + "layers_pattern": null, + "layers_to_transform": null, + "module_dropout": 0.0, + "modules_to_save": null, + "peft_type": "DELORA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "task_type": null +} diff --git a/method_comparison/image-gen/experiments/delora/flux2-klein-default/training_params.json b/method_comparison/image-gen/experiments/delora/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..8a120ad9a8 --- /dev/null +++ b/method_comparison/image-gen/experiments/delora/flux2-klein-default/training_params.json @@ -0,0 +1,6 @@ +{ + "optimizer_kwargs": { + "lr": 1e-3 + } +} + diff --git a/method_comparison/image-gen/experiments/fourierft/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/fourierft/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..b56aa79348 --- /dev/null +++ b/method_comparison/image-gen/experiments/fourierft/flux2-klein-default/adapter_config.json @@ -0,0 +1,33 @@ +{ + 
"auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": false, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "n_frequency": 2000, + "n_frequency_pattern": {}, + "peft_type": "FOURIERFT", + "peft_version": "0.18.2.dev0@UNKNOWN", + "random_loc_seed": 777, + "revision": null, + "scaling": 150.0, + "target_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/full-finetuning/flux2-klein-default/training_params.json b/method_comparison/image-gen/experiments/full-finetuning/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..eb78c482b7 --- /dev/null +++ b/method_comparison/image-gen/experiments/full-finetuning/flux2-klein-default/training_params.json @@ -0,0 +1,8 @@ +{ + "max_steps": 300, + "eval_steps": 100, + "optimizer_kwargs": { + "lr": 0.00005, + "weight_decay": 0.01 + } +} diff --git a/method_comparison/image-gen/experiments/gralora/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/gralora/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..c7e46d504c --- /dev/null +++ b/method_comparison/image-gen/experiments/gralora/flux2-klein-default/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha": 32, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "fan_in_fan_out": false, + "gralora_dropout": 0.0, + "gralora_k": 2, + "hybrid_r": 0, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "GRALORA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "revision": null, + "target_modules": [ 
+ "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "task_type": null +} diff --git a/method_comparison/image-gen/experiments/hra/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/hra/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..03f42e08e7 --- /dev/null +++ b/method_comparison/image-gen/experiments/hra/flux2-klein-default/adapter_config.json @@ -0,0 +1,31 @@ +{ + "apply_GS": false, + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "HRA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "revision": null, + "target_modules": [ + "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "task_type": null +} diff --git a/method_comparison/image-gen/experiments/ia3/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/ia3/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..39b09f094e --- /dev/null +++ b/method_comparison/image-gen/experiments/ia3/flux2-klein-default/adapter_config.json @@ -0,0 +1,39 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "feedforward_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "inference_mode": false, + "init_ia3_weights": true, + "modules_to_save": null, + "peft_type": "IA3", + "peft_version": "0.18.2.dev0@UNKNOWN", + "revision": null, + "target_modules": [ + "to_v", + 
"add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/ia3/flux2-klein-default/training_params.json b/method_comparison/image-gen/experiments/ia3/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..8a120ad9a8 --- /dev/null +++ b/method_comparison/image-gen/experiments/ia3/flux2-klein-default/training_params.json @@ -0,0 +1,6 @@ +{ + "optimizer_kwargs": { + "lr": 1e-3 + } +} + diff --git a/method_comparison/image-gen/experiments/lily/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/lily/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..f970e30d36 --- /dev/null +++ b/method_comparison/image-gen/experiments/lily/flux2-klein-default/adapter_config.json @@ -0,0 +1,31 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "num_B": 2, + "peft_type": "LILY", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "revision": null, + "scaling": 1.0, + "stride_A": 1, + "target_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/ln_tuning/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/ln_tuning/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..901d53c0f6 --- /dev/null +++ b/method_comparison/image-gen/experiments/ln_tuning/flux2-klein-default/adapter_config.json @@ -0,0 +1,15 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": 
null, + "exclude_modules": null, + "inference_mode": false, + "modules_to_save": null, + "peft_type": "LN_TUNING", + "peft_version": "0.18.2.dev0@UNKNOWN", + "revision": null, + "target_modules": [ + "norm_q", + "norm_k" + ], + "task_type": null +} diff --git a/method_comparison/image-gen/experiments/loha/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/loha/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..84c9e1d2b9 --- /dev/null +++ b/method_comparison/image-gen/experiments/loha/flux2-klein-default/adapter_config.json @@ -0,0 +1,34 @@ +{ + "alpha": 32, + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "module_dropout": 0.0, + "modules_to_save": null, + "peft_type": "LOHA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "rank_dropout": 0.0, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "to_qkv_mlp_proj", + "to_add_out", + "to_out.0", + "linear_out", + "add_k_proj", + "to_v", + "add_q_proj", + "add_v_proj", + "to_k", + "to_q", + "linear_in" + ], + "task_type": null, + "use_effective_conv2d": false +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/lokr/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/lokr/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..f52a54cd10 --- /dev/null +++ b/method_comparison/image-gen/experiments/lokr/flux2-klein-default/adapter_config.json @@ -0,0 +1,37 @@ +{ + "alpha": 32, + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": null, + "decompose_both": false, + "decompose_factor": -1, + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "module_dropout": 0.0, + "modules_to_save": null, 
+ "peft_type": "LOKR", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "rank_dropout": 0.0, + "rank_dropout_scale": false, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "to_qkv_mlp_proj", + "to_add_out", + "to_out.0", + "linear_out", + "add_k_proj", + "to_v", + "add_q_proj", + "add_v_proj", + "to_k", + "to_q", + "linear_in" + ], + "task_type": null, + "use_effective_conv2d": false +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/lora/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/lora/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..fd5daff770 --- /dev/null +++ b/method_comparison/image-gen/experiments/lora/flux2-klein-default/adapter_config.json @@ -0,0 +1,43 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "black-forest-labs/FLUX.2-klein-base-4B", + "bias": "none", + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.0, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "qalora_group_size": 16, + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "task_type": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} diff --git a/method_comparison/image-gen/experiments/miss/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/miss/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..5018d99d6d --- /dev/null +++ 
b/method_comparison/image-gen/experiments/miss/flux2-klein-default/adapter_config.json @@ -0,0 +1,31 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "mini_r": 1, + "miss_dropout": 0.0, + "modules_to_save": null, + "peft_type": "MISS", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "revision": null, + "target_modules": [ + "to_q", + "add_q_proj", + "add_k_proj", + "add_v_proj", + "to_k", + "to_v", + "to_add_out", + "linear_in", + "linear_out", + "to_qkv_mlp_proj", + "to_out.0" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/oft/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/oft/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..320987bdd3 --- /dev/null +++ b/method_comparison/image-gen/experiments/oft/flux2-klein-default/adapter_config.json @@ -0,0 +1,38 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "block_share": false, + "coft": false, + "eps": 6e-05, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "module_dropout": 0.0, + "modules_to_save": null, + "num_cayley_neumann_terms": 5, + "oft_block_size": 32, + "peft_type": "OFT", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 0, + "revision": null, + "target_modules": [ + "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "task_type": null, + "use_cayley_neumann": true +} diff --git a/method_comparison/image-gen/experiments/osf/flux2-klein-default/adapter_config.json 
b/method_comparison/image-gen/experiments/osf/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..f1ed132ae5 --- /dev/null +++ b/method_comparison/image-gen/experiments/osf/flux2-klein-default/adapter_config.json @@ -0,0 +1,27 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "effective_rank": 32, + "inference_mode": false, + "init_weights": null, + "modules_to_save": null, + "peft_type": "OSF", + "peft_version": "0.18.2.dev0@UNKNOWN", + "rank_pattern": null, + "revision": null, + "target_modules": [ + "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "target_svd_config": null, + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/peanut/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/peanut/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..f1a0806cf3 --- /dev/null +++ b/method_comparison/image-gen/experiments/peanut/flux2-klein-default/adapter_config.json @@ -0,0 +1,31 @@ +{ + "act_fn": "relu", + "auto_mapping": null, + "base_model_name_or_path": null, + "depth": 0, + "exclude_modules": null, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "PEANUT", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "revision": null, + "scaling": 1.0, + "target_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null +} diff --git a/method_comparison/image-gen/experiments/psoft/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/psoft/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..a44695699b --- /dev/null +++ 
b/method_comparison/image-gen/experiments/psoft/flux2-klein-default/adapter_config.json @@ -0,0 +1,40 @@ +{ + "ab_svd_init": "psoft_init", + "auto_mapping": null, + "base_model_name_or_path": null, + "cayley_neumann_eps": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "num_cayley_neumann_terms": 5, + "peft_type": "PSOFT", + "peft_version": "0.18.2.dev0@UNKNOWN", + "psoft_alpha": 32, + "psoft_dropout": 0.0, + "psoft_mag_a": true, + "psoft_mag_b": true, + "psoft_orth": true, + "psoft_svd": "full", + "psoft_svd_lowrank_niter": 10, + "r": 32, + "revision": null, + "target_modules": [ + "to_q", + "add_q_proj", + "add_k_proj", + "add_v_proj", + "to_k", + "to_v", + "to_add_out", + "linear_in", + "linear_out", + "to_qkv_mlp_proj", + "to_out.0" + ], + "task_type": null, + "use_cayley_neumann": false +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/pvera/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/pvera/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..dda2513460 --- /dev/null +++ b/method_comparison/image-gen/experiments/pvera/flux2-klein-default/adapter_config.json @@ -0,0 +1,34 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "d_initial": 0.1, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "PVERA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "projection_prng_key": 0, + "pvera_dropout": 0.0, + "r": 32, + "revision": null, + "sample_at_inference": false, + "save_projection": true, + "target_modules": [ + "to_q", + "add_q_proj", + "add_k_proj", + "add_v_proj", + "to_k", + "to_v", + "to_add_out", + "linear_in", + "linear_out", + "to_qkv_mlp_proj", + "to_out.0" + ], + 
"task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/pvera/flux2-klein-default/training_params.json b/method_comparison/image-gen/experiments/pvera/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..8bed6db65e --- /dev/null +++ b/method_comparison/image-gen/experiments/pvera/flux2-klein-default/training_params.json @@ -0,0 +1,6 @@ +{ + "optimizer_kwargs": { + "lr": 3e-3 + } +} + diff --git a/method_comparison/image-gen/experiments/randlora/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/randlora/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..22eb3000cc --- /dev/null +++ b/method_comparison/image-gen/experiments/randlora/flux2-klein-default/adapter_config.json @@ -0,0 +1,35 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "RANDLORA", + "peft_version": null, + "projection_prng_key": 0, + "r": 32, + "randlora_alpha": 640, + "randlora_dropout": 0.0, + "revision": null, + "save_projection": true, + "sparse": false, + "target_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null, + "very_sparse": false +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/road/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/road/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..576b9ed490 --- /dev/null +++ b/method_comparison/image-gen/experiments/road/flux2-klein-default/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "group_size": 64, + "inference_mode": 
false, + "init_weights": true, + "modules_to_save": null, + "peft_type": "ROAD", + "peft_version": "0.18.2.dev0@UNKNOWN", + "revision": null, + "target_modules": [ + "linear_out", + "to_qkv_mlp_proj", + "to_k", + "add_k_proj", + "add_v_proj", + "to_v", + "add_q_proj", + "to_out.0", + "to_add_out", + "linear_in", + "to_q" + ], + "task_type": null, + "variant": "road_1" +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/road/flux2-klein-default/training_params.json b/method_comparison/image-gen/experiments/road/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..52d87e3ef6 --- /dev/null +++ b/method_comparison/image-gen/experiments/road/flux2-klein-default/training_params.json @@ -0,0 +1,5 @@ +{ + "optimizer_kwargs": { + "lr": 1e-3 + } +} diff --git a/method_comparison/image-gen/experiments/shira/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/shira/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..cf609352c6 --- /dev/null +++ b/method_comparison/image-gen/experiments/shira/flux2-klein-default/adapter_config.json @@ -0,0 +1,28 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "mask_type": "random", + "modules_to_save": null, + "peft_type": "SHIRA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "random_seed": null, + "revision": null, + "target_modules": [ + "to_q", + "add_q_proj", + "add_k_proj", + "add_v_proj", + "to_k", + "to_v", + "to_add_out", + "linear_in", + "linear_out", + "to_qkv_mlp_proj", + "to_out.0" + ], + "task_type": null +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/vblora/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/vblora/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..2a70ecb4e4 --- /dev/null +++ 
b/method_comparison/image-gen/experiments/vblora/flux2-klein-default/adapter_config.json @@ -0,0 +1,36 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_logits_std": 0.1, + "init_vector_bank_bound": 0.02, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "num_vectors": 256, + "peft_type": "VBLORA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "r": 32, + "revision": null, + "save_only_topk_weights": false, + "target_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null, + "topk": 2, + "vblora_dropout": 0.0, + "vector_length": 256 +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/vera/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/vera/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..928905d2c6 --- /dev/null +++ b/method_comparison/image-gen/experiments/vera/flux2-klein-default/adapter_config.json @@ -0,0 +1,33 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "d_initial": 0.1, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "peft_type": "VERA", + "peft_version": "0.18.2.dev0@UNKNOWN", + "projection_prng_key": 0, + "r": 32, + "revision": null, + "save_projection": true, + "target_modules": [ + "to_v", + "add_v_proj", + "to_qkv_mlp_proj", + "linear_out", + "to_k", + "to_add_out", + "add_q_proj", + "linear_in", + "add_k_proj", + "to_q", + "to_out.0" + ], + "task_type": null, + "vera_dropout": 0.0 +} \ No newline at end of file diff --git a/method_comparison/image-gen/experiments/vera/flux2-klein-default/training_params.json 
b/method_comparison/image-gen/experiments/vera/flux2-klein-default/training_params.json new file mode 100644 index 0000000000..8a120ad9a8 --- /dev/null +++ b/method_comparison/image-gen/experiments/vera/flux2-klein-default/training_params.json @@ -0,0 +1,6 @@ +{ + "optimizer_kwargs": { + "lr": 1e-3 + } +} + diff --git a/method_comparison/image-gen/experiments/waveft/flux2-klein-default/adapter_config.json b/method_comparison/image-gen/experiments/waveft/flux2-klein-default/adapter_config.json new file mode 100644 index 0000000000..34600fe20b --- /dev/null +++ b/method_comparison/image-gen/experiments/waveft/flux2-klein-default/adapter_config.json @@ -0,0 +1,37 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": null, + "bias": "none", + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": false, + "init_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "modules_to_save": null, + "n_frequency": 2592, + "n_frequency_pattern": {}, + "peft_type": "WAVEFT", + "peft_version": "0.18.2.dev0@UNKNOWN", + "proportional_parameters": false, + "random_loc_seed": 777, + "revision": null, + "scaling": 25.0, + "target_modules": [ + "to_out.0", + "to_add_out", + "to_qkv_mlp_proj", + "add_k_proj", + "add_q_proj", + "add_v_proj", + "to_add_out", + "to_k", + "to_q", + "to_v", + "linear_in", + "linear_out" + ], + "task_type": null, + "use_idwt": true, + "wavelet_family": "db1" +} diff --git a/method_comparison/image-gen/requirements.txt b/method_comparison/image-gen/requirements.txt new file mode 100644 index 0000000000..f0809c7f47 --- /dev/null +++ b/method_comparison/image-gen/requirements.txt @@ -0,0 +1,6 @@ +datasets +diffusers>=0.37.0 +torchvision +numpy +tqdm +pillow diff --git a/method_comparison/image-gen/results/.gitkeep b/method_comparison/image-gen/results/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/method_comparison/image-gen/run.py b/method_comparison/image-gen/run.py new file mode 100644 index 
0000000000..f581604086 --- /dev/null +++ b/method_comparison/image-gen/run.py @@ -0,0 +1,668 @@ +# Copyright 2026-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Main entry point for image generation method comparison experiments. + +Based on https://github.com/huggingface/diffusers/blob/bbbcdd87bd9d960fa372663a50b9edbdcb1391c6/examples/dreambooth/train_dreambooth_lora_flux2_klein.py +""" + +import argparse +import copy +import datetime as dt +import json +import os +import sys +import time +from collections.abc import Callable +from contextlib import AbstractContextManager, nullcontext +from functools import partial +from typing import Any, Optional + +import huggingface_hub +import torch +from diffusers.training_utils import ( + compute_density_for_timestep_sampling, + compute_loss_weighting_for_sd3, + offload_models, +) +from torch.amp import GradScaler, autocast +from tqdm import tqdm +from transformers import set_seed +from utils import ( + FILE_NAME_TRAIN_PARAMS, + TrainConfig, + TrainResult, + TrainStatus, + get_artifact_stem, + get_base_model_info, + get_dataset_info, + get_dino_embeddings, + get_dino_encoder, + get_file_size, + get_optimizer_and_scheduler, + get_peft_branch, + get_pipeline, + get_sample_image_save_dir, + get_torch_dtype, + get_train_config, + init_accelerator, + log_results, + upload_checkpoint_to_bucket, + upload_images_to_bucket, + validate_experiment_path, +) + +from data import 
get_train_valid_test_datasets +from peft import PeftConfig, PeftModel +from peft.utils import CONFIG_NAME, infer_device + + +os.environ["TORCHINDUCTOR_FORCE_DISABLE_CACHES"] = "1" + + +def get_sigmas(timesteps, noise_scheduler, n_dim, dtype): + device = "cpu" + sigmas = noise_scheduler.sigmas.to(device=device, dtype=dtype) + schedule_timesteps = noise_scheduler.timesteps.to(device) + timesteps = timesteps.to(device) + step_indices = [(schedule_timesteps == t).nonzero().item() for t in timesteps] + + sigma = sigmas[step_indices].flatten() + while len(sigma.shape) < n_dim: + sigma = sigma.unsqueeze(-1) + return sigma + + +class DummyGradScaler: + def scale(self, loss): + return loss + + def unscale_(self, optimizer): + pass + + def step(self, optimizer): + optimizer.step() + + def update(self): + pass + + +def precompute_prompt_caches( + pipeline, prompts: list[str], device_type: str, train_config: TrainConfig +) -> tuple[torch.Tensor, torch.Tensor]: + prompt_embeds_cache = [] + text_ids_cache = [] + with torch.no_grad(), offload_models(pipeline.text_encoder, device=device_type, offload=True): + for prompt in prompts: + prompt_embeds, text_ids = pipeline.encode_prompt( + prompt=prompt, + max_sequence_length=train_config.max_sequence_length, + text_encoder_out_layers=train_config.text_encoder_out_layers, + ) + prompt_embeds_cache.append(prompt_embeds) + text_ids_cache.append(text_ids) + return torch.cat(prompt_embeds_cache, dim=0).to(device_type), torch.cat(text_ids_cache, dim=0).to(device_type) + + +def precompute_latent_cache( + *, + pipeline, + vae, + pixel_values: list[torch.Tensor], + train_config: TrainConfig, + device_type: str, +) -> torch.Tensor: + latents_cache = [] + latents_bn_mean = vae.bn.running_mean.view(1, -1, 1, 1) + latents_bn_std = torch.sqrt(vae.bn.running_var.view(1, -1, 1, 1) + vae.config.batch_norm_eps) + with torch.no_grad(), offload_models(vae, device=device_type, offload=True): + latents_bn_mean = latents_bn_mean.to(vae.device) + 
latents_bn_std = latents_bn_std.to(vae.device) + for i in range(0, len(pixel_values), train_config.batch_size): + pixel_values_batch = torch.stack(pixel_values[i : i + train_config.batch_size]).to( + device=vae.device, dtype=get_torch_dtype(train_config.dtype) + ) + latents = vae.encode(pixel_values_batch).latent_dist.mode() + latents = pipeline._patchify_latents(latents) + latents = (latents - latents_bn_mean) / latents_bn_std + latents_cache.append(latents.to(device_type)) + return torch.cat(latents_cache, dim=0) + + +def _generate_images(pipeline, *, generator, prompts: list[str], config: TrainConfig): + outputs = pipeline( + prompt=prompts, + num_inference_steps=config.num_inference_steps, + guidance_scale=config.guidance_scale, + height=config.resolution, # hard-code square + width=config.resolution, + max_sequence_length=config.max_sequence_length, + text_encoder_out_layers=config.text_encoder_out_layers, + generator=generator, + output_type="pil", + ) + return outputs + + +@torch.inference_mode() +def evaluate( + *, + pipeline, + ds_eval, + processor, + dino_model, + config: TrainConfig, + num_repeats: int = 1, +) -> float: + with offload_models(pipeline.text_encoder, pipeline.vae, device=pipeline.transformer.device, offload=True): + seed = config.seed + 100_000 # don't use the same seed as in training just to be sure + generator = torch.Generator(device=pipeline.transformer.device).manual_seed(seed) + cosine_sim_scores = [] + iter_ = range(num_repeats) if num_repeats <= 1 else tqdm(range(num_repeats)) + for _ in iter_: + generated_images = [] + reference_images = [] + batch_size = config.batch_size_eval + + for i in range(0, len(ds_eval), batch_size): + sliced = [ds_eval[j] for j in range(i, min(i + batch_size, len(ds_eval)))] + prompts = [sample["prompt"] for sample in sliced] + outputs = _generate_images(pipeline, generator=generator, prompts=prompts, config=config) + generated_images.extend(outputs.images) + reference_images.extend([sample["raw_image"] 
for sample in sliced]) + if i + batch_size >= len(ds_eval): + break + + generated_embeddings = get_dino_embeddings(generated_images, processor, dino_model, batch_size=batch_size) + reference_embeddings = get_dino_embeddings(reference_images, processor, dino_model, batch_size=batch_size) + cosine_sim = (generated_embeddings * reference_embeddings).sum(dim=-1) + cosine_sim_scores.append(cosine_sim.mean().item()) + mean_sim = sum(cosine_sim_scores) / num_repeats + return mean_sim + + +@torch.inference_mode() +def measure_drift(*, pipeline, processor, dino_model, config: TrainConfig) -> float: + # Measure the drift as 1 - the cosine similarity of the images generated by the base model vs the model with the + # trained adapter. The prompts are unrelated to the concept, so we expect the similarity to be high, hence the drift + # to be low. + if not isinstance(pipeline.transformer, PeftModel): + # in case of full fine-tuning, the adapter cannot be disabled and thus the drift cannot be measured, return + # dummy value + return 1.0 + + batch_size = config.batch_size_eval + prompts = config.drift_image_prompts + pbar = tqdm(total=len(prompts) * 2) + with offload_models(pipeline.text_encoder, pipeline.vae, device=pipeline.transformer.device, offload=True): + # without adapter + seed = config.seed + 100_000_000 # don't use the same seed as in training or eval just to be sure + generator = torch.Generator(device=pipeline.transformer.device).manual_seed(seed) + generated_base = [] + with pipeline.transformer.disable_adapter(): + for i in range(0, len(prompts), batch_size): + prompt_batch = prompts[i : i + batch_size] + outputs = _generate_images(pipeline, generator=generator, prompts=prompt_batch, config=config) + generated_base.extend(outputs.images) + pbar.update(1) + + # with adapter + seed = config.seed + 100_000_000 # don't use the same seed as in training or eval just to be sure + generator = torch.Generator(device=pipeline.transformer.device).manual_seed(seed) + 
generated_adapter = [] + for i in range(0, len(prompts), batch_size): + prompt_batch = prompts[i : i + batch_size] + outputs = _generate_images(pipeline, generator=generator, prompts=prompt_batch, config=config) + generated_adapter.extend(outputs.images) + pbar.update(1) + + # calculate drift + generated_embeddings = get_dino_embeddings(generated_adapter, processor, dino_model, batch_size=batch_size) + reference_embeddings = get_dino_embeddings(generated_base, processor, dino_model, batch_size=batch_size) + cosine_sim = (generated_embeddings * reference_embeddings).sum(dim=-1) + drift = 1 - cosine_sim.mean().item() + return drift + + +def train( + *, + pipeline, + train_config: TrainConfig, + accelerator_memory_init: int, + is_adalora: bool, + print_verbose: Callable[..., None], +) -> TrainResult: + accelerator_memory_allocated_log = [] + accelerator_memory_reserved_log = [] + losses = [] + durations = [] + metrics = [] + total_samples = 0 + + device_type = infer_device() + train_dataset, valid_dataset, test_dataset = get_train_valid_test_datasets( + train_config=train_config, print_fn=print_verbose + ) + train_size_base = len(train_dataset["prompts"]) + gen = torch.Generator(device=device_type).manual_seed(train_config.seed) + train_indices = torch.cat( + [torch.randperm(train_size_base, generator=gen, device=device_type) for _ in range(train_dataset["repeats"])] + ) + if train_config.max_steps > len(train_indices): + raise ValueError( + f"max_steps is too high ({train_config.max_steps}), there are only {len(train_indices)} training samples" + ) + + processor, dino_model = get_dino_encoder(train_config.dino_model_id, train_config.dino_image_size) + + torch_accelerator_module = getattr(torch, device_type, torch.cuda) + if train_config.use_amp: + grad_scaler: GradScaler | DummyGradScaler = GradScaler(device=device_type) + autocast_ctx: Callable[[], AbstractContextManager[Any]] = partial(autocast, device_type=device_type) + else: + grad_scaler = DummyGradScaler() + 
autocast_ctx = nullcontext + + vae = pipeline.vae # CPU + transformer = pipeline.transformer.to(device_type) + noise_scheduler_copy = copy.deepcopy(pipeline.scheduler) # prevent mutating it + optimizer, lr_scheduler = get_optimizer_and_scheduler( + transformer, + optimizer_type=train_config.optimizer_type, + max_steps=train_config.max_steps, + lr_scheduler_arg=train_config.lr_scheduler, + **train_config.optimizer_kwargs, + ) + + if hasattr(transformer, "get_nb_trainable_parameters"): + num_trainable_params, num_params = transformer.get_nb_trainable_parameters() + else: + num_params = sum(param.numel() for param in transformer.parameters()) + num_trainable_params = sum(param.numel() for param in transformer.parameters() if param.requires_grad) + print_verbose( + f"trainable params: {num_trainable_params:,d} || all params: {num_params:,d} || " + f"trainable: {100 * num_trainable_params / num_params:.4f}%" + ) + + status = TrainStatus.FAILED + tic_train = time.perf_counter() + eval_time = 0.0 + error_msg = "" + + # pre-compute, since they don't change during training and we can keep the text encoder and VAE offloaded + prompt_embeds_cache, text_ids_cache = precompute_prompt_caches( + pipeline, train_dataset["prompts"], device_type, train_config=train_config + ) + latents_cache = precompute_latent_cache( + pipeline=pipeline, + vae=vae, + pixel_values=train_dataset["pixel_values"], + train_config=train_config, + device_type=device_type, + ) + + torch_accelerator_module.empty_cache() + torch_accelerator_module.reset_peak_memory_stats() + accelerator_memory_max_train = 0 + try: + torch_accelerator_module.reset_peak_memory_stats() + pbar = tqdm(range(1, train_config.max_steps + 1)) + for step in pbar: + tic = time.perf_counter() + i_start = (step - 1) * train_config.batch_size + i_stop = min(step * train_config.batch_size, len(train_indices)) + batch_indices = train_indices[i_start:i_stop].to(device=latents_cache.device, dtype=torch.long) + latents = 
latents_cache.index_select(0, batch_indices) +            prompt_embeds = prompt_embeds_cache.index_select(0, batch_indices) +            text_ids = text_ids_cache.index_select(0, batch_indices) + +            current_batch_size = latents.shape[0] +            total_samples += current_batch_size + +            model_input_ids = pipeline._prepare_latent_ids(latents).to(latents.device) +            noise = torch.randn(latents.shape, generator=gen, device=latents.device, dtype=latents.dtype) + +            u = compute_density_for_timestep_sampling( +                weighting_scheme=train_config.weighting_scheme, +                batch_size=current_batch_size, +                logit_mean=train_config.logit_mean, +                logit_std=train_config.logit_std, +                mode_scale=train_config.mode_scale, +            ) +            indices = (u * noise_scheduler_copy.config.num_train_timesteps).long() +            timesteps = noise_scheduler_copy.timesteps[indices].to(device=latents.device) +            # Add noise according to flow matching. zt = (1 - texp) * x + texp * z1 +            sigmas = get_sigmas(timesteps, noise_scheduler_copy, n_dim=latents.ndim, dtype=latents.dtype).to( +                device_type +            ) +            noisy_latents = (1.0 - sigmas) * latents + sigmas * noise +            # [B, C, H, W] -> [B, H*W, C] +            packed_noisy_latents = pipeline._pack_latents(noisy_latents) + +            # handle guidance +            if transformer.config.guidance_embeds: +                guidance = torch.full([1], train_config.guidance_scale, device=device_type) +                guidance = guidance.expand(current_batch_size) +            else: +                guidance = None + +            optimizer.zero_grad(set_to_none=True) +            with autocast_ctx(): +                model_pred = transformer( +                    hidden_states=packed_noisy_latents, +                    timestep=timesteps / 1000, +                    guidance=guidance, +                    encoder_hidden_states=prompt_embeds, +                    txt_ids=text_ids,  # B, text_seq_len, 4 +                    img_ids=model_input_ids,  # B, image_seq_len, 4 +                    return_dict=False, +                )[0] +                model_pred = model_pred[:, : packed_noisy_latents.size(1)] +                model_pred = pipeline._unpack_latents_with_ids(model_pred, model_input_ids) +                # these weighting schemes use a uniform timestep sampling and instead post-weight the loss +                weighting = compute_loss_weighting_for_sd3(train_config.weighting_scheme, 
sigmas=sigmas) + target = noise - latents + loss = torch.mean( + (weighting.float() * (model_pred.float() - target.float()) ** 2).reshape(target.shape[0], -1), 1 + ) + loss = loss.mean() + + grad_scaler.scale(loss).backward() + if train_config.grad_norm_clip: + grad_scaler.unscale_(optimizer) + torch.nn.utils.clip_grad_norm_(transformer.parameters(), train_config.grad_norm_clip) + grad_scaler.step(optimizer) + grad_scaler.update() + lr_scheduler.step() + + if is_adalora: + transformer.base_model.update_and_allocate(step) + + losses.append(loss) + pbar.set_postfix({"loss": loss.item()}) + + accelerator_memory_allocated_log.append( + torch_accelerator_module.memory_allocated() - accelerator_memory_init + ) + accelerator_memory_reserved_log.append( + torch_accelerator_module.memory_reserved() - accelerator_memory_init + ) + toc = time.perf_counter() + durations.append(toc - tic) + + if step % train_config.eval_steps == 0: + # Measure max memory _before_ executing the eval loop and reset stats _after_ the eval loop. This way + # the extra memory required for evaluation is not included in the max memory statistic. We want to + # measure only the training memory, as the eval requires extra memory (DINO model) not caused by the + # PEFT method. 
+ accelerator_memory_max_train = max( + accelerator_memory_max_train, + torch_accelerator_module.max_memory_reserved() - accelerator_memory_init, + ) + + tic_eval = time.perf_counter() + loss_avg = sum(losses[-train_config.eval_steps :]) / train_config.eval_steps + loss_avg = loss_avg.item() + memory_allocated_avg = ( + sum(accelerator_memory_allocated_log[-train_config.eval_steps :]) / train_config.eval_steps + ) + memory_reserved_avg = ( + sum(accelerator_memory_reserved_log[-train_config.eval_steps :]) / train_config.eval_steps + ) + dur_train = sum(durations[-train_config.eval_steps :]) + + transformer.eval() + valid_similarity = evaluate( + pipeline=pipeline, + ds_eval=valid_dataset, + processor=processor, + dino_model=dino_model, + config=train_config, + ) + transformer.train() + + toc_eval = time.perf_counter() + dur_eval = toc_eval - tic_eval + eval_time += dur_eval + elapsed = time.perf_counter() - tic_train + + metrics.append( + { + "step": step, + "valid dino_similarity": valid_similarity, + "train loss": loss_avg, + "train samples": total_samples, + "train time": dur_train, + "eval time": dur_eval, + "mem allocated avg": memory_allocated_avg, + "mem reserved avg": memory_reserved_avg, + "elapsed time": elapsed, + } + ) + + log_dict = { + "step": f"{step:4d}", + "samples": f"{total_samples:5d}", + "lr": f"{lr_scheduler.get_last_lr()[0]:.2e}", + "loss avg": f"{loss_avg:.4f}", + "valid sim": f"{valid_similarity:.4f}", + "train time": f"{dur_train:.1f}s", + "eval time": f"{dur_eval:.1f}s", + "mem allocated": f"{memory_allocated_avg:.0f}", + "mem reserved": f"{memory_reserved_avg:.0f}", + "elapsed time": f"{elapsed // 60:.0f}min {elapsed % 60:.0f}s", + } + print_verbose(json.dumps(log_dict)) + + torch_accelerator_module.empty_cache() + torch_accelerator_module.reset_peak_memory_stats() + + accelerator_memory_max_train = max( + accelerator_memory_max_train, + torch_accelerator_module.max_memory_reserved() - accelerator_memory_init, + ) + 
print_verbose(f"Training finished after {train_config.max_steps} steps, evaluation on test set follows.") + transformer.eval() + test_similarity = evaluate( + pipeline=pipeline, + ds_eval=test_dataset, + processor=processor, + dino_model=dino_model, + config=train_config, + num_repeats=3, + ) + print_verbose("Calculating drift.") + test_drift = measure_drift(pipeline=pipeline, processor=processor, dino_model=dino_model, config=train_config) + metrics.append( + { + "step": step, + "test dino_similarity": test_similarity, + "drift": test_drift, + "train loss": (sum(losses[-train_config.eval_steps :]) / train_config.eval_steps).item(), + "train samples": total_samples, + } + ) + print_verbose(f"Test DINOv2 similarity: {test_similarity:.4f}") + print_verbose(f"Test drift: {test_drift:.4f}") + + except KeyboardInterrupt: + print_verbose("canceled training") + status = TrainStatus.CANCELED + error_msg = "manually canceled" + except torch.OutOfMemoryError as exc: + print_verbose("out of memory error encountered") + status = TrainStatus.CANCELED + error_msg = str(exc) + except Exception as exc: + print_verbose(f"encountered an error: {exc}") + status = TrainStatus.CANCELED + error_msg = str(exc) + + toc_train = time.perf_counter() + train_time = toc_train - tic_train - eval_time + + if status != TrainStatus.CANCELED: + status = TrainStatus.SUCCESS + train_result = TrainResult( + status=status, + train_time=train_time, + accelerator_memory_reserved_log=accelerator_memory_reserved_log, + accelerator_memory_max_train=accelerator_memory_max_train, + losses=[loss.item() for loss in losses], + metrics=metrics, + error_msg=error_msg, + num_trainable_params=num_trainable_params, + num_total_params=num_params, + ) + return train_result + + +@torch.inference_mode() +def generate_sample_images( + *, + pipeline, + train_config, + sample_image_dir: str, + file_stem: str, + print_verbose: Callable[..., None], +) -> None: + target_device = pipeline.transformer.device + with 
offload_models(pipeline.text_encoder, pipeline.vae, device=target_device, offload=True): + # don't use the same seed as in training just to be sure + seed = train_config.seed + 100_000 + generator = torch.Generator(device=target_device).manual_seed(seed) + pbar = tqdm( + enumerate(train_config.sample_image_prompts, start=1), total=len(train_config.sample_image_prompts) + ) + for idx, prompt in pbar: + image_path = os.path.join(sample_image_dir, f"{file_stem}_{idx:02d}.png") + outputs = _generate_images(pipeline, generator=generator, prompts=[prompt], config=train_config) + outputs.images[0].save(image_path) + + +def main(*, path_experiment: str, experiment_name: str, clean: bool, bucket_name: Optional[str]) -> None: + tic_total = time.perf_counter() + start_date = dt.datetime.now(tz=dt.timezone.utc).replace(microsecond=0).isoformat() + + peft_branch = get_peft_branch() + if peft_branch == "main": + print_verbose("===== This experiment is categorized as a MAIN run because the PEFT branch is 'main' ======") + else: + print_verbose( + f"===== This experiment is categorized as a TEST run because the PEFT branch is '{peft_branch}' ======" + ) + + peft_config: Optional[PeftConfig] = None + if os.path.exists(os.path.join(path_experiment, CONFIG_NAME)): + peft_config = PeftConfig.from_pretrained(path_experiment) + else: + print_verbose(f"Could not find PEFT config at {path_experiment}, performing FULL FINETUNING") + + path_train_config = os.path.join(path_experiment, FILE_NAME_TRAIN_PARAMS) + train_config = get_train_config(path_train_config) + accelerator_memory_init = init_accelerator() + set_seed(train_config.seed) + + model_info = get_base_model_info(train_config.model_id) + dataset_info = get_dataset_info(train_config.dataset_id) + pipeline = get_pipeline( + model_id=train_config.model_id, + dtype=train_config.dtype, + compile=train_config.compile, + peft_config=peft_config, + autocast_adapter_dtype=train_config.autocast_adapter_dtype, + use_gc=train_config.use_gc, + 
) + print_verbose(pipeline.transformer) + + train_result = train( + pipeline=pipeline, + train_config=train_config, + accelerator_memory_init=accelerator_memory_init, + is_adalora=peft_config is not None and peft_config.peft_type == "ADALORA", + print_verbose=print_verbose, + ) + + if train_result.status == TrainStatus.FAILED: + print_verbose("Training failed, not logging results") + sys.exit(1) + + file_size = get_file_size(pipeline.transformer, peft_config=peft_config, clean=clean, print_fn=print_verbose) + + time_total = time.perf_counter() - tic_total + log_results( + experiment_name=experiment_name, + train_result=train_result, + time_total=time_total, + file_size=file_size, + model_info=model_info, + dataset_info=dataset_info, + start_date=start_date, + train_config=train_config, + peft_config=peft_config, + print_fn=print_verbose, + ) + + if (train_result.status == TrainStatus.SUCCESS) and train_config.sample_image_prompts: + print_verbose("Generating sample images") + try: + sample_image_dir = get_sample_image_save_dir(train_status=train_result.status, peft_branch=peft_branch) + file_stem = get_artifact_stem(experiment_name, start_date, sample_image_dir) + generate_sample_images( + pipeline=pipeline, + train_config=train_config, + sample_image_dir=sample_image_dir, + file_stem=file_stem, + print_verbose=print_verbose, + ) + print_verbose(f"Stored sample images in {sample_image_dir}") + except Exception as exc: + print_verbose(f"Sample image generation failed: {exc}") + + if bucket_name: + huggingface_hub.create_bucket(bucket_name, exist_ok=True) + upload_checkpoint_to_bucket(pipeline.transformer, experiment_name, bucket_name) + upload_images_to_bucket(bucket_name) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose output") + parser.add_argument("path_experiment", type=str, help="Path to the experiment directory") + parser.add_argument( + "--clean", + 
action="store_true", + help="Delete training artifacts after run finishes (logs are still saved)", + ) + parser.add_argument("--bucket_name", type=str, help="HF bucket to upload checkpoints and images to.") + args = parser.parse_args() + + experiment_name = validate_experiment_path(args.path_experiment) + + if args.verbose: + + def print_verbose(*args, **kwargs) -> None: + kwargs["file"] = sys.stderr + print(*args, **kwargs) + else: + + def print_verbose(*args, **kwargs) -> None: + pass + + main( + path_experiment=args.path_experiment, + experiment_name=experiment_name, + clean=args.clean, + bucket_name=args.bucket_name, + ) diff --git a/method_comparison/image-gen/sample-images/cancelled_results/.gitkeep b/method_comparison/image-gen/sample-images/cancelled_results/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/method_comparison/image-gen/sample-images/results/.gitkeep b/method_comparison/image-gen/sample-images/results/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/method_comparison/image-gen/sample-images/temporary_results/.gitkeep b/method_comparison/image-gen/sample-images/temporary_results/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/method_comparison/image-gen/temporary_results/.gitkeep b/method_comparison/image-gen/temporary_results/.gitkeep new file mode 100644 index 0000000000..e69de29bb2 diff --git a/method_comparison/image-gen/utils.py b/method_comparison/image-gen/utils.py new file mode 100644 index 0000000000..650d388b5c --- /dev/null +++ b/method_comparison/image-gen/utils.py @@ -0,0 +1,594 @@ +# Copyright 2026-present the HuggingFace Inc. team. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for the image generation benchmark.""" + +import copy +import enum +import json +import os +import platform +import subprocess +import tempfile +import warnings +from collections.abc import Callable +from dataclasses import asdict, dataclass +from typing import Any, Literal, Optional + +import datasets +import diffusers +import huggingface_hub +import numpy as np +import torch +import transformers +from diffusers import Flux2KleinPipeline +from torch import nn +from transformers import AutoImageProcessor, AutoModel, get_cosine_schedule_with_warmup + +import peft +from peft import PeftConfig, get_peft_model +from peft.optimizers import create_lorafa_optimizer, create_loraplus_optimizer +from peft.utils import SAFETENSORS_WEIGHTS_NAME, infer_device + + +device = infer_device() + +if device not in ["cuda", "xpu"]: + raise RuntimeError("CUDA or XPU is not available, currently only CUDA or XPU is supported") + +ACCELERATOR_MEMORY_INIT_THRESHOLD = 500 * 2**20 # 500MB +FILE_NAME_DEFAULT_TRAIN_PARAMS = os.path.join(os.path.dirname(__file__), "default_training_params.json") +FILE_NAME_TRAIN_PARAMS = "training_params.json" +RESULT_PATH = os.path.join(os.path.dirname(__file__), "results") +RESULT_PATH_TEST = os.path.join(os.path.dirname(__file__), "temporary_results") +RESULT_PATH_CANCELLED = os.path.join(os.path.dirname(__file__), "cancelled_results") +SAMPLE_IMAGE_PATH = os.path.join(os.path.dirname(__file__), "sample-images") +SAMPLE_IMAGE_PATH_MAIN = os.path.join(SAMPLE_IMAGE_PATH, "results") +SAMPLE_IMAGE_PATH_TEST = 
os.path.join(SAMPLE_IMAGE_PATH, "temporary_results") +SAMPLE_IMAGE_PATH_CANCELLED = os.path.join(SAMPLE_IMAGE_PATH, "cancelled_results") +hf_api = huggingface_hub.HfApi() +WARMUP_STEP_RATIO = 0.1 + + +@dataclass +class TrainConfig: + """All configuration parameters associated with training the model + + Args: + model_id: The model identifier, should not be changed + dataset_id: The dataset identifier, should not be changed + dataset_split: The dataset split to use, should not be changed + dtype: The data type to use for the model + resolution: The image resolution + batch_size: The batch size for training + batch_size_eval: The batch size for eval/test + repeats: The number of repeats for the dataset (if there are more steps than train samples) + max_steps: The maximum number of steps to train + eval_steps: The number of steps between evaluations + compile: Whether to compile the model + use_gc: Whether to use gradient checkpointing. + seed: The random seed + grad_norm_clip: The gradient norm clipping value (set to 0 to skip) + optimizer_type: The name of a torch optimizer (e.g. AdamW) or a PEFT method ("lora+", "lora-fa") + optimizer_kwargs: The optimizer keyword arguments (lr etc.) 
+ lr_scheduler: The learning rate scheduler (currently only None or 'cosine' are supported) + use_amp: Whether to use automatic mixed precision + autocast_adapter_dtype: Whether to cast adapter dtype to float32, same argument as in PEFT + instance_prompts: The prompt(s) used for training instances + image_column: The column name for images in the dataset + valid_size: The validation set size + test_size: The test set size + num_inference_steps: The number of inference steps for image generation + guidance_scale: The guidance scale for image generation + max_sequence_length: The maximum sequence length for the text encoder + text_encoder_out_layers: The output layers of the text encoder to use + weighting_scheme: The weighting scheme for the loss + logit_mean: The logit mean for logit_normal weighting + logit_std: The logit std for logit_normal weighting + mode_scale: The mode scale for mode weighting + dino_model_id: The DINO model identifier for evaluation + dino_image_size: The image size for the DINO model + sample_image_prompts: The prompts used for generating sample images, should not be changed + drift_image_prompts: The prompts used for measuring drift, should not be changed + """ + + model_id: str + dataset_id: str + dataset_split: str + dtype: Literal["float32", "float16", "bfloat16"] + resolution: int + batch_size: int + batch_size_eval: int + repeats: int + max_steps: int + eval_steps: int + compile: bool + use_gc: bool + seed: int + grad_norm_clip: float + optimizer_type: str + optimizer_kwargs: dict[str, Any] + lr_scheduler: Optional[Literal["cosine"]] + use_amp: bool + autocast_adapter_dtype: bool + instance_prompts: str | list[str] + image_column: str + valid_size: int + test_size: int + num_inference_steps: int + guidance_scale: float + max_sequence_length: int + text_encoder_out_layers: list[int] + weighting_scheme: Literal["none", "sigma_sqrt", "logit_normal", "mode"] + logit_mean: float + logit_std: float + mode_scale: float + dino_model_id: str 
+    dino_image_size: int +    sample_image_prompts: list[str] +    drift_image_prompts: list[str] + +    def __post_init__(self) -> None: +        if self.dtype not in ["float32", "float16", "bfloat16"]: +            raise ValueError(f"Invalid dtype: {self.dtype}") +        if self.batch_size <= 0: +            raise ValueError(f"Invalid batch_size: {self.batch_size}") +        if self.batch_size_eval <= 0: +            raise ValueError(f"Invalid batch_size_eval: {self.batch_size_eval}") +        if self.repeats <= 0: +            raise ValueError(f"Invalid repeats: {self.repeats}") +        if self.max_steps <= 0: +            raise ValueError(f"Invalid max_steps: {self.max_steps}") +        if self.eval_steps <= 0: +            raise ValueError(f"Invalid eval_steps: {self.eval_steps}") +        if self.eval_steps > self.max_steps: +            raise ValueError(f"Invalid eval_steps: {self.eval_steps} > max_steps: {self.max_steps}") +        if self.grad_norm_clip < 0: +            raise ValueError(f"Invalid grad_norm_clip: {self.grad_norm_clip}") +        if self.optimizer_type not in ["lora+", "lora-fa"] and not hasattr(torch.optim, self.optimizer_type): +            raise ValueError(f"Invalid optimizer_type: {self.optimizer_type}") +        if self.lr_scheduler not in [None, "cosine"]: +            raise ValueError(f"Invalid lr_scheduler: {self.lr_scheduler}, must be None or 'cosine'") + + +def validate_experiment_path(path: str) -> str: +    if not os.path.exists(FILE_NAME_DEFAULT_TRAIN_PARAMS): +        raise FileNotFoundError(f"Missing default training params file '{FILE_NAME_DEFAULT_TRAIN_PARAMS}'") +    if not os.path.exists(path): +        raise FileNotFoundError(f"Path {path} does not exist") + +    path_parts = path.rstrip(os.path.sep).split(os.path.sep) +    if (len(path_parts) != 3) or (path_parts[-3] != "experiments"): +        raise ValueError( +            f"Path {path} does not have the correct structure, should be ./experiments/<peft-method>/<experiment-name>" +        ) + +    experiment_name = os.path.join(*path_parts[-2:]) +    return experiment_name + + +def get_train_config(path: str) -> TrainConfig: +    with open(FILE_NAME_DEFAULT_TRAIN_PARAMS) as f: +        default_config_kwargs = json.load(f) + +    config_kwargs = {} +    if 
os.path.exists(path): + with open(path) as f: + config_kwargs = json.load(f) + + config_kwargs = {**default_config_kwargs, **config_kwargs} + return TrainConfig(**config_kwargs) + + +def init_accelerator() -> int: + torch_accelerator_module = getattr(torch, device, torch.cuda) + torch.manual_seed(0) + torch_accelerator_module.reset_peak_memory_stats() + torch_accelerator_module.manual_seed_all(0) + nn.Linear(1, 1).to(device) + + accelerator_memory_init = torch_accelerator_module.max_memory_reserved() + if accelerator_memory_init > ACCELERATOR_MEMORY_INIT_THRESHOLD: + raise RuntimeError( + f"{device} memory usage at start is too high: {accelerator_memory_init // 2**20}MB, " + f"please ensure that no other processes are running on {device}." + ) + + torch_accelerator_module.reset_peak_memory_stats() + accelerator_memory_init = torch_accelerator_module.max_memory_reserved() + return accelerator_memory_init + + +def get_torch_dtype(dtype: Literal["float32", "float16", "bfloat16"]) -> torch.dtype: + if dtype == "float32": + return torch.float32 + if dtype == "float16": + return torch.float16 + return torch.bfloat16 + + +def get_pipeline( + *, + model_id: str, + dtype: Literal["float32", "float16", "bfloat16"], + compile: bool, + peft_config: Optional[PeftConfig], + autocast_adapter_dtype: bool, + use_gc: bool, +): + torch_dtype = get_torch_dtype(dtype) + pipeline = Flux2KleinPipeline.from_pretrained(model_id, torch_dtype=torch_dtype) + pipeline.set_progress_bar_config(disable=True) + if use_gc: + pipeline.transformer.enable_gradient_checkpointing() + + pipeline.vae.requires_grad_(False) + pipeline.text_encoder.requires_grad_(False) + + transformer = pipeline.transformer + if peft_config is None: + transformer.requires_grad_(True) + else: + transformer = get_peft_model(transformer, peft_config, autocast_adapter_dtype=autocast_adapter_dtype) + pipeline.transformer = transformer + + if compile: + pipeline.transformer = torch.compile(pipeline.transformer, dynamic=True) + + 
pipeline.transformer.train() + pipeline.vae.eval() + pipeline.text_encoder.eval() + return pipeline + + +class DummyScheduler: + def __init__(self, lr): + self.lr = lr + + def get_last_lr(self): + return [self.lr] + + def step(self): + pass + + +def get_optimizer_and_scheduler( + model, *, optimizer_type: str, max_steps: int, lr_scheduler_arg: Optional[Literal["cosine"]], **optimizer_kwargs +) -> tuple[torch.optim.Optimizer, Any]: + if optimizer_type == "lora+": + optimizer = create_loraplus_optimizer(model, optimizer_cls=torch.optim.AdamW, **optimizer_kwargs) + elif optimizer_type == "lora-fa": + optimizer = create_lorafa_optimizer(model, **optimizer_kwargs) + else: + cls = getattr(torch.optim, optimizer_type) + optimizer = cls(model.parameters(), **optimizer_kwargs) + + if lr_scheduler_arg == "cosine": + warmup_steps = int(WARMUP_STEP_RATIO * max_steps) + lr_scheduler = get_cosine_schedule_with_warmup( + optimizer, num_warmup_steps=warmup_steps, num_training_steps=max_steps + ) + elif lr_scheduler_arg is None: + lr_scheduler = DummyScheduler(optimizer_kwargs["lr"]) + else: + raise ValueError(f"Invalid lr_scheduler argument: {lr_scheduler_arg}") + + return optimizer, lr_scheduler + + +def upload_checkpoint_to_bucket(model: nn.Module, experiment_name: str, bucket_name: str): + """Uploads model checkpoint to Hugging Face Bucket""" + try: + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True, delete=True) as tmp_dir: + model.save_pretrained(tmp_dir) + huggingface_hub.batch_bucket_files( + bucket_name, + add=[ + (os.path.join(tmp_dir, fname), f"checkpoints/{experiment_name}/{fname}") + for fname in os.listdir(tmp_dir) + ], + ) + except Exception as exc: + print(f"Failed to upload model checkpoint to hub: {exc}") + + +def upload_images_to_bucket(bucket_name: str): + """Syncs test images (only main runs) with Hugging Face Bucket""" + try: + huggingface_hub.sync_bucket(SAMPLE_IMAGE_PATH, f"hf://buckets/{bucket_name}/sample-images", delete=False) + except 
Exception as exc: + print(f"Failed to upload sample images to hub: {exc}") + + +def get_file_size( + transformer: nn.Module, *, peft_config: Optional[PeftConfig], clean: bool, print_fn: Callable[..., None] +) -> int: + file_size = 99999999 + if peft_config is not None: + try: + with tempfile.TemporaryDirectory(ignore_cleanup_errors=True, delete=clean) as tmp_dir: + transformer.save_pretrained(tmp_dir) + stat = os.stat(os.path.join(tmp_dir, SAFETENSORS_WEIGHTS_NAME)) + file_size = stat.st_size + if not clean: + print_fn(f"Saved PEFT checkpoint to {tmp_dir}") + except Exception as exc: + print(f"Failed to save PEFT checkpoint due to the following error: {exc}") + else: + print_fn("Not saving full model checkpoint because it is too large, estimating size instead") + try: + num_params = sum(param.numel() for param in transformer.parameters()) + dtype_size = next(transformer.parameters()).element_size() + file_size = num_params * dtype_size + except Exception as exc: + print(f"Failed to determine file size for fully finetuned model because of: {exc}") + return file_size + + +def get_base_model_info(model_id: str) -> Optional[huggingface_hub.ModelInfo]: + try: + return hf_api.model_info(model_id) + except Exception as exc: + warnings.warn(f"Could not retrieve model info, failed with error {exc}") + return None + + +def get_dataset_info(dataset_id: str) -> Optional[huggingface_hub.DatasetInfo]: + try: + return hf_api.dataset_info(dataset_id) + except Exception as exc: + warnings.warn(f"Could not retrieve dataset info, failed with error {exc}") + return None + + +def get_git_hash(module) -> Optional[str]: + module_path = module.__path__[0] + if "site-packages" in module_path: + return None + return subprocess.check_output("git rev-parse HEAD".split(), cwd=os.path.dirname(module.__file__)).decode().strip() + + +def get_package_info() -> dict[str, Optional[str]]: + package_info = { + "transformers-version": transformers.__version__, + "transformers-commit-hash": 
get_git_hash(transformers), + "peft-version": peft.__version__, + "peft-commit-hash": get_git_hash(peft), + "datasets-version": datasets.__version__, + "datasets-commit-hash": get_git_hash(datasets), + "diffusers-version": diffusers.__version__, + "diffusers-commit-hash": get_git_hash(diffusers), + "torch-version": torch.__version__, + "torch-commit-hash": get_git_hash(torch), + } + return package_info + + +def get_system_info() -> dict[str, str]: + torch_accelerator_module = getattr(torch, device, torch.cuda) + system_info = { + "system": platform.system(), + "release": platform.release(), + "version": platform.version(), + "machine": platform.machine(), + "processor": platform.processor(), + "accelerator": torch_accelerator_module.get_device_name(0), + } + return system_info + + +@dataclass +class MetaInfo: + package_info: dict[str, Optional[str]] + system_info: dict[str, str] + pytorch_info: str + + +def get_meta_info() -> MetaInfo: + meta_info = MetaInfo( + package_info=get_package_info(), + system_info=get_system_info(), + pytorch_info=torch.__config__.show(), + ) + return meta_info + + +def get_peft_branch() -> str: + return ( + subprocess.check_output("git rev-parse --abbrev-ref HEAD".split(), cwd=os.path.dirname(peft.__file__)) + .decode() + .strip() + ) + + +class TrainStatus(enum.Enum): + FAILED = "failed" + SUCCESS = "success" + CANCELED = "canceled" + + +@dataclass +class TrainResult: + status: TrainStatus + train_time: float + accelerator_memory_reserved_log: list[int] + accelerator_memory_max_train: int + losses: list[float] + metrics: list[Any] + error_msg: str + num_trainable_params: int + num_total_params: int + + +def get_dino_encoder(model_id: str, image_size: int): + processor = AutoImageProcessor.from_pretrained(model_id) + model = AutoModel.from_pretrained(model_id).to(device) + model.eval() + return processor, model + + +@torch.inference_mode() +def get_dino_embeddings(images, processor, model, batch_size: int): + embeddings = [] + for i in 
range(0, len(images), batch_size): + batch_images = images[i : i + batch_size] + inputs = processor(images=batch_images, return_tensors="pt").to(model.device) + hidden_state = model(**inputs).last_hidden_state[:, 0] + hidden_state = torch.nn.functional.normalize(hidden_state, dim=-1) + embeddings.append(hidden_state) + return torch.cat(embeddings, dim=0) + + +def log_to_console(log_data: dict[str, Any], print_fn: Callable[..., None]) -> None: + accelerator_memory_max = log_data["train_info"]["accelerator_memory_max"] + accelerator_memory_avg = log_data["train_info"]["accelerator_memory_reserved_avg"] + accelerator_memory_reserved_99th = log_data["train_info"]["accelerator_memory_reserved_99th"] + time_train = log_data["train_info"]["train_time"] + time_total = log_data["run_info"]["total_time"] + file_size = log_data["train_info"]["file_size"] + + print_fn(f"accelerator memory max: {accelerator_memory_max // 2**20}MB") + print_fn(f"accelerator memory reserved avg: {accelerator_memory_avg // 2**20}MB") + print_fn(f"accelerator memory reserved 99th percentile: {accelerator_memory_reserved_99th // 2**20}MB") + print_fn(f"train time: {time_train}s") + print_fn(f"total time: {time_total:.2f}s") + print_fn(f"file size of checkpoint: {file_size / 2**20:.1f}MB") + + +def log_to_file( + *, log_data: dict, save_dir: str, experiment_name: str, timestamp: str, print_fn: Callable[..., None] +) -> None: + file_name = os.path.join(save_dir, f"{get_artifact_stem(experiment_name, timestamp, save_dir)}.json") + with open(file_name, "w") as f: + json.dump(log_data, f, indent=2) + print_fn(f"Saved log to: {file_name}") + + +def get_result_save_dir(*, train_status: TrainStatus, peft_branch: str) -> str: + if train_status == TrainStatus.CANCELED: + return RESULT_PATH_CANCELLED + if peft_branch != "main": + return RESULT_PATH_TEST + if train_status == TrainStatus.SUCCESS: + return RESULT_PATH + return tempfile.mkdtemp() + + +def get_sample_image_save_dir(*, train_status: TrainStatus, 
peft_branch: str) -> str: + if train_status == TrainStatus.CANCELED: + return SAMPLE_IMAGE_PATH_CANCELLED + if peft_branch != "main": + return SAMPLE_IMAGE_PATH_TEST + if train_status == TrainStatus.SUCCESS: + return SAMPLE_IMAGE_PATH_MAIN + return tempfile.mkdtemp() + + +def get_artifact_stem(experiment_name: str, timestamp: str, save_dir: str) -> str: + experiment_name = experiment_name.replace(os.path.sep, "--") + if save_dir.endswith(RESULT_PATH) or save_dir.endswith(SAMPLE_IMAGE_PATH_MAIN): + return experiment_name + return f"{experiment_name}--{timestamp.replace(':', '-')}" + + +def log_results( + *, + experiment_name: str, + train_result: TrainResult, + time_total: float, + file_size: int, + model_info: Optional[huggingface_hub.ModelInfo], + dataset_info: Optional[huggingface_hub.DatasetInfo], + start_date: str, + train_config: TrainConfig, + peft_config: Optional[PeftConfig], + print_fn: Callable[..., None], +) -> None: + if train_result.accelerator_memory_reserved_log: + accelerator_memory_avg = int( + sum(train_result.accelerator_memory_reserved_log) / len(train_result.accelerator_memory_reserved_log) + ) + accelerator_memory_reserved_99th = int(np.percentile(train_result.accelerator_memory_reserved_log, 99)) + else: + accelerator_memory_avg = 0 + accelerator_memory_reserved_99th = 0 + + meta_info = get_meta_info() + if model_info is not None: + model_sha = model_info.sha + model_created_at = model_info.created_at.isoformat() + else: + model_sha = None + model_created_at = None + + if dataset_info is not None: + dataset_sha = dataset_info.sha + dataset_created_at = dataset_info.created_at.isoformat() + else: + dataset_sha = None + dataset_created_at = None + + peft_branch = get_peft_branch() + + save_dir = get_result_save_dir(train_status=train_result.status, peft_branch=peft_branch) + + if save_dir == RESULT_PATH_CANCELLED: + print_fn("Experiment run was categorized as canceled") + elif save_dir == RESULT_PATH_TEST: + print_fn(f"Experiment run was 
categorized as a test run on branch {peft_branch}") + elif save_dir == RESULT_PATH: + print_fn("Experiment run was categorized as successful run") + else: + print_fn(f"Experiment could not be categorized, writing results to {save_dir}. Please open an issue on PEFT.") + + if peft_config is None: + peft_config_dict: Optional[dict[str, Any]] = None + else: + peft_config_dict = copy.deepcopy(peft_config.to_dict()) + for key, value in peft_config_dict.items(): + if isinstance(value, set): + peft_config_dict[key] = list(value) + + log_data = { + "run_info": { + "created_at": start_date, + "total_time": time_total, + "experiment_name": experiment_name, + "peft_branch": peft_branch, + "train_config": asdict(train_config), + "peft_config": peft_config_dict, + "error_msg": train_result.error_msg, + }, + "train_info": { + "accelerator_memory_reserved_avg": accelerator_memory_avg, + "accelerator_memory_max": train_result.accelerator_memory_max_train, + "accelerator_memory_reserved_99th": accelerator_memory_reserved_99th, + "train_time": train_result.train_time, + "file_size": file_size, + "num_trainable_params": train_result.num_trainable_params, + "num_total_params": train_result.num_total_params, + "status": train_result.status.value, + "metrics": train_result.metrics, + }, + "meta_info": { + "model_info": {"sha": model_sha, "created_at": model_created_at}, + "dataset_info": {"sha": dataset_sha, "created_at": dataset_created_at}, + **asdict(meta_info), + }, + } + + log_to_console(log_data, print_fn=print) + log_to_file( + log_data=log_data, save_dir=save_dir, experiment_name=experiment_name, timestamp=start_date, print_fn=print_fn + ) diff --git a/method_comparison/processing.py b/method_comparison/processing.py index ea2b8caf40..30712918dd 100644 --- a/method_comparison/processing.py +++ b/method_comparison/processing.py @@ -20,53 +20,87 @@ import pandas as pd +def _preprocess_common(row): + """Extract fields common to all tasks from a single result row. 
+ + Returns a tuple of metainfo dict and train metrics, or None if the row should be skipped. + """ + run_info = row["run_info"] + train_info = row["train_info"] + meta_info = row["meta_info"] + if run_info["peft_config"]: + peft_type = run_info["peft_config"]["peft_type"] + else: + peft_type = "full-finetuning" + if train_info["status"] != "success": + return None + + train_metrics = train_info["metrics"][-1] + + dct = { + "experiment_name": run_info["experiment_name"], + "model_id": run_info["train_config"]["model_id"], + "train_config": run_info["train_config"], + "peft_type": peft_type, + "peft_config": run_info["peft_config"], + "accelerator_memory_reserved_avg": train_info["accelerator_memory_reserved_avg"], + "accelerator_memory_max": train_info["accelerator_memory_max"], + "accelerator_memory_reserved_99th": train_info["accelerator_memory_reserved_99th"], + "total_time": run_info["total_time"], + "train_time": train_info["train_time"], + "file_size": train_info["file_size"], + "num_trainable_params": train_info["num_trainable_params"], + "train_loss": train_metrics["train loss"], + "train_samples": train_metrics["train samples"], + "peft_version": meta_info["package_info"]["peft-version"], + "peft_branch": run_info["peft_branch"], + "transformers_version": meta_info["package_info"]["transformers-version"], + "datasets_version": meta_info["package_info"]["datasets-version"], + "torch_version": meta_info["package_info"]["torch-version"], + "package_info": meta_info["package_info"], + "system_info": meta_info["system_info"], + "created_at": run_info["created_at"], + } + return dct, train_metrics + + +def _preprocess_metamathqa(dct, train_metrics, meta_info): + """Add MetaMathQA-specific fields.""" + dct["test_accuracy"] = train_metrics["test accuracy"] + dct["train_total_tokens"] = train_metrics["train total tokens"] + dct["forgetting*"] = train_metrics.get("forgetting", 123) + dct["bitsandbytes_version"] = meta_info["package_info"]["bitsandbytes-version"] + + 
+def _preprocess_image_gen(dct, train_metrics, meta_info): + """Add image-gen-specific fields.""" + dct["test_dino_similarity"] = train_metrics["test dino_similarity"] + dct["drift*"] = train_metrics.get("drift", 123) + dct["diffusers_version"] = meta_info["package_info"]["diffusers-version"] + + +_TASK_PREPROCESSORS = { + "MetaMathQA": _preprocess_metamathqa, + "image-gen": _preprocess_image_gen, +} + + def preprocess(rows, task_name: str, print_fn=print): + task_preprocessor = _TASK_PREPROCESSORS.get(task_name) + if task_preprocessor is None: + raise ValueError(f"Unknown task_name: {task_name!r}. Choose from {list(_TASK_PREPROCESSORS)}") + results = [] skipped = 0 for row in rows: - run_info = row["run_info"] - train_info = row["train_info"] - meta_info = row["meta_info"] - if run_info["peft_config"]: - peft_type = run_info["peft_config"]["peft_type"] - else: - peft_type = "full-finetuning" - if train_info["status"] != "success": + common = _preprocess_common(row) + if common is None: skipped += 1 continue - train_metrics = train_info["metrics"][-1] - - # extract the fields that make most sense - dct = { - "task_name": task_name, - "experiment_name": run_info["experiment_name"], - "model_id": run_info["train_config"]["model_id"], - "train_config": run_info["train_config"], - "peft_type": peft_type, - "peft_config": run_info["peft_config"], - "accelerator_memory_reserved_avg": train_info["accelerator_memory_reserved_avg"], - "accelerator_memory_max": train_info["accelerator_memory_max"], - "accelerator_memory_reserved_99th": train_info["accelerator_memory_reserved_99th"], - "total_time": run_info["total_time"], - "train_time": train_info["train_time"], - "file_size": train_info["file_size"], - "num_trainable_params": train_info["num_trainable_params"], - "test_accuracy": train_metrics["test accuracy"], - "train_loss": train_metrics["train loss"], - "train_samples": train_metrics["train samples"], - "train_total_tokens": train_metrics["train total tokens"], - 
"forgetting*": train_metrics.get("forgetting", 123), - "peft_version": meta_info["package_info"]["peft-version"], - "peft_branch": run_info["peft_branch"], - "transformers_version": meta_info["package_info"]["transformers-version"], - "datasets_version": meta_info["package_info"]["datasets-version"], - "torch_version": meta_info["package_info"]["torch-version"], - "bitsandbytes_version": meta_info["package_info"]["bitsandbytes-version"], - "package_info": meta_info["package_info"], - "system_info": meta_info["system_info"], - "created_at": run_info["created_at"], - } + dct, train_metrics = common + dct["task_name"] = task_name + task_preprocessor(dct, train_metrics, row["meta_info"]) results.append(dct) if skipped: @@ -85,47 +119,48 @@ def load_jsons(path): return results -def load_df(path, task_name, print_fn=print): - jsons = load_jsons(path) - preprocessed = preprocess(jsons, task_name=task_name, print_fn=print_fn) - dtype_dict = { - "task_name": "string", - "experiment_name": "string", - "model_id": "string", - "train_config": "string", - "peft_type": "string", - "peft_config": "string", - "accelerator_memory_reserved_avg": int, - "accelerator_memory_max": int, - "accelerator_memory_reserved_99th": int, - "total_time": float, - "train_time": float, - "file_size": int, +_COMMON_DTYPES = { + "task_name": "string", + "experiment_name": "string", + "model_id": "string", + "train_config": "string", + "peft_type": "string", + "peft_config": "string", + "accelerator_memory_reserved_avg": int, + "accelerator_memory_max": int, + "accelerator_memory_reserved_99th": int, + "total_time": float, + "train_time": float, + "file_size": int, + "train_loss": float, + "train_samples": int, + "num_trainable_params": int, + "peft_version": "string", + "peft_branch": "string", + "transformers_version": "string", + "datasets_version": "string", + "torch_version": "string", + "package_info": "string", + "system_info": "string", + "created_at": "string", +} + +_TASK_DTYPES = { + 
"MetaMathQA": { "test_accuracy": float, - "train_loss": float, - "train_samples": int, "train_total_tokens": int, "forgetting*": float, - "num_trainable_params": int, - "peft_version": "string", - "peft_branch": "string", - "transformers_version": "string", - "datasets_version": "string", - "torch_version": "string", "bitsandbytes_version": "string", - "package_info": "string", - "system_info": "string", - "created_at": "string", - } - df = pd.DataFrame(preprocessed) - df = df.astype(dtype_dict) - df["created_at"] = pd.to_datetime(df["created_at"]) - # round training time to nearest second - df["train_time"] = df["train_time"].round().astype(int) - df["total_time"] = df["total_time"].round().astype(int) + }, + "image-gen": { + "test_dino_similarity": float, + "drift*": float, + "diffusers_version": "string", + }, +} - # reorder columns for better viewing, pinned_columns arg in Gradio seems not to work correctly - important_columns = [ +_TASK_IMPORTANT_COLUMNS = { + "MetaMathQA": [ "experiment_name", "peft_type", "total_time", @@ -140,7 +175,41 @@ def load_df(path, task_name, print_fn=print): "created_at", "task_name", "forgetting*", - ] + ], + "image-gen": [ + "experiment_name", + "peft_type", + "total_time", + "train_time", + "test_dino_similarity", + "drift*", + "train_loss", + "accelerator_memory_max", + "accelerator_memory_reserved_99th", + "accelerator_memory_reserved_avg", + "num_trainable_params", + "file_size", + "created_at", + "task_name", + ], +} + + +def load_df(path, task_name, print_fn=print): + jsons = load_jsons(path) + preprocessed = preprocess(jsons, task_name=task_name, print_fn=print_fn) + dtype_dict = {**_COMMON_DTYPES, **_TASK_DTYPES.get(task_name, {})} + if not preprocessed: + return pd.DataFrame(columns=dtype_dict.keys()) + df = pd.DataFrame(preprocessed) + df = df.astype(dtype_dict) + df["created_at"] = pd.to_datetime(df["created_at"]) + # round training time to nearest second + df["train_time"] = df["train_time"].round().astype(int) + 
df["total_time"] = df["total_time"].round().astype(int) + + # reorder columns for better viewing, pinned_columns arg in Gradio seems not to work correctly + important_columns = _TASK_IMPORTANT_COLUMNS.get(task_name, ["experiment_name", "peft_type"]) other_columns = [col for col in df if col not in important_columns] df = df[important_columns + other_columns]