improve and correct merger operations based on papers

silveroxides · silveroxides · commit b1ec55ff423a · 2026-01-23T18:41:35.000+01:00
diff --git a/docs/merger_2_model_modes.md b/docs/merger_2_model_modes.md
@@ -23,12 +23,47 @@
 ---
 
 ## Power-Up (DARE)
-> Adds the unique capabilities of Model B to Model A using the Drop and Rescale (DARE) technique, which often preserves the knowledge of the base model better than simple additions.
+> Adds the unique capabilities of Model B to Model A using the Drop and Rescale (DARE) technique. This implementation handles shape mismatches between models by padding and uses a randomized dropout mask.
 
 **Models Used:** A, B
 **Parameters:**
-- **Alpha:** The dropout rate. This is the proportion of unique weights from Model B that are *dropped* before merging. A higher value means less of B is merged.
+- **Alpha:** The dropout rate ($p$). This is the proportion of delta parameters (the weight differences between Model B and Model A) that are randomly set to zero before merging.
 - **Beta:** A final multiplier for the rescaled difference before it's added to Model A.
+- **Rescaling (automatic, not a user parameter):** The delta parameters that survive the dropout are rescaled by $1/(1-p)$, as per the DARE paper, to approximate the original embeddings.
+
+---
+
+## Enhanced Man Interp
+> Sophisticated interpolation between values from A and B depending on their difference relative to other values, with manual threshold control.
+
+**Models Used:** A, B
+**Parameters:**
+- **Alpha:** Interpolation strength.
+- **Beta:** Lower mean threshold for filtering differences.
+- **Gamma:** Upper mean threshold for filtering differences.
+- **Delta:** Smoothness factor (mix between randomized mask and powered differences).
+
+---
+
+## Enhanced Auto Interp
+> Automated version of the enhanced interpolation mode that dynamically calculates thresholds based on mean differences.
+
+**Models Used:** A, B
+**Parameters:**
+- **Alpha:** Interpolation strength.
+- **Beta:** Threshold adjustment factor.
+- **Gamma:** Smoothness factor.
+
+---
+
+## Weight-Sum Cutoff
+> A linear interpolation mode that only applies the merge to weights whose differences fall within a specific threshold range.
+
+**Models Used:** A, B
+**Parameters:**
+- **Alpha:** Interpolation weight (multiplier for the difference).
+- **Beta:** Upper threshold for the difference cutoff.
+- **Gamma:** Lower threshold for the difference cutoff.
 
 ---
 
@@ -84,4 +119,4 @@ Layers matching any pattern will be **removed entirely** from the output.
 | `text_model` | All text encoder layers |
 | `\.norm` | All normalization layers |
 | `attn\.(q\|k\|v)` | Query, key, value attention weights |
-| `block\.[0-9]\.` | Blocks 0-9 |
+| `block\.[0-9]\.` | Blocks 0-9 |
diff --git a/docs/merger_3_model_modes.md b/docs/merger_3_model_modes.md
@@ -22,19 +22,19 @@
 ---
 
 ## Extract-Features
-> A powerful mode that identifies features present in both `(B - A)` and `(C - A)` and adds them to A. Allows for fine-grained control over combining aspects based on their similarity.
+> A powerful mode that identifies features present in both `(B - A)` and `(C - A)` and adds them to A. It uses per-vector cosine similarity to decide how much of each feature to keep, allowing for fine-grained control over combining aspects.
 
 **Models Used:** A, B, C
 **Parameters:**
 - **Alpha:** Weights the merge between Model B (`0.0`) and Model C (`1.0`).
 - **Beta:** Controls the focus on similarity (`0.0`) versus dissimilarity (`1.0`).
-- **Gamma:** A bias exponent for similarity. Higher values increase the bias.
+- **Gamma:** A bias exponent for the similarity calculation.
 - **Delta:** A final multiplier for the extracted features before they are added to Model A.
 
 ---
 
 ## Add-Dissimilarities
-> Identifies features that are dissimilar between Model B and Model C and adds them to Model A. Useful for combining unique aspects of two different models.
+> Identifies features that are dissimilar between Model B and Model C (relative to A) and adds them to Model A. Useful for combining unique aspects of two different models.
 
 **Models Used:** A, B, C
 **Parameters:**
@@ -80,4 +80,4 @@ Layers matching any pattern will be **removed entirely** from the output.
 **Pattern format:**
 - Whitespace-separated regex patterns
 - Patterns use **substring matching** (not full match)
-- Example: `text_model lora` matches any key containing "text_model" OR "lora"
+- Example: `text_model lora` matches any key containing "text_model" OR "lora"
diff --git a/nodes/merger.py b/nodes/merger.py
@@ -222,6 +222,7 @@ def define_schema(cls):
                 io.Float.Input("alpha", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("beta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("gamma", default=0.99, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("delta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff),
                 io.String.Input("output_filename", default="merged_2_checkpoint"),
                 io.Combo.Input("save_dtype", options=["fp32", "fp16", "bf16"]),
@@ -238,7 +239,7 @@ def define_schema(cls):
     @classmethod
     def execute(cls, execution_mode: str, model_a: str, model_b: str,
                 calc_mode: str, mismatch_mode: str, alpha: float, beta: float,
-                gamma: float, seed: int, output_filename: str, save_dtype: str,
+                gamma: float, delta: float, seed: int, output_filename: str, save_dtype: str,
                 process_device: str, exclude_patterns: str, discard_patterns: str) -> io.NodeOutput:
         doc = load_documentation_from_file('merger_2_model_modes.md')
         if execution_mode == "DOCUMENTATION ONLY":
@@ -247,7 +248,7 @@ def execute(cls, execution_mode: str, model_a: str, model_b: str,
         recipe_params = {
             "model_a": model_a, "model_b": model_b, "calc_mode": calc_mode,
             "mismatch_mode": mismatch_mode,
-            "alpha": alpha, "beta": beta, "gamma": gamma, "seed": seed,
+            "alpha": alpha, "beta": beta, "gamma": gamma, "delta": delta, "seed": seed,
             "output_filename": output_filename, "save_dtype": save_dtype,
             "device": process_device, "dtype": torch.float32,
             "exclude_patterns": exclude_patterns, "discard_patterns": discard_patterns,
@@ -275,6 +276,7 @@ def define_schema(cls):
                 io.Float.Input("alpha", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("beta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("gamma", default=0.99, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("delta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff),
                 io.String.Input("output_filename", default="merged_2_model"),
                 io.Combo.Input("save_dtype", options=["fp32", "fp16", "bf16"]),
@@ -291,7 +293,7 @@ def define_schema(cls):
     @classmethod
     def execute(cls, execution_mode: str, model_a: str, model_b: str,
                 calc_mode: str, mismatch_mode: str, alpha: float, beta: float,
-                gamma: float, seed: int, output_filename: str, save_dtype: str,
+                gamma: float, delta: float, seed: int, output_filename: str, save_dtype: str,
                 process_device: str, exclude_patterns: str, discard_patterns: str) -> io.NodeOutput:
         doc = load_documentation_from_file('merger_2_model_modes.md')
         if execution_mode == "DOCUMENTATION ONLY":
@@ -300,7 +302,7 @@ def execute(cls, execution_mode: str, model_a: str, model_b: str,
         recipe_params = {
             "model_a": model_a, "model_b": model_b, "calc_mode": calc_mode,
             "mismatch_mode": mismatch_mode,
-            "alpha": alpha, "beta": beta, "gamma": gamma, "seed": seed,
+            "alpha": alpha, "beta": beta, "gamma": gamma, "delta": delta, "seed": seed,
             "output_filename": output_filename, "save_dtype": save_dtype,
             "device": process_device, "dtype": torch.float32,
             "exclude_patterns": exclude_patterns, "discard_patterns": discard_patterns,
@@ -328,6 +330,7 @@ def define_schema(cls):
                 io.Float.Input("alpha", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("beta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("gamma", default=0.99, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("delta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff),
                 io.String.Input("output_filename", default="merged_2_textencoder"),
                 io.Combo.Input("save_dtype", options=["fp32", "fp16", "bf16"]),
@@ -344,7 +347,7 @@ def define_schema(cls):
     @classmethod
     def execute(cls, execution_mode: str, model_a: str, model_b: str,
                 calc_mode: str, mismatch_mode: str, alpha: float, beta: float,
-                gamma: float, seed: int, output_filename: str, save_dtype: str,
+                gamma: float, delta: float, seed: int, output_filename: str, save_dtype: str,
                 process_device: str, exclude_patterns: str, discard_patterns: str) -> io.NodeOutput:
         doc = load_documentation_from_file('merger_2_model_modes.md')
         if execution_mode == "DOCUMENTATION ONLY":
@@ -353,7 +356,7 @@ def execute(cls, execution_mode: str, model_a: str, model_b: str,
         recipe_params = {
             "model_a": model_a, "model_b": model_b, "calc_mode": calc_mode,
             "mismatch_mode": mismatch_mode,
-            "alpha": alpha, "beta": beta, "gamma": gamma, "seed": seed,
+            "alpha": alpha, "beta": beta, "gamma": gamma, "delta": delta, "seed": seed,
             "output_filename": output_filename, "save_dtype": save_dtype,
             "device": process_device, "dtype": torch.float32,
             "exclude_patterns": exclude_patterns, "discard_patterns": discard_patterns,
@@ -381,6 +384,7 @@ def define_schema(cls):
                 io.Float.Input("alpha", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("beta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("gamma", default=0.99, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("delta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff),
                 io.String.Input("output_filename", default="merged_2_lora"),
                 io.Combo.Input("save_dtype", options=["fp32", "fp16", "bf16"]),
@@ -397,7 +401,7 @@ def define_schema(cls):
     @classmethod
     def execute(cls, execution_mode: str, model_a: str, model_b: str,
                 calc_mode: str, mismatch_mode: str, alpha: float, beta: float,
-                gamma: float, seed: int, output_filename: str, save_dtype: str,
+                gamma: float, delta: float, seed: int, output_filename: str, save_dtype: str,
                 process_device: str, exclude_patterns: str, discard_patterns: str) -> io.NodeOutput:
         doc = load_documentation_from_file('merger_2_model_modes.md')
         if execution_mode == "DOCUMENTATION ONLY":
@@ -406,7 +410,7 @@ def execute(cls, execution_mode: str, model_a: str, model_b: str,
         recipe_params = {
             "model_a": model_a, "model_b": model_b, "calc_mode": calc_mode,
             "mismatch_mode": mismatch_mode,
-            "alpha": alpha, "beta": beta, "gamma": gamma, "seed": seed,
+            "alpha": alpha, "beta": beta, "gamma": gamma, "delta": delta, "seed": seed,
             "output_filename": output_filename, "save_dtype": save_dtype,
             "device": process_device, "dtype": torch.float32,
             "exclude_patterns": exclude_patterns, "discard_patterns": discard_patterns,
@@ -434,6 +438,7 @@ def define_schema(cls):
                 io.Float.Input("alpha", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("beta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Float.Input("gamma", default=0.99, min=0.0, max=1.0, step=0.001),
+                io.Float.Input("delta", default=0.5, min=-2.0, max=3.0, step=0.01),
                 io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff),
                 io.String.Input("output_filename", default="merged_2_embedding"),
                 io.Combo.Input("save_dtype", options=["fp32", "fp16", "bf16"]),
@@ -450,7 +455,7 @@ def define_schema(cls):
     @classmethod
     def execute(cls, execution_mode: str, model_a: str, model_b: str,
                 calc_mode: str, mismatch_mode: str, alpha: float, beta: float,
-                gamma: float, seed: int, output_filename: str, save_dtype: str,
+                gamma: float, delta: float, seed: int, output_filename: str, save_dtype: str,
                 process_device: str, exclude_patterns: str, discard_patterns: str) -> io.NodeOutput:
         doc = load_documentation_from_file('merger_2_model_modes.md')
         if execution_mode == "DOCUMENTATION ONLY":
@@ -459,7 +464,7 @@ def execute(cls, execution_mode: str, model_a: str, model_b: str,
         recipe_params = {
             "model_a": model_a, "model_b": model_b, "calc_mode": calc_mode,
             "mismatch_mode": mismatch_mode,
-            "alpha": alpha, "beta": beta, "gamma": gamma, "seed": seed,
+            "alpha": alpha, "beta": beta, "gamma": gamma, "delta": delta, "seed": seed,
             "output_filename": output_filename, "save_dtype": save_dtype,
             "device": process_device, "dtype": torch.float32,
             "exclude_patterns": exclude_patterns, "discard_patterns": discard_patterns,
diff --git a/nodes/merger_ops.py b/nodes/merger_ops.py