Handle CustomConv2d bias dtype mismatches

JPPhoto · JPPhoto · commit fdf6be531b10 · 2026-04-09T20:47:44.000-05:00
diff --git a/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_conv2d.py b/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_conv2d.py
@@ -7,12 +7,25 @@
 from invokeai.backend.model_manager.load.model_cache.torch_module_autocast.custom_modules.utils import (
     add_nullable_tensors,
 )
+from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 
 
 class CustomConv2d(torch.nn.Conv2d, CustomModuleMixin):
+    def _cast_tensor_for_input(self, tensor: torch.Tensor | None, input: torch.Tensor) -> torch.Tensor | None:
+        """Run `tensor` through `cast_to_device` for `input`'s device, then match `input`'s dtype if both are float.
+
+        `None` and `GGMLTensor` values are returned without any dtype cast.
+        """
+        moved = cast_to_device(tensor, input.device)
+        if moved is None or isinstance(moved, GGMLTensor):
+            return moved
+        if input.is_floating_point() and moved.is_floating_point() and moved.dtype != input.dtype:
+            return moved.to(dtype=input.dtype)
+        return moved
+
     def _autocast_forward_with_patches(self, input: torch.Tensor) -> torch.Tensor:
-        weight = cast_to_device(self.weight, input.device)
-        bias = cast_to_device(self.bias, input.device)
+        weight = self._cast_tensor_for_input(self.weight, input)
+        bias = self._cast_tensor_for_input(self.bias, input)
 
         # Prepare the original parameters for the patch aggregation.
         orig_params = {"weight": weight, "bias": bias}
@@ -25,19 +38,34 @@ def _autocast_forward_with_patches(self, input: torch.Tensor) -> torch.Tensor:
             device=input.device,
         )
 
-        weight = add_nullable_tensors(weight, aggregated_param_residuals.get("weight", None))
-        bias = add_nullable_tensors(bias, aggregated_param_residuals.get("bias", None))
+        # Align the patch residuals with the input (device and dtype) before adding them to the parameters.
+        weight_residual = self._cast_tensor_for_input(aggregated_param_residuals.get("weight", None), input)
+        bias_residual = self._cast_tensor_for_input(aggregated_param_residuals.get("bias", None), input)
+        weight = add_nullable_tensors(weight, weight_residual)
+        bias = add_nullable_tensors(bias, bias_residual)
         return self._conv_forward(input, weight, bias)
 
     def _autocast_forward(self, input: torch.Tensor) -> torch.Tensor:
-        weight = cast_to_device(self.weight, input.device)
-        bias = cast_to_device(self.bias, input.device)
+        weight = self._cast_tensor_for_input(self.weight, input)
+        bias = self._cast_tensor_for_input(self.bias, input)
         return self._conv_forward(input, weight, bias)
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
         if len(self._patches_and_weights) > 0:
             return self._autocast_forward_with_patches(input)
         elif self._device_autocasting_enabled:
             return self._autocast_forward(input)
+        elif input.is_floating_point() and any(
+            param is not None
+            and param.is_floating_point()
+            and not isinstance(param, GGMLTensor)
+            and param.dtype != input.dtype
+            for param in (self.weight, self.bias)
+        ):
+            # Neither patches nor device autocasting apply, but a plain (non-GGML) floating-point
+            # weight or bias has a different dtype than the input: align both before convolving.
+            weight = self._cast_tensor_for_input(self.weight, input)
+            bias = self._cast_tensor_for_input(self.bias, input)
+            return self._conv_forward(input, weight, bias)
         else:
             return super().forward(input)
diff --git a/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_linear.py b/invokeai/backend/model_manager/load/model_cache/torch_module_autocast/custom_modules/custom_linear.py
@@ -9,6 +9,7 @@
 from invokeai.backend.patches.layers.base_layer_patch import BaseLayerPatch
 from invokeai.backend.patches.layers.flux_control_lora_layer import FluxControlLoRALayer
 from invokeai.backend.patches.layers.lora_layer import LoRALayer
+from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 
 
 def linear_lora_forward(input: torch.Tensor, lora_layer: LoRALayer, lora_weight: float) -> torch.Tensor:
@@ -73,18 +74,50 @@ def autocast_linear_forward_sidecar_patches(
 
 
 class CustomLinear(torch.nn.Linear, CustomModuleMixin):
+    @staticmethod
+    def _needs_cast_for_input(tensor: torch.Tensor | None, input: torch.Tensor) -> bool:
+        """Return True when `tensor` is a plain floating-point tensor whose dtype differs from `input`'s.
+
+        `None`, non-floating-point tensors and `GGMLTensor` instances never need a dtype cast.
+        """
+        return (
+            tensor is not None
+            and input.is_floating_point()
+            and tensor.is_floating_point()
+            and not isinstance(tensor, GGMLTensor)
+            and tensor.dtype != input.dtype
+        )
+
+    def _cast_weight_bias_for_input(self, input: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor | None]:
+        """Return this layer's weight and bias passed through `cast_to_device` and dtype-aligned with `input`.
+
+        Weight and bias are checked independently, so a bias whose dtype differs from the input is cast even
+        when the weight already matches it.
+        """
+        weight = cast_to_device(self.weight, input.device)
+        bias = cast_to_device(self.bias, input.device)
+        if self._needs_cast_for_input(weight, input):
+            weight = weight.to(dtype=input.dtype)
+        if self._needs_cast_for_input(bias, input):
+            bias = bias.to(dtype=input.dtype)
+        return weight, bias
+
     def _autocast_forward_with_patches(self, input: torch.Tensor) -> torch.Tensor:
         return autocast_linear_forward_sidecar_patches(self, input, self._patches_and_weights)
 
     def _autocast_forward(self, input: torch.Tensor) -> torch.Tensor:
-        weight = cast_to_device(self.weight, input.device)
-        bias = cast_to_device(self.bias, input.device)
+        weight, bias = self._cast_weight_bias_for_input(input)
         return torch.nn.functional.linear(input, weight, bias)
 
     def forward(self, input: torch.Tensor) -> torch.Tensor:
         if len(self._patches_and_weights) > 0:
             return self._autocast_forward_with_patches(input)
         elif self._device_autocasting_enabled:
             return self._autocast_forward(input)
+        elif self._needs_cast_for_input(self.weight, input) or self._needs_cast_for_input(self.bias, input):
+            # No patches and no device autocasting, but the weight and/or the bias has a different
+            # floating-point dtype than the input: cast whichever tensor needs it before the matmul.
+            weight, bias = self._cast_weight_bias_for_input(input)
+            return torch.nn.functional.linear(input, weight, bias)
         else:
             return super().forward(input)