feat: Test new network and self-attention.

chairc · chairc · commit 902edddca8c5 · 2025-04-28T21:53:14.000+08:00
diff --git a/iddm/config/choices.py b/iddm/config/choices.py
@@ -17,7 +17,7 @@
 # Support option
 bool_choices = [True, False]
 sample_choices = ["ddpm", "ddim", "plms"]
-network_choices = ["unet", "cspdarkunet", "unetv2"]
+network_choices = ["unet", "cspdarkunet", "unetv2", "unet-slim"]
 optim_choices = ["adam", "adamw", "sgd"]
 act_choices = ["gelu", "silu", "relu", "relu6", "lrelu"]
 lr_func_choices = ["linear", "cosine", "warmup_cosine"]
diff --git a/iddm/model/modules/attention.py b/iddm/model/modules/attention.py
@@ -51,3 +51,61 @@ def forward(self, x):
         attention_value = attention_value + x
         attention_value = self.ff_self(attention_value) + attention_value
         return attention_value.swapaxes(2, 1).view(-1, self.channels, self.size[0], self.size[1])
+
+
+class SelfAttentionAD(nn.Module):
+    """
+    Self-attention block over flattened spatial positions; the head count is derived from `channels`
+    """
+
+    def __init__(self, channels, size, act="silu", dropout=0.1):
+        """
+        Initialize the adaptive head count self-attention block
+        :param channels: Channels; used as the attention embedding dim and the width of every norm/linear layer
+        :param size: Size; forward reads size[0] as the expected height and size[1] as the expected width
+        :param act: Activation function name, resolved through get_activation_function
+        :param dropout: Rate of the two nn.Dropout layers in ff_self (it is not passed to nn.MultiheadAttention)
+        """
+        super(SelfAttentionAD, self).__init__()
+        self.channels = channels
+        self.size = size
+        self.dropout = dropout
+
+        # Adaptive head count: one head per full 64 channels, never fewer than one
+        head_count = max(1, channels // 64)
+
+        # batch_first=True needs PyTorch 1.9 or above (it is not supported in 1.8).
+        # NOTE(review): nn.MultiheadAttention presumably requires channels % head_count == 0 — confirm for all callers
+        self.mha = nn.MultiheadAttention(embed_dim=channels, num_heads=head_count, batch_first=True)
+        self.ln = nn.LayerNorm(normalized_shape=[channels])
+        self.ff_self = nn.Sequential(
+            nn.LayerNorm(normalized_shape=[channels]),
+            nn.Linear(in_features=channels, out_features=channels),
+            get_activation_function(name=act),
+            nn.Dropout(dropout),
+            nn.Linear(in_features=channels, out_features=channels),
+            nn.Dropout(dropout),
+        )
+
+    def forward(self, x):
+        """
+        Apply self-attention and then the feed-forward layers, each followed by a residual addition
+        :param x: Input unpacked as (batch, channels, height, width); height/width are asserted to equal self.size
+        :return: attention_value viewed back to (batch, channels, height, width)
+        """
+        batch, channels, height, width = x.shape
+        assert height == self.size[0] and width == self.size[1], \
+            f"Input size {height}x{width} does not match the expected size {self.size[0]}x{self.size[1]}"
+        # Flatten the spatial dimensions into a single sequence dimension
+        # (batch, channels, height, width) -> (batch, height*width, channels)
+        x_flat = x.flatten(2).swapaxes(1, 2)
+
+        # First residual: attention over the layer-normed sequence, added to the un-normed input
+        x_ln = self.ln(x_flat)
+        # self.mha was built with batch_first=True, so x_ln is passed as-is without a transpose
+        attention_value, _ = self.mha(x_ln, x_ln, x_ln)
+        attention_value = attention_value + x_flat
+
+        # Second residual: feed-forward output added to its own input
+        attention_value = self.ff_self(attention_value) + attention_value
+        return attention_value.swapaxes(1, 2).view(batch, channels, height, width)
diff --git a/iddm/model/networks/__init__.py b/iddm/model/networks/__init__.py
@@ -10,6 +10,7 @@
 from .cspdarkunet import CSPDarkUnet
 from .unet import UNet
 from .unetv2 import UNetV2
+from .unet_slim import UNetSlim
 
 # Super resolution network
 from .sr.srv1 import SRv1
diff --git a/iddm/model/networks/unet_slim.py b/iddm/model/networks/unet_slim.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+"""
+    @Date   : 2023/6/23 22:26
+    @Author : chairc
+    @Site   : https://github.com/chairc
+"""
+
+from iddm.model.networks.unet import UNet
+
+
+class UNetSlim(UNet):
+    """
+    UNet-Slim: slim network demo built on UNet (reported to reduce GPU usage by 45%); forward calls only sa1, sa3, sa5
+    """
+
+    def __init__(self, **kwargs):
+        """
+        Initialize the UNet-Slim network; every keyword argument is forwarded unchanged to UNet.__init__
+        (the names below are presumably the parameters UNet.__init__ accepts — TODO confirm against UNet)
+        :param in_channel: Input channel
+        :param out_channel: Output channel
+        :param channel: The list of channel
+        :param time_channel: Time channel
+        :param num_classes: Number of classes
+        :param image_size: Adaptive image size
+        :param device: Device type
+        :param act: Activation function
+        """
+        super(UNetSlim, self).__init__(**kwargs)  # NOTE(review): sa* blocks UNet may build but forward skips stay allocated — confirm
+
+    def forward(self, x, time, y=None):
+        """
+        Forward: inc, down1-down3, bot1-bot3, up1-up3 and outc, with sa1/sa3/sa5 applied after down1/down3/up2
+        :param x: Input
+        :param time: Time; replaced by the result of encode_time_with_label(time, y) before any layer uses it
+        :param y: Input label, defaults to None
+        :return: output of self.outc
+        """
+        time = self.encode_time_with_label(time=time, y=y)
+
+        x = self.inc(x)
+        x1 = x  # kept to be passed to up3 as its second argument
+        x = self.down1(x, time)
+        x = self.sa1(x)
+        x2_sa = x  # sa1 output, later passed to up2 as its second argument
+        x = self.down2(x, time)
+        x3_sa = x  # raw down2 output (no sa call here), later passed to up1 as its second argument
+        x = self.down3(x, time)
+        x = self.sa3(x)
+
+        x = self.bot1(x)
+        x = self.bot2(x)
+        x = self.bot3(x)
+
+        x = self.up1(x, x3_sa, time)
+        x = self.up2(x, x2_sa, time)
+        x = self.sa5(x)
+        x = self.up3(x, x1, time)
+        output = self.outc(x)
+        return output
diff --git a/iddm/utils/initializer.py b/iddm/utils/initializer.py
@@ -17,6 +17,7 @@
 
 from iddm.model.networks.unet import UNet
 from iddm.model.networks.unetv2 import UNetV2
+from iddm.model.networks.unet_slim import UNetSlim
 from iddm.model.networks.cspdarkunet import CSPDarkUnet
 from iddm.model.networks.sr.srv1 import SRv1
 from iddm.model.samples.ddim import DDIMDiffusion
@@ -94,6 +95,8 @@ def network_initializer(network, device):
         Network = UNetV2
     elif network == "cspdarkunet":
         Network = CSPDarkUnet
+    elif network == "unet-slim":
+        Network = UNetSlim
     else:
         Network = UNet
         logger.warning(msg=f"[{device}]: Setting network error, we has been automatically set to unet.")