# sas_/convert_sas.py at main · DensoITLab/sas_ · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# Copyright (C) 2025 Denso IT Laboratory, Inc.
# All Rights Reserved

import os
import torch
import torch.nn as nn
from sas_conv2d import SASConv2d
from sas_linear import SASLinear


def _swap_activation(module):
    """Return the SAS-mode replacement for an activation, or ``None``.

    ``nn.ReLU`` becomes an ``nn.Identity`` tagged with ``_was_relu = True``;
    an untagged ``nn.Identity`` becomes a fresh ``nn.ReLU``.  The tag keeps an
    Identity produced here from being turned back into a ReLU if the model is
    converted again.
    """
    if isinstance(module, nn.ReLU):
        replacement = nn.Identity()
        replacement._was_relu = True
        return replacement
    if isinstance(module, nn.Identity) and not hasattr(module, '_was_relu'):
        return nn.ReLU()
    return None


def _expand_sparse_weight(weight, sparse_m):
    """Widen ``weight`` ``sparse_m``-fold along dim 1, zero-filling the gaps.

    Slice ``k`` of the input along dim 1 is written to index ``k * sparse_m``
    of the result and every other index stays zero.  For ``sparse_m == 2``
    this equals ``repeat_interleave(2, dim=1)`` followed by zeroing the odd
    indices.  A new tensor is returned; ``weight`` is not modified.
    """
    shape = list(weight.shape)
    shape[1] *= sparse_m
    expanded = weight.new_zeros(shape)
    expanded[:, ::sparse_m] = weight
    return expanded


def _place_like(new_layer, reference):
    """Move ``new_layer`` to the device and dtype of the tensor ``reference``.

    Freshly built layers live on the CPU in the default dtype; without this a
    model that was already moved or cast before conversion would end up with
    parameters on mixed devices/dtypes.
    """
    return new_layer.to(device=reference.device, dtype=reference.dtype)


def _convert_conv(module, use_sas):
    """Build the replacement for one ``nn.Conv2d`` and copy its parameters."""
    shared = dict(
        in_channels=module.in_channels,
        out_channels=module.out_channels,
        kernel_size=module.kernel_size,
        stride=module.stride,
        padding=module.padding,
        dilation=module.dilation,
        groups=module.groups,
        bias=(module.bias is not None),
    )
    has_bias = module.bias is not None

    if use_sas:
        # NOTE(review): padding_mode is not forwarded to SASConv2d because its
        # signature is not visible from this file -- confirm whether it is
        # supported.
        new_layer = SASConv2d(**shared, sparse_m=2, sparse_n=2, is_conv=True)
        new_layer = _place_like(new_layer, module.weight)
        with torch.no_grad():
            # Spread the original weights over the wider input-channel axis.
            expanded_weight = _expand_sparse_weight(
                module.weight.data, new_layer.sparse_m
            )
            new_layer.weight.data.copy_(expanded_weight)
            if has_bias:
                new_layer.bias.data.copy_(module.bias.data)
        return new_layer

    # The wrapper forwards its kwargs to nn.Conv2d, so padding_mode must be
    # carried over or a non-'zeros' convolution would change behaviour.
    new_layer = dummy_conv(**shared, padding_mode=module.padding_mode)
    new_layer = _place_like(new_layer, module.weight)
    with torch.no_grad():
        new_layer.dummy_conv.weight.data.copy_(module.weight.data)
        if has_bias:
            new_layer.dummy_conv.bias.data.copy_(module.bias.data)
    return new_layer


def _convert_linear(module, use_sas):
    """Build the replacement for one ``nn.Linear`` and copy its parameters."""
    has_bias = module.bias is not None

    if use_sas:
        new_layer = SASLinear(
            in_channels=module.in_features,
            out_channels=module.out_features,
            kernel_size=(1, 1),
            stride=1,
            padding=[0, 0],
            dilation=1,
            groups=1,
            bias=has_bias,
            sparse_m=2,
            sparse_n=2,
            is_conv=False
        )
        new_layer = _place_like(new_layer, module.weight)
        with torch.no_grad():
            # Spread the original weights over the wider input axis.
            expanded_weight = _expand_sparse_weight(
                module.weight.data, new_layer.sparse_m
            )
            new_layer.weight.data.copy_(expanded_weight)
            if has_bias:
                new_layer.bias.data.copy_(module.bias.data)
        return new_layer

    new_layer = dummy_linear(
        in_features=module.in_features,
        out_features=module.out_features,
        bias=has_bias,
    )
    new_layer = _place_like(new_layer, module.weight)
    with torch.no_grad():
        new_layer.dummy_linear.weight.data.copy_(module.weight.data)
        if has_bias:
            new_layer.dummy_linear.bias.data.copy_(module.bias.data)
    return new_layer


def convert_layers(model, use_sas):
    """Recursively replace the layers of ``model`` in place.

    With ``use_sas`` true:
      * every ``nn.ReLU`` is replaced by a tagged ``nn.Identity`` and every
        untagged ``nn.Identity`` by an ``nn.ReLU``;
      * every ``nn.Conv2d`` except those with ``in_channels == 3`` is
        replaced by a ``SASConv2d``, and every ``nn.Linear`` by a
        ``SASLinear``, with the original weights written to every
        ``sparse_m``-th input slot and zeros elsewhere.

    With ``use_sas`` false, ``nn.Conv2d`` / ``nn.Linear`` are wrapped in
    ``dummy_conv`` / ``dummy_linear`` carrying the same parameters, and
    activations are left untouched.

    Replacement layers are placed on the device and dtype of the weights they
    replace.

    Args:
        model: The ``nn.Module`` whose direct and nested children are
            rewritten.
        use_sas: Selects SAS layers (true) or dummy wrappers (false).

    Returns:
        A ``(model, conversion_count)`` tuple: the same ``model`` object and
        the number of modules replaced, activation swaps included.
    """
    conversion_count = 0

    # Snapshot the items: entries of model._modules are reassigned below.
    for name, module in list(model._modules.items()):
        # Descend first so nested containers are converted before this
        # entry itself is inspected.
        if module is not None and any(True for _ in module.children()):
            model._modules[name], num_converted = convert_layers(module, use_sas)
            conversion_count += num_converted

        if use_sas:
            replacement = _swap_activation(module)
            if replacement is not None:
                model._modules[name] = replacement
                conversion_count += 1
                module = replacement

        if isinstance(module, nn.Conv2d):
            # 3-input-channel convolutions are left as they are in SAS mode.
            if use_sas and module.in_channels == 3:
                continue
            new_layer = _convert_conv(module, use_sas)
        elif isinstance(module, nn.Linear):
            new_layer = _convert_linear(module, use_sas)
        else:
            continue

        model._modules[name] = new_layer
        conversion_count += 1

    return model, conversion_count


# Definition of dummy conv/linear layer

class dummy_conv(nn.Module):
    """Thin wrapper holding a single ``nn.Conv2d`` as ``self.dummy_conv``.

    All constructor arguments are passed straight to ``nn.Conv2d``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        wrapped = nn.Conv2d(*args, **kwargs)
        # The attribute name is part of the parameter names in state_dict.
        self.dummy_conv = wrapped

    def forward(self, x):
        """Apply the wrapped convolution to ``x`` and return the result."""
        conv = self.dummy_conv
        return conv(x)

    def init_weights(self):
        """Re-initialise the wrapped convolution via ``reset_parameters``."""
        conv = self.dummy_conv
        conv.reset_parameters()

class dummy_linear(nn.Module):
    """Thin wrapper holding a single ``nn.Linear`` as ``self.dummy_linear``.

    All constructor arguments are passed straight to ``nn.Linear``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        wrapped = nn.Linear(*args, **kwargs)
        # The attribute name is part of the parameter names in state_dict.
        self.dummy_linear = wrapped

    def forward(self, x):
        """Apply the wrapped linear layer to ``x`` and return the result."""
        linear = self.dummy_linear
        return linear(x)

    def init_weights(self):
        """Re-initialise the wrapped linear layer via ``reset_parameters``."""
        linear = self.dummy_linear
        linear.reset_parameters()