Arm backend: Add FP16 support to operators pt.2 (pytorch#17088)

martinlsm · web-flow · commit 64d7236f8e9e · 2026-02-02T12:40:57.000+01:00
Add FP16 support for operators:
 - conv2d
 - conv3d
 - cos
 - slice
 - pad

Update op tests to cover the new datatype.

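Also rename the a16w8 tests in test_conv2d.py from test_conv2d_* to test_convolution_2d_* so they follow the naming convention used by the other tests in that file.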

Signed-off-by: Martin Lindström <Martin.Lindstroem@arm.com>
diff --git a/backends/arm/operators/op_constant_pad_nd.py b/backends/arm/operators/op_constant_pad_nd.py
@@ -46,6 +46,7 @@ def define_node(
                 ts.DType.INT8,
                 ts.DType.INT16,
                 ts.DType.INT32,
+                ts.DType.FP16,
                 ts.DType.FP32,
                 ts.DType.BF16,
                 ts.DType.BOOL,
diff --git a/backends/arm/operators/op_cos.py b/backends/arm/operators/op_cos.py
@@ -42,7 +42,7 @@ def define_node(
         validate_valid_dtype(
             self.target,
             [*inputs, output],
-            [ts.DType.FP32, ts.DType.BF16],
+            [ts.DType.FP16, ts.DType.FP32, ts.DType.BF16],
             self.tosa_spec,
         )
         attr = ts.TosaSerializerAttribute()
diff --git a/backends/arm/operators/op_slice.py b/backends/arm/operators/op_slice.py
@@ -77,6 +77,7 @@ def define_node(
                 ts.DType.INT16,
                 ts.DType.INT32,
                 ts.DType.BF16,
+                ts.DType.FP16,
                 ts.DType.FP32,
             ],
             self.tosa_spec,
diff --git a/backends/arm/operators/op_tosa_conv2d.py b/backends/arm/operators/op_tosa_conv2d.py
@@ -51,7 +51,7 @@ def define_node(
 
         valid_input_dtypes = []
         if self.tosa_spec.support_float():
-            valid_input_dtypes.append(ts.DType.FP32)
+            valid_input_dtypes.extend([ts.DType.FP16, ts.DType.FP32])
         if self.tosa_spec.support_integer():
             valid_input_dtypes.append(ts.DType.INT8)
 
@@ -82,8 +82,8 @@ def define_node(
 
         conv2d_output_name = output.name
         acc_type = output.dtype
-        if output.dtype == ts.DType.BF16:
-            # Accumulate BF16 inputs in FP32 for better precision per TOSA BF16 extension.
+        if output.dtype in [ts.DType.BF16, ts.DType.FP16]:
+            # Accumulate BF16, FP16 inputs in FP32 for better precision.
             acc_type = ts.DType.FP32
 
         input_zp_name, weight_zp_name = add_input_weight_zp_consts(
diff --git a/backends/arm/test/ops/test_constant_pad_nd.py b/backends/arm/test/ops/test_constant_pad_nd.py
@@ -45,6 +45,18 @@
         -0.5,
     ),
 }
+test_data_suite_fp16 = {
+    "4dim_last1dim_fp16": lambda: (
+        torch.rand(1, 1, 8, 8, dtype=torch.float16),
+        (1, 1, 0, 0, 0, 0, 0, 0),
+        1.0,
+    ),
+    "3dim_last1dim_fp16": lambda: (
+        torch.rand(1, 1, 8, dtype=torch.float16),
+        (1, 0, 1, 0, 0, 0),
+        -0.5,
+    ),
+}
 
 
 class ConstantPadND(torch.nn.Module):
@@ -65,7 +77,7 @@ def forward(self, x: torch.Tensor):
 
 @common.parametrize(
     "test_data",
-    test_data_suite | test_data_suite_bf16,
+    test_data_suite | test_data_suite_bf16 | test_data_suite_fp16,
 )
 def test_constant_pad_nd_tosa_FP(test_data: Tuple):
     test_data, padding, value = test_data()
@@ -105,7 +117,7 @@ def test_constant_pad_nd_tosa_INT_a16w8(test_data: Tuple):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_suite)
+@common.parametrize("test_data", test_data_suite | test_data_suite_fp16)
 @common.SkipIfNoModelConverter
 def test_constant_pad_nd_vgf_no_quant(test_data: Tuple):
     inp, padding, value = test_data()
diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py
@@ -410,6 +410,30 @@ def forward(self, x):
         dtype=torch.bfloat16,
     ),
 }
+test_data_FP_fp16 = {
+    "fp16_3x3": lambda: Conv2d(
+        height=12,
+        width=12,
+        in_channels=3,
+        out_channels=4,
+        kernel_size=(3, 3),
+        stride=(1, 1),
+        padding=(1, 1),
+        bias=True,
+        dtype=torch.float16,
+    ),
+    "fp16_1x1": lambda: Conv2d(
+        height=8,
+        width=8,
+        in_channels=2,
+        out_channels=2,
+        kernel_size=(1, 1),
+        stride=(2, 1),
+        padding=(0, 3),
+        bias=False,
+        dtype=torch.float16,
+    ),
+}
 
 # Generate a new test set paired with per_channel_quant=True/False.
 test_data_INT = {
@@ -431,7 +455,7 @@ def _get_dtype_count(model: torch.nn.Module):
     }
 
 
-@common.parametrize("test_data", test_data_FP | test_data_FP_bf16)
+@common.parametrize("test_data", test_data_FP | test_data_FP_bf16 | test_data_FP_fp16)
 def test_convolution_2d_tosa_FP(test_data):
     model = test_data()
     pipeline = TosaPipelineFP[input_t](
@@ -539,7 +563,7 @@ def test_convolution_2d_u85_INT_a8w4(test_data):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_FP)
+@common.parametrize("test_data", test_data_FP | test_data_FP_fp16)
 @common.SkipIfNoModelConverter
 def test_convolution_2d_vgf_no_quant(test_data):
     model = test_data()
@@ -614,7 +638,7 @@ def test_convolution_2d_u55_INT_not_delegated(module: Conv2d):
 
 
 @common.parametrize("test_data", test_data_INT)
-def test_conv2d_tosa_INT_a16w8(test_data: input_t):
+def test_convolution_2d_tosa_INT_a16w8(test_data: input_t):
     """Test conv2d with 16A8W quantization for TOSA INT."""
     model, per_channel_quantization = test_data()
     pipeline = TosaPipelineINT[input_t](
@@ -630,7 +654,7 @@ def test_conv2d_tosa_INT_a16w8(test_data: input_t):
 
 @common.parametrize("test_data", test_data_INT)
 @common.XfailIfNoCorstone300
-def test_conv2d_u55_INT_a16w8(test_data: input_t):
+def test_convolution_2d_u55_INT_a16w8(test_data: input_t):
     """Test conv2d with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
     model, per_channel_quantization = test_data()
     pipeline = EthosU55PipelineINT[input_t](
@@ -647,7 +671,7 @@ def test_conv2d_u55_INT_a16w8(test_data: input_t):
 
 @common.parametrize("test_data", test_data_INT)
 @common.XfailIfNoCorstone320
-def test_conv2d_u85_INT_a16w8(test_data: input_t):
+def test_convolution_2d_u85_INT_a16w8(test_data: input_t):
     """Test conv2d with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
     model, per_channel_quantization = test_data()
     pipeline = EthosU85PipelineINT[input_t](
diff --git a/backends/arm/test/ops/test_conv3d.py b/backends/arm/test/ops/test_conv3d.py
@@ -439,6 +439,32 @@ def forward(self, x):
         dtype=torch.bfloat16,
     ),
 }
+test_data_FP_fp16 = {
+    "fp16_3x3": lambda: Conv3d(
+        height=10,
+        width=10,
+        depth=6,
+        in_channels=3,
+        out_channels=4,
+        kernel_size=(3, 3, 3),
+        stride=(1, 1, 1),
+        padding=(1, 1, 1),
+        bias=True,
+        dtype=torch.float16,
+    ),
+    "fp16_1x1": lambda: Conv3d(
+        height=6,
+        width=6,
+        depth=4,
+        in_channels=2,
+        out_channels=2,
+        kernel_size=(1, 1, 1),
+        stride=(1, 1, 1),
+        padding=(0, 0, 0),
+        bias=False,
+        dtype=torch.float16,
+    ),
+}
 
 # Generate a new test set paired with per_channel_quant=True/False.
 test_data_INT = {
@@ -466,11 +492,12 @@ def _get_dtype_count(model: torch.nn.Module):
 input_t = Tuple[torch.Tensor]
 
 
-@common.parametrize("test_data", test_data_FP | test_data_FP_bf16)
+@common.parametrize("test_data", test_data_FP | test_data_FP_bf16 | test_data_FP_fp16)
 def test_convolution_3d_tosa_FP(test_data):
+    model = test_data()
     pipeline = TosaPipelineFP[input_t](
-        test_data(),
-        test_data().get_inputs(),
+        model,
+        model.get_inputs(),
         aten_op,
         exir_op,
         tosa_extensions=["bf16"],
@@ -623,12 +650,13 @@ def test_convolution_3d_u85_INT_a8w4(test_data):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_FP)
+@common.parametrize("test_data", test_data_FP | test_data_FP_fp16)
 @common.SkipIfNoModelConverter
 def test_convolution_3d_vgf_no_quant(test_data):
+    model = test_data()
     pipeline = VgfPipeline[input_t](
-        test_data(),
-        test_data().get_inputs(),
+        model,
+        model.get_inputs(),
         aten_op,
         exir_op,
         quantize=False,
diff --git a/backends/arm/test/ops/test_cos.py b/backends/arm/test/ops/test_cos.py
@@ -35,6 +35,10 @@
     "rand_bf16": torch.rand(4, 4, dtype=torch.bfloat16) - 0.5,
     "ramp_bf16": torch.arange(-8, 8, 0.5, dtype=torch.bfloat16),
 }
+test_data_suite_fp16 = {
+    "rand_fp16": torch.rand(4, 4, dtype=torch.float16) - 0.5,
+    "ramp_fp16": torch.arange(-8, 8, 0.5, dtype=torch.float16),
+}
 
 
 class Cos(torch.nn.Module):
@@ -43,7 +47,9 @@ def forward(self, x: torch.Tensor):
         return torch.cos(x)
 
 
-@common.parametrize("test_data", test_data_suite | test_data_suite_bf16)
+@common.parametrize(
+    "test_data", test_data_suite | test_data_suite_bf16 | test_data_suite_fp16
+)
 @pytest.mark.tosa_ref_model
 def test_cos_tosa_FP(test_data: Tuple):
     pipeline = TosaPipelineFP[input_t1](
@@ -92,7 +98,7 @@ def test_cos_u85_INT(test_data: Tuple):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_suite)
+@common.parametrize("test_data", test_data_suite | test_data_suite_fp16)
 @common.SkipIfNoModelConverter
 def test_cos_vgf_no_quant(test_data: Tuple):
     pipeline = VgfPipeline[input_t1](
diff --git a/backends/arm/test/ops/test_slice.py b/backends/arm/test/ops/test_slice.py
@@ -37,14 +37,21 @@
     ),
 }
 
+test_data_suite_fp16 = {
+    "ones_slice_4_fp16": lambda: (
+        torch.ones((1, 12, 10, 10), dtype=torch.float16),
+        [(0, 1), (0, 5), (3, 5), (4, 10)],
+    ),
+}
+
 
 class Slice(torch.nn.Module):
     def forward(self, x: torch.Tensor, s: list[tuple[int, int]]):
         slices = [slice(*i) for i in s]
         return x[slices]
 
 
-@common.parametrize("test_data", test_data_suite)
+@common.parametrize("test_data", test_data_suite | test_data_suite_fp16)
 def test_slice_tensor_tosa_FP(test_data: torch.Tensor):
     pipeline = TosaPipelineFP[input_t1](Slice(), test_data(), aten_op, exir_op)
     pipeline.run()
@@ -96,7 +103,7 @@ def test_slice_tensor_u85_INT(test_data: torch.Tensor):
     pipeline.run()
 
 
-@common.parametrize("test_data", test_data_suite)
+@common.parametrize("test_data", test_data_suite | test_data_suite_fp16)
 @common.SkipIfNoModelConverter
 def test_slice_tensor_vgf_no_quant(test_data: torch.Tensor):
     pipeline = VgfPipeline[input_t1](

Original file line number	Diff line number	Diff line change
`@@ -42,7 +42,7 @@ def define_node(`
`42`	`42`	`validate_valid_dtype(`
`43`	`43`	`self.target,`
`44`	`44`	`[*inputs, output],`
`45`		`- [ts.DType.FP32, ts.DType.BF16],`
	`45`	`+ [ts.DType.FP16, ts.DType.FP32, ts.DType.BF16],`
`46`	`46`	`self.tosa_spec,`
`47`	`47`	`)`
`48`	`48`	`attr = ts.TosaSerializerAttribute()`