Skip to content

Commit 63b9533

Browse files
committed
fix error-free arithmetic
1 parent 9a929c9 commit 63b9533

46 files changed

Lines changed: 53 additions & 53 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
**Supports High-Dimensional Convolutional Neural Networks.** (Complex, Quaternion, Vector3D)
1010

11-
For regression problems: **High precision calculate** by FP32-FP32 arithmetic (1/8 error of FP32 arithmetic)
11+
For regression problems: **High-precision calculation** using FP32-FP32 arithmetic
1212

1313
## Requirement
1414
.NET 6.0

TensorShaderCudaBackend/ShaderDefines.cs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@ static __inline__ __device__ void floatfloat_hilo_add(float &hi, float &lo, floa
402402
public static string AtomicAdd =>
403403
$@"
404404
static __inline__ __device__ void floatfloat_atomicadd(float *ptr, float hi, float lo){{
405-
float tmp = atomicAdd(ptr, hi + lo);
405+
float tmp = atomicAdd(ptr, hi);
406406
atomicAdd(ptr + 1, lo - (((tmp + hi) - tmp) - hi));
407407
}}";
408408

@@ -465,9 +465,9 @@ static __inline__ __device__ void complex_mulgrad(float2 &hi, float2 &lo, float2
465465
static __inline__ __device__ void floatfloat_atomicadd(float2 *ptr, float2 hi, float2 lo){{
466466
float *ptr_float = (float*)ptr;
467467
468-
float tmpx = atomicAdd(ptr_float, hi.x + lo.x);
468+
float tmpx = atomicAdd(ptr_float, hi.x);
469469
atomicAdd(ptr_float + 1, lo.x - (((tmpx + hi.x) - tmpx) - hi.x));
470-
float tmpy = atomicAdd(ptr_float + 2, hi.y + lo.y);
470+
float tmpy = atomicAdd(ptr_float + 2, hi.y);
471471
atomicAdd(ptr_float + 3, lo.y - (((tmpy + hi.y) - tmpy) - hi.y));
472472
}}";
473473
}
@@ -585,13 +585,13 @@ static __inline__ __device__ void quaternion_mulgrad(float4 &hi, float4 &lo, flo
585585
static __inline__ __device__ void floatfloat_atomicadd(float4 *ptr, float4 hi, float4 lo){{
586586
float *ptr_float = (float*)ptr;
587587
588-
float tmpx = atomicAdd(ptr_float, hi.x + lo.x);
588+
float tmpx = atomicAdd(ptr_float, hi.x);
589589
atomicAdd(ptr_float + 1, lo.x - (((tmpx + hi.x) - tmpx) - hi.x));
590-
float tmpy = atomicAdd(ptr_float + 2, hi.y + lo.y);
590+
float tmpy = atomicAdd(ptr_float + 2, hi.y);
591591
atomicAdd(ptr_float + 3, lo.y - (((tmpy + hi.y) - tmpy) - hi.y));
592-
float tmpz = atomicAdd(ptr_float + 4, hi.z + lo.z);
592+
float tmpz = atomicAdd(ptr_float + 4, hi.z);
593593
atomicAdd(ptr_float + 5, lo.z - (((tmpz + hi.z) - tmpz) - hi.z));
594-
float tmpw = atomicAdd(ptr_float + 6, hi.w + lo.w);
594+
float tmpw = atomicAdd(ptr_float + 6, hi.w);
595595
atomicAdd(ptr_float + 7, lo.w - (((tmpw + hi.w) - tmpw) - hi.w));
596596
}}";
597597
}

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Convolution1D.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ __global__ void complex_convolution_1d(const float2* __restrict__ inmap, float2*
9090
{ (OutChannels % ThreadsX != 0 ? $"if(outch < {OutChannels}){{" : "") }
9191
unsigned int outmap_idx = outch + {OutChannels} * ox;
9292
93-
outmap[outmap_idx] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
93+
outmap[outmap_idx] = ctor_float2(uv_hi.x, uv_hi.y);
9494
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
9595
}}";
9696

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Convolution2D.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ __global__ void complex_convolution_2d(const float2* __restrict__ inmap, float2*
103103
{ (OutChannels % ThreadsX != 0 ? $"if(outch < {OutChannels}){{" : "") }
104104
unsigned int outmap_idx = outch + {OutChannels} * (ox + outwidth * oy);
105105
106-
outmap[outmap_idx] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
106+
outmap[outmap_idx] = ctor_float2(uv_hi.x, uv_hi.y);
107107
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
108108
}}";
109109

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Convolution3D.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ __global__ void complex_convolution_3d(const float2* __restrict__ inmap, float2*
111111
{ (OutChannels % ThreadsX != 0 ? $"if(outch < {OutChannels}){{" : "") }
112112
unsigned int outmap_idx = outch + {OutChannels} * (ox + outwidth * (oy + outheight * oz));
113113
114-
outmap[outmap_idx] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
114+
outmap[outmap_idx] = ctor_float2(uv_hi.x, uv_hi.y);
115115
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
116116
}}";
117117

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Deconvolution1D.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ __global__ void complex_deconvolution_1d(const float2* __restrict__ inmap, float
9090
{ (OutChannels % ThreadsX != 0 ? $"if(outch < {OutChannels}){{" : "") }
9191
unsigned int outmap_idx = outch + {OutChannels} * ox;
9292
93-
outmap[outmap_idx] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
93+
outmap[outmap_idx] = ctor_float2(uv_hi.x, uv_hi.y);
9494
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
9595
}}";
9696

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Deconvolution2D.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ __global__ void complex_deconvolution_2d(const float2* __restrict__ inmap, float
106106
{ (OutChannels % ThreadsX != 0 ? $"if(outch < {OutChannels}){{" : "") }
107107
unsigned int outmap_idx = outch + {OutChannels} * (ox + outwidth * oy);
108108
109-
outmap[outmap_idx] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
109+
outmap[outmap_idx] = ctor_float2(uv_hi.x, uv_hi.y);
110110
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
111111
}}";
112112

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Deconvolution3D.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ __global__ void complex_deconvolution_3d(const float2* __restrict__ inmap, float
118118
{ (OutChannels % ThreadsX != 0 ? $"if(outch < {OutChannels}){{" : "") }
119119
unsigned int outmap_idx = outch + {OutChannels} * (ox + outwidth * (oy + outheight * oz));
120120
121-
outmap[outmap_idx] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
121+
outmap[outmap_idx] = ctor_float2(uv_hi.x, uv_hi.y);
122122
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
123123
}}";
124124

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/Dense.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ __global__ void complex_dense(const float2* __restrict__ inmap, float2* __restri
7676
7777
filter_idx += {OutChannels};
7878
}}
79-
outmap[outch] = ctor_float2(vu_hi.x + vu_lo.x, vu_hi.y + vu_lo.y);
79+
outmap[outch] = ctor_float2(vu_hi.x, vu_hi.y);
8080
8181
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
8282
}}";

TensorShaderCudaBackend/Shaders/Complex/Convolution/FloatFloatPrecision/TransposeDense.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ __global__ void complex_transpose_dense(const float2* __restrict__ inmap, float2
7474
7575
filter_idx += {OutChannels};
7676
}}
77-
outmap[outch] = ctor_float2(uv_hi.x + uv_lo.x, uv_hi.y + uv_lo.y);
77+
outmap[outch] = ctor_float2(uv_hi.x, uv_hi.y);
7878
7979
{ (OutChannels % ThreadsX != 0 ? "}" : "") }
8080
}}";

0 commit comments

Comments
 (0)