|
1 | 1 | // Copyright (c) Six Labors. |
2 | 2 | // Licensed under the Six Labors Split License. |
3 | 3 |
|
4 | | -using System.Numerics; |
5 | 4 | using System.Runtime.CompilerServices; |
6 | 5 | using System.Runtime.InteropServices; |
7 | 6 | using System.Runtime.Intrinsics; |
@@ -60,109 +59,76 @@ public void NormalizeColorsAndRoundInPlaceVector256(float maximum) |
60 | 59 | } |
61 | 60 |
|
/// <summary>
/// Loads values from <paramref name="source"/> into this block, converting each 16-bit
/// value to <see cref="float"/> using <see cref="Vector256{T}"/> intrinsics.
/// </summary>
/// <param name="source">The source <see cref="Block8x8"/></param>
public void LoadFromInt16ExtendedVector256(ref Block8x8 source)
{
    DebugGuard.IsTrue(
        Vector256.IsHardwareAccelerated,
        "LoadFromInt16ExtendedVector256 only works on Vector256 compatible architecture!");

    // Reinterpret the source as a flat run of shorts and this block as a run of Vector256<float> rows.
    ref short src = ref Unsafe.As<Block8x8, short>(ref source);
    ref Vector256<float> dst = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this);

    // One row is Vector256<int>.Count (8) values: a single Vector128<short> load that is
    // widened to Vector256<int> (Vector256_.Widen is a project helper; presumably a
    // sign-extending widen - confirm) and then converted to single precision.
    nuint stride = (nuint)Vector256<int>.Count;

    dst = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src)));
    Unsafe.Add(ref dst, 1) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride)));
    Unsafe.Add(ref dst, 2) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride * 2)));
    Unsafe.Add(ref dst, 3) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride * 3)));
    Unsafe.Add(ref dst, 4) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride * 4)));
    Unsafe.Add(ref dst, 5) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride * 5)));
    Unsafe.Add(ref dst, 6) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride * 6)));
    Unsafe.Add(ref dst, 7) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref src, stride * 7)));
}
97 | 96 |
|
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="value"/>, clamps the sum to the range
/// from zero to <paramref name="max"/>, and rounds the result to the nearest integer.
/// </summary>
/// <param name="value">The vector to normalize.</param>
/// <param name="off">The offset added to every element before clamping.</param>
/// <param name="max">The upper clamp bound.</param>
/// <returns>The offset, clamped and rounded vector.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<float> NormalizeAndRoundVector256(Vector256<float> value, Vector256<float> off, Vector256<float> max)
{
    Vector256<float> shifted = value + off;
    Vector256<float> clamped = Vector256_.Clamp(shifted, Vector256<float>.Zero, max);
    return Vector256_.RoundToNearestInteger(clamped);
}
101 | 100 |
|
/// <summary>
/// Multiplies <paramref name="a"/> and <paramref name="b"/> element-wise, converts the products
/// to 32-bit integers, packs them to 16-bit and stores them in <paramref name="dest"/>.
/// </summary>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The destination block receiving the packed 16-bit results.</param>
private static unsafe void MultiplyIntoInt16Vector256(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Vector256.IsHardwareAccelerated, "Vector256 support is required to run this operation!");

    ref Vector256<float> lhsRows = ref a.V256_0;
    ref Vector256<float> rhsRows = ref b.V256_0;
    ref Vector256<short> packedRows = ref dest.V01;

    // Every destination vector holds two source rows, so 8 rows are handled as 4 pairs.
    for (nuint pair = 0; pair < 4; pair++)
    {
        nuint even = pair * 2;
        nuint odd = even + 1;

        Vector256<float> evenProduct = Unsafe.Add(ref lhsRows, even) * Unsafe.Add(ref rhsRows, even);
        Vector256<float> oddProduct = Unsafe.Add(ref lhsRows, odd) * Unsafe.Add(ref rhsRows, odd);

        Vector256<int> evenInts = Vector256_.ConvertToInt32RoundToEven(evenProduct);
        Vector256<int> oddInts = Vector256_.ConvertToInt32RoundToEven(oddProduct);

        Vector256<int> packed = Vector256_.PackSignedSaturate(evenInts, oddInts).AsInt32();

        // Reorder the 32-bit elements to 0,1,4,5,2,3,6,7.
        // NOTE(review): presumably this undoes a per-128-bit-lane interleave produced by the pack,
        // and the index vector is created inline so the JIT sees constant indices - confirm.
        Unsafe.Add(ref packedRows, pair) = Vector256.Shuffle(packed, Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7)).AsInt16();
    }
}
123 | 120 |
|
124 | | - private void TransposeInPlace_Avx() |
| 121 | + private void TransposeInPlaceVector256() |
125 | 122 | { |
126 | 123 | // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536 |
127 | | - Vector256<float> r0 = Avx.InsertVector128( |
128 | | - this.V256_0, |
129 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V4L), |
130 | | - 1); |
131 | | - |
132 | | - Vector256<float> r1 = Avx.InsertVector128( |
133 | | - this.V256_1, |
134 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V5L), |
135 | | - 1); |
136 | | - |
137 | | - Vector256<float> r2 = Avx.InsertVector128( |
138 | | - this.V256_2, |
139 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V6L), |
140 | | - 1); |
141 | | - |
142 | | - Vector256<float> r3 = Avx.InsertVector128( |
143 | | - this.V256_3, |
144 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V7L), |
145 | | - 1); |
146 | | - |
147 | | - Vector256<float> r4 = Avx.InsertVector128( |
148 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(), |
149 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V4R), |
150 | | - 1); |
151 | | - |
152 | | - Vector256<float> r5 = Avx.InsertVector128( |
153 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(), |
154 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V5R), |
155 | | - 1); |
156 | | - |
157 | | - Vector256<float> r6 = Avx.InsertVector128( |
158 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(), |
159 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V6R), |
160 | | - 1); |
161 | | - |
162 | | - Vector256<float> r7 = Avx.InsertVector128( |
163 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(), |
164 | | - Unsafe.As<Vector4, Vector128<float>>(ref this.V7R), |
165 | | - 1); |
| 124 | + Vector256<float> r0 = this.V256_0.WithUpper(this.V4L.AsVector128()); |
| 125 | + Vector256<float> r1 = this.V256_1.WithUpper(this.V5L.AsVector128()); |
| 126 | + Vector256<float> r2 = this.V256_2.WithUpper(this.V6L.AsVector128()); |
| 127 | + Vector256<float> r3 = this.V256_3.WithUpper(this.V7L.AsVector128()); |
| 128 | + Vector256<float> r4 = this.V0R.AsVector128().ToVector256().WithUpper(this.V4R.AsVector128()); |
| 129 | + Vector256<float> r5 = this.V1R.AsVector128().ToVector256().WithUpper(this.V5R.AsVector128()); |
| 130 | + Vector256<float> r6 = this.V2R.AsVector128().ToVector256().WithUpper(this.V6R.AsVector128()); |
| 131 | + Vector256<float> r7 = this.V3R.AsVector128().ToVector256().WithUpper(this.V7R.AsVector128()); |
166 | 132 |
|
167 | 133 | Vector256<float> t0 = Avx.UnpackLow(r0, r1); |
168 | 134 | Vector256<float> t2 = Avx.UnpackLow(r2, r3); |
|
0 commit comments