Skip to content

Commit 1a91ec9

Browse files
Port common utils and alpha decoder
1 parent b5fe86c commit 1a91ec9

4 files changed

Lines changed: 153 additions & 47 deletions

File tree

src/ImageSharp/Common/Helpers/Vector128Utilities.cs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,4 +1300,68 @@ public static Vector128<sbyte> SubtractSaturate(Vector128<sbyte> left, Vector128
13001300
// Narrow back to signed bytes
13011301
return Vector128.Narrow(diffLo, diffHi);
13021302
}
1303+
1304+
/// <summary>
/// Creates a mask from the most significant bit of each 8-bit element in <paramref name="value"/>.
/// </summary>
/// <param name="value">
/// The vector containing packed 8-bit integers from which to create the mask.
/// </param>
/// <returns>
/// A 16-bit integer mask where bit <c>i</c> is the most significant bit of element <c>i</c>
/// in <paramref name="value"/>. The upper 16 bits of the result are zero.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MoveMask(Vector128<byte> value)
{
    if (Sse2.IsSupported)
    {
        return Sse2.MoveMask(value);
    }

    if (AdvSimd.IsSupported)
    {
        // https://stackoverflow.com/questions/11870910/sse-mm-movemask-epi8-equivalent-method-for-arm-neon
        // The referenced technique assumes every byte is already 0x00 or 0xFF.
        // Broadcast the sign bit first (arithmetic shift right by 7) so that arbitrary
        // input produces the same result as the SSE2 path.
        Vector128<byte> signs = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();

        // Element i keeps only bit (i % 8), i.e. its weight within its 8-byte half.
        Vector128<byte> powers = Vector128.Create(1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128);
        Vector128<byte> masked = signs & powers;

        // Three pairwise widening adds collapse each 8-byte half into one 64-bit lane.
        // Every weight is distinct, so the sum equals the bitwise OR and never exceeds 255.
        Vector128<ushort> sum8 = AdvSimd.AddPairwiseWidening(masked);
        Vector128<uint> sum16 = AdvSimd.AddPairwiseWidening(sum8);
        Vector128<ulong> sum32 = AdvSimd.AddPairwiseWidening(sum16);

        // Extract lower 8 bits of each 64-bit lane
        byte lo = sum32.AsByte().GetElement(0);
        byte hi = sum32.AsByte().GetElement(8);

        return (hi << 8) | lo;
    }

    {
        // Step 1: isolate the most significant bit of every byte
        Vector128<byte> msbMask = Vector128.Create((byte)0x80);
        Vector128<byte> masked = value & msbMask;

        // Step 2: per-lane result bits. Lane i of the lower half contributes bit i,
        // lane i of the upper half contributes bit (i + 8).
        Vector128<ushort> bitShifts = Vector128.Create((ushort)1, 2, 4, 8, 16, 32, 64, 128);
        Vector128<ushort> bitShiftsHigh = Vector128.Create((ushort)256, 512, 1024, 2048, 4096, 8192, 16384, 32768);

        // Step 3: widen to ushort so each lane can hold any of the 16 result bits
        (Vector128<ushort> lo, Vector128<ushort> hi) = Vector128.Widen(masked);

        // Step 4: lanes whose MSB was clear become zero; all others take their result bit
        lo = Vector128.ConditionalSelect(Vector128.Equals(lo, Vector128<ushort>.Zero), Vector128<ushort>.Zero, bitShifts);
        hi = Vector128.ConditionalSelect(Vector128.Equals(hi, Vector128<ushort>.Zero), Vector128<ushort>.Zero, bitShiftsHigh);

        // Step 5: bitwise OR the two halves
        Vector128<ushort> maskVector = lo | hi;

        // Step 6: horizontal OR reduction via shuffles; afterwards every lane holds the full mask
        maskVector |= Vector128.Shuffle(maskVector, Vector128.Create((ushort)4, 5, 6, 7, 0, 1, 2, 3));
        maskVector |= Vector128.Shuffle(maskVector, Vector128.Create((ushort)2, 3, 0, 1, 6, 7, 4, 5));
        maskVector |= Vector128.Shuffle(maskVector, Vector128.Create((ushort)1, 0, 3, 2, 5, 4, 7, 6));

        return maskVector.ToScalar();
    }
}
13031367
}

src/ImageSharp/Common/Helpers/Vector256Utilities.cs

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,27 @@ public static Vector256<short> PackSignedSaturate(Vector256<int> left, Vector256
231231
return Vector256.Narrow(lefClamped, rightClamped);
232232
}
233233

234+
/// <summary>
/// Packs signed 16-bit integers to signed 8-bit integers and saturates.
/// </summary>
/// <param name="left">The left hand source vector.</param>
/// <param name="right">The right hand source vector.</param>
/// <returns>The <see cref="Vector256{SByte}"/>.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<sbyte> PackSignedSaturate(Vector256<short> left, Vector256<short> right)
{
    if (Avx2.IsSupported)
    {
        return Avx2.PackSignedSaturate(left, right);
    }

    // The AVX2 instruction packs within each 128-bit lane: the lower result half is
    // (left.lower, right.lower) and the upper result half is (left.upper, right.upper).
    // Build the fallback from the 128-bit helper per half so the element order matches
    // the hardware path instead of concatenating all of left followed by all of right.
    return Vector256.Create(
        Vector128_.PackSignedSaturate(left.GetLower(), right.GetLower()),
        Vector128_.PackSignedSaturate(left.GetUpper(), right.GetUpper()));
}
254+
234255
/// <summary>
235256
/// Restricts a vector between a minimum and a maximum value.
236257
/// </summary>
@@ -466,6 +487,28 @@ public static Vector256<byte> SubtractSaturate(Vector256<byte> left, Vector256<b
466487
Vector128_.SubtractSaturate(left.GetUpper(), right.GetUpper()));
467488
}
468489

490+
/// <summary>
/// Creates a mask from the most significant bit of each 8-bit element in <paramref name="value"/>.
/// </summary>
/// <param name="value">
/// The vector containing packed 8-bit integers from which to create the mask.
/// </param>
/// <returns>
/// A 32-bit integer mask where bit <c>i</c> is the most significant bit of element <c>i</c>
/// in <paramref name="value"/>. The result is negative when the last element has its
/// most significant bit set, and equals <c>-1</c> when all 32 bits are set.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int MoveMask(Vector256<byte> value)
{
    if (Avx2.IsSupported)
    {
        return Avx2.MoveMask(value);
    }

    // Each 128-bit half yields a 16-bit mask; the upper half fills bits 16..31.
    int loMask = Vector128_.MoveMask(value.GetLower());
    int hiMask = Vector128_.MoveMask(value.GetUpper());
    return loMask | (hiMask << 16);
}
511+
469512
[DoesNotReturn]
470513
private static void ThrowUnreachableException() => throw new UnreachableException();
471514
}

src/ImageSharp/Formats/Webp/AlphaDecoder.cs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
using System.Runtime.CompilerServices;
77
using System.Runtime.InteropServices;
88
using System.Runtime.Intrinsics;
9-
using System.Runtime.Intrinsics.Arm;
109
using System.Runtime.Intrinsics.X86;
1110
using SixLabors.ImageSharp.Common.Helpers;
1211
using SixLabors.ImageSharp.Formats.Webp.BitReader;
@@ -314,7 +313,7 @@ private static void ColorIndexInverseTransformAlpha(
314313

315314
private static void HorizontalUnfilter(Span<byte> prev, Span<byte> input, Span<byte> dst, int width)
316315
{
317-
if ((Sse2.IsSupported || AdvSimd.IsSupported) && width >= 9)
316+
if (Vector128.IsHardwareAccelerated && width >= 9)
318317
{
319318
dst[0] = (byte)(input[0] + (prev.IsEmpty ? 0 : prev[0]));
320319
nuint i;
@@ -362,7 +361,7 @@ private static void VerticalUnfilter(Span<byte> prev, Span<byte> input, Span<byt
362361
{
363362
HorizontalUnfilter(null, input, dst, width);
364363
}
365-
else if (Avx2.IsSupported)
364+
else if (Vector256.IsHardwareAccelerated)
366365
{
367366
ref byte inputRef = ref MemoryMarshal.GetReference(input);
368367
ref byte prevRef = ref MemoryMarshal.GetReference(prev);
@@ -374,7 +373,7 @@ private static void VerticalUnfilter(Span<byte> prev, Span<byte> input, Span<byt
374373
{
375374
Vector256<int> a0 = Unsafe.As<byte, Vector256<int>>(ref Unsafe.Add(ref inputRef, i));
376375
Vector256<int> b0 = Unsafe.As<byte, Vector256<int>>(ref Unsafe.Add(ref prevRef, i));
377-
Vector256<byte> c0 = Avx2.Add(a0.AsByte(), b0.AsByte());
376+
Vector256<byte> c0 = a0.AsByte() + b0.AsByte();
378377
ref byte outputRef = ref Unsafe.Add(ref dstRef, i);
379378
Unsafe.As<byte, Vector256<byte>>(ref outputRef) = c0;
380379
}

src/ImageSharp/Formats/Webp/WebpCommonUtils.cs

Lines changed: 43 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
using System.Runtime.InteropServices;
55
using System.Runtime.Intrinsics;
6-
using System.Runtime.Intrinsics.X86;
6+
using SixLabors.ImageSharp.Common.Helpers;
77
using SixLabors.ImageSharp.PixelFormats;
88

99
namespace SixLabors.ImageSharp.Formats.Webp;
@@ -20,7 +20,7 @@ internal static class WebpCommonUtils
2020
/// <returns>Returns true if alpha has non-0xff values.</returns>
2121
public static unsafe bool CheckNonOpaque(ReadOnlySpan<Bgra32> row)
2222
{
23-
if (Avx2.IsSupported)
23+
if (Vector256.IsHardwareAccelerated)
2424
{
2525
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
2626
int i = 0;
@@ -32,19 +32,19 @@ public static unsafe bool CheckNonOpaque(ReadOnlySpan<Bgra32> row)
3232

3333
for (; i + 128 <= length; i += 128)
3434
{
35-
Vector256<byte> a0 = Avx.LoadVector256(src + i).AsByte();
36-
Vector256<byte> a1 = Avx.LoadVector256(src + i + 32).AsByte();
37-
Vector256<byte> a2 = Avx.LoadVector256(src + i + 64).AsByte();
38-
Vector256<byte> a3 = Avx.LoadVector256(src + i + 96).AsByte();
39-
Vector256<int> b0 = Avx2.And(a0, alphaMaskVector256).AsInt32();
40-
Vector256<int> b1 = Avx2.And(a1, alphaMaskVector256).AsInt32();
41-
Vector256<int> b2 = Avx2.And(a2, alphaMaskVector256).AsInt32();
42-
Vector256<int> b3 = Avx2.And(a3, alphaMaskVector256).AsInt32();
43-
Vector256<short> c0 = Avx2.PackSignedSaturate(b0, b1).AsInt16();
44-
Vector256<short> c1 = Avx2.PackSignedSaturate(b2, b3).AsInt16();
45-
Vector256<byte> d = Avx2.PackSignedSaturate(c0, c1).AsByte();
46-
Vector256<byte> bits = Avx2.CompareEqual(d, all0x80Vector256);
47-
int mask = Avx2.MoveMask(bits);
35+
Vector256<byte> a0 = Vector256.Load(src + i).AsByte();
36+
Vector256<byte> a1 = Vector256.Load(src + i + 32).AsByte();
37+
Vector256<byte> a2 = Vector256.Load(src + i + 64).AsByte();
38+
Vector256<byte> a3 = Vector256.Load(src + i + 96).AsByte();
39+
Vector256<int> b0 = (a0 & alphaMaskVector256).AsInt32();
40+
Vector256<int> b1 = (a1 & alphaMaskVector256).AsInt32();
41+
Vector256<int> b2 = (a2 & alphaMaskVector256).AsInt32();
42+
Vector256<int> b3 = (a3 & alphaMaskVector256).AsInt32();
43+
Vector256<short> c0 = Vector256_.PackSignedSaturate(b0, b1).AsInt16();
44+
Vector256<short> c1 = Vector256_.PackSignedSaturate(b2, b3).AsInt16();
45+
Vector256<byte> d = Vector256_.PackSignedSaturate(c0, c1).AsByte();
46+
Vector256<byte> bits = Vector256.Equals(d, all0x80Vector256);
47+
int mask = Vector256_.MoveMask(bits);
4848
if (mask != -1)
4949
{
5050
return true;
@@ -53,15 +53,15 @@ public static unsafe bool CheckNonOpaque(ReadOnlySpan<Bgra32> row)
5353

5454
for (; i + 64 <= length; i += 64)
5555
{
56-
if (IsNoneOpaque64Bytes(src, i))
56+
if (IsNoneOpaque64BytesVector128(src, i))
5757
{
5858
return true;
5959
}
6060
}
6161

6262
for (; i + 32 <= length; i += 32)
6363
{
64-
if (IsNoneOpaque32Bytes(src, i))
64+
if (IsNonOpaque32BytesVector128(src, i))
6565
{
6666
return true;
6767
}
@@ -76,7 +76,7 @@ public static unsafe bool CheckNonOpaque(ReadOnlySpan<Bgra32> row)
7676
}
7777
}
7878
}
79-
else if (Sse2.IsSupported)
79+
else if (Vector128.IsHardwareAccelerated)
8080
{
8181
ReadOnlySpan<byte> rowBytes = MemoryMarshal.AsBytes(row);
8282
int i = 0;
@@ -85,15 +85,15 @@ public static unsafe bool CheckNonOpaque(ReadOnlySpan<Bgra32> row)
8585
{
8686
for (; i + 64 <= length; i += 64)
8787
{
88-
if (IsNoneOpaque64Bytes(src, i))
88+
if (IsNoneOpaque64BytesVector128(src, i))
8989
{
9090
return true;
9191
}
9292
}
9393

9494
for (; i + 32 <= length; i += 32)
9595
{
96-
if (IsNoneOpaque32Bytes(src, i))
96+
if (IsNonOpaque32BytesVector128(src, i))
9797
{
9898
return true;
9999
}
@@ -122,38 +122,38 @@ public static unsafe bool CheckNonOpaque(ReadOnlySpan<Bgra32> row)
122122
return false;
123123
}
124124

125-
private static unsafe bool IsNoneOpaque64Bytes(byte* src, int i)
125+
private static unsafe bool IsNoneOpaque64BytesVector128(byte* src, int i)
126126
{
127127
Vector128<byte> alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
128128

129-
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
130-
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
131-
Vector128<byte> a2 = Sse2.LoadVector128(src + i + 32).AsByte();
132-
Vector128<byte> a3 = Sse2.LoadVector128(src + i + 48).AsByte();
133-
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32();
134-
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32();
135-
Vector128<int> b2 = Sse2.And(a2, alphaMask).AsInt32();
136-
Vector128<int> b3 = Sse2.And(a3, alphaMask).AsInt32();
137-
Vector128<short> c0 = Sse2.PackSignedSaturate(b0, b1).AsInt16();
138-
Vector128<short> c1 = Sse2.PackSignedSaturate(b2, b3).AsInt16();
139-
Vector128<byte> d = Sse2.PackSignedSaturate(c0, c1).AsByte();
140-
Vector128<byte> bits = Sse2.CompareEqual(d, Vector128.Create((byte)0x80).AsByte());
141-
int mask = Sse2.MoveMask(bits);
129+
Vector128<byte> a0 = Vector128.Load(src + i).AsByte();
130+
Vector128<byte> a1 = Vector128.Load(src + i + 16).AsByte();
131+
Vector128<byte> a2 = Vector128.Load(src + i + 32).AsByte();
132+
Vector128<byte> a3 = Vector128.Load(src + i + 48).AsByte();
133+
Vector128<int> b0 = (a0 & alphaMask).AsInt32();
134+
Vector128<int> b1 = (a1 & alphaMask).AsInt32();
135+
Vector128<int> b2 = (a2 & alphaMask).AsInt32();
136+
Vector128<int> b3 = (a3 & alphaMask).AsInt32();
137+
Vector128<short> c0 = Vector128_.PackSignedSaturate(b0, b1).AsInt16();
138+
Vector128<short> c1 = Vector128_.PackSignedSaturate(b2, b3).AsInt16();
139+
Vector128<byte> d = Vector128_.PackSignedSaturate(c0, c1).AsByte();
140+
Vector128<byte> bits = Vector128.Equals(d, Vector128.Create((byte)0x80).AsByte());
141+
int mask = Vector128_.MoveMask(bits);
142142
return mask != 0xFFFF;
143143
}
144144

145-
private static unsafe bool IsNoneOpaque32Bytes(byte* src, int i)
145+
private static unsafe bool IsNonOpaque32BytesVector128(byte* src, int i)
146146
{
147147
Vector128<byte> alphaMask = Vector128.Create(0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255);
148148

149-
Vector128<byte> a0 = Sse2.LoadVector128(src + i).AsByte();
150-
Vector128<byte> a1 = Sse2.LoadVector128(src + i + 16).AsByte();
151-
Vector128<int> b0 = Sse2.And(a0, alphaMask).AsInt32();
152-
Vector128<int> b1 = Sse2.And(a1, alphaMask).AsInt32();
153-
Vector128<short> c = Sse2.PackSignedSaturate(b0, b1).AsInt16();
154-
Vector128<byte> d = Sse2.PackSignedSaturate(c, c).AsByte();
155-
Vector128<byte> bits = Sse2.CompareEqual(d, Vector128.Create((byte)0x80).AsByte());
156-
int mask = Sse2.MoveMask(bits);
149+
Vector128<byte> a0 = Vector128.Load(src + i).AsByte();
150+
Vector128<byte> a1 = Vector128.Load(src + i + 16).AsByte();
151+
Vector128<int> b0 = (a0 & alphaMask).AsInt32();
152+
Vector128<int> b1 = (a1 & alphaMask).AsInt32();
153+
Vector128<short> c = Vector128_.PackSignedSaturate(b0, b1).AsInt16();
154+
Vector128<byte> d = Vector128_.PackSignedSaturate(c, c).AsByte();
155+
Vector128<byte> bits = Vector128.Equals(d, Vector128.Create((byte)0x80).AsByte());
156+
int mask = Vector128_.MoveMask(bits);
157157
return mask != 0xFFFF;
158158
}
159159
}

0 commit comments

Comments
 (0)