From 1c5e3e1a69d89d0202185100166c6a4c1339bd3a Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 8 Apr 2026 14:39:56 +1000 Subject: [PATCH 1/3] Modernize base PorterDuffFunctions --- src/ImageSharp/Common/Helpers/Numerics.cs | 2 +- .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 37 +++--------- .../Common/Helpers/Vector256Utilities.cs | 22 +++++++ .../PixelBlenders/PorterDuffFunctions.cs | 57 ++++++++++--------- 4 files changed, 59 insertions(+), 59 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index efe68977bb..513eb7ab19 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -690,7 +690,7 @@ public static Vector4 WithW(Vector4 value, Vector4 w) /// /// The span of vectors [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static unsafe void CubePowOnXYZ(Span vectors) + public static void CubePowOnXYZ(Span vectors) { ref Vector4 baseRef = ref MemoryMarshal.GetReference(vectors); ref Vector4 endRef = ref Unsafe.Add(ref baseRef, (uint)vectors.Length); diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 076590605d..154f0b5e22 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -602,48 +602,25 @@ private static void Shuffle4Slice3( } /// - /// Performs a multiplication and an addition of the . - /// TODO: Fix. The arguments are in a different order to the FMA intrinsic. + /// Performs a multiplication and a negated addition of the . /// - /// ret = (vm0 * vm1) + va - /// The vector to add to the intermediate result. + /// ret = va - (vm0 * vm1) + /// The vector to add to the negated intermediate result. /// The first vector to multiply. /// The second vector to multiply. /// The . 
- [MethodImpl(InliningOptions.AlwaysInline)] - public static Vector256 MultiplyAdd( + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( Vector256 va, Vector256 vm0, Vector256 vm1) { if (Fma.IsSupported) { - return Fma.MultiplyAdd(vm1, vm0, va); - } - - return va + (vm0 * vm1); - } - - /// - /// Performs a multiplication and a negated addition of the . - /// - /// ret = c - (a * b) - /// The first vector to multiply. - /// The second vector to multiply. - /// The vector to add negated to the intermediate result. - /// The . - [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplyAddNegated( - Vector256 a, - Vector256 b, - Vector256 c) - { - if (Fma.IsSupported) - { - return Fma.MultiplyAddNegated(a, b, c); + return Fma.MultiplyAddNegated(vm0, vm1, va); } - return Avx.Subtract(c, Avx.Multiply(a, b)); + return Avx.Subtract(va, Avx.Multiply(vm0, vm1)); } /// diff --git a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs index 14ac13dd8d..90e3169b37 100644 --- a/src/ImageSharp/Common/Helpers/Vector256Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector256Utilities.cs @@ -115,6 +115,28 @@ public static Vector256 MultiplyAdd( return va + (vm0 * vm1); } + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = va - (vm0 * vm1) + /// The vector to add to the negated intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(InliningOptions.ShortMethod)] + public static Vector256 MultiplyAddNegated( + Vector256 va, + Vector256 vm0, + Vector256 vm1) + { + if (Fma.IsSupported) + { + return Fma.MultiplyAddNegated(vm0, vm1, va); + } + + return va - (vm0 * vm1); + } + /// /// Performs a multiplication and a subtraction of the . 
/// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index ca358be31c..45c4aade7b 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -5,6 +5,7 @@ using System.Runtime.CompilerServices; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; +using SixLabors.ImageSharp.Common.Helpers; namespace SixLabors.ImageSharp.PixelFormats.PixelBlenders; @@ -62,7 +63,7 @@ public static Vector4 Multiply(Vector4 backdrop, Vector4 source) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Multiply(Vector256 backdrop, Vector256 source) - => Avx.Multiply(backdrop, source); + => backdrop * source; /// /// Returns the result of the "Add" compositing equation. @@ -82,7 +83,7 @@ public static Vector4 Add(Vector4 backdrop, Vector4 source) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Add(Vector256 backdrop, Vector256 source) - => Avx.Min(Vector256.Create(1F), Avx.Add(backdrop, source)); + => Vector256.Min(Vector256.Create(1F), backdrop + source); /// /// Returns the result of the "Subtract" compositing equation. @@ -102,7 +103,7 @@ public static Vector4 Subtract(Vector4 backdrop, Vector4 source) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Subtract(Vector256 backdrop, Vector256 source) - => Avx.Max(Vector256.Zero, Avx.Subtract(backdrop, source)); + => Vector256.Max(Vector256.Zero, backdrop - source); /// /// Returns the result of the "Screen" compositing equation. 
@@ -124,7 +125,7 @@ public static Vector4 Screen(Vector4 backdrop, Vector4 source) public static Vector256 Screen(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - return SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Subtract(vOne, backdrop), Avx.Subtract(vOne, source), vOne); + return Vector256_.MultiplyAddNegated(vOne, vOne - backdrop, vOne - source); } /// @@ -145,7 +146,7 @@ public static Vector4 Darken(Vector4 backdrop, Vector4 source) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Darken(Vector256 backdrop, Vector256 source) - => Avx.Min(backdrop, source); + => Vector256.Min(backdrop, source); /// /// Returns the result of the "Lighten" compositing equation. @@ -164,7 +165,7 @@ public static Vector256 Darken(Vector256 backdrop, Vector256The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 Lighten(Vector256 backdrop, Vector256 source) - => Avx.Max(backdrop, source); + => Vector256.Max(backdrop, source); /// /// Returns the result of the "Overlay" compositing equation. 
@@ -192,7 +193,7 @@ public static Vector4 Overlay(Vector4 backdrop, Vector4 source) public static Vector256 Overlay(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(backdrop, source); - return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -221,7 +222,7 @@ public static Vector4 HardLight(Vector4 backdrop, Vector4 source) public static Vector256 HardLight(Vector256 backdrop, Vector256 source) { Vector256 color = OverlayValueFunction(source, backdrop); - return Avx.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); + return Vector256.Min(Vector256.Create(1F), Avx.Blend(color, Vector256.Zero, BlendAlphaControl)); } /// @@ -244,10 +245,10 @@ private static float OverlayValueFunction(float backdrop, float source) public static Vector256 OverlayValueFunction(Vector256 backdrop, Vector256 source) { Vector256 vOne = Vector256.Create(1F); - Vector256 left = Avx.Multiply(Avx.Add(backdrop, backdrop), source); + Vector256 left = (backdrop + backdrop) * source; Vector256 vOneMinusSource = Avx.Subtract(vOne, source); - Vector256 right = SimdUtils.HwIntrinsics.MultiplyAddNegated(Avx.Add(vOneMinusSource, vOneMinusSource), Avx.Subtract(vOne, backdrop), vOne); + Vector256 right = Vector256_.MultiplyAddNegated(vOne, vOneMinusSource + vOneMinusSource, vOne - backdrop); Vector256 cmp = Avx.CompareGreaterThan(backdrop, Vector256.Create(.5F)); return Avx.BlendVariable(left, right, cmp); } @@ -295,17 +296,17 @@ public static Vector256 Over(Vector256 destination, Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); Vector256 dW = Avx.Permute(destination, ShuffleAlphaControl); - Vector256 blendW = Avx.Multiply(sW, dW); - Vector256 dstW = Avx.Subtract(dW, blendW); - Vector256 srcW = Avx.Subtract(sW, blendW); + Vector256 blendW = sW * dW; + Vector256 dstW = dW - blendW; + Vector256 srcW = 
sW - blendW; // calculate final alpha - Vector256 alpha = Avx.Add(dstW, sW); + Vector256 alpha = dstW + sW; // calculate final color - Vector256 color = Avx.Multiply(destination, dstW); - color = SimdUtils.HwIntrinsics.MultiplyAdd(color, source, srcW); - color = SimdUtils.HwIntrinsics.MultiplyAdd(color, blend, blendW); + Vector256 color = destination * dstW; + color = Vector256_.MultiplyAdd(color, source, srcW); + color = Vector256_.MultiplyAdd(color, blend, blendW); // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -354,11 +355,11 @@ public static Vector256 Atop(Vector256 destination, Vector256 sW = Avx.Permute(source, ShuffleAlphaControl); - Vector256 blendW = Avx.Multiply(sW, alpha); - Vector256 dstW = Avx.Subtract(alpha, blendW); + Vector256 blendW = sW * alpha; + Vector256 dstW = alpha - blendW; // calculate final color - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); + Vector256 color = Vector256_.MultiplyAdd(Avx.Multiply(blend, blendW), destination, dstW); // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -392,10 +393,10 @@ public static Vector4 In(Vector4 destination, Vector4 source) public static Vector256 In(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 alpha = Avx.Permute(Avx.Multiply(source, destination), ShuffleAlphaControl); + Vector256 alpha = Avx.Permute(source * destination, ShuffleAlphaControl); // premultiply - Vector256 color = Avx.Multiply(source, alpha); + Vector256 color = source * alpha; // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -429,10 +430,10 @@ public static Vector4 Out(Vector4 destination, Vector4 source) public static Vector256 Out(Vector256 destination, Vector256 source) { // calculate alpha - Vector256 alpha = Avx.Permute(Avx.Multiply(source, Avx.Subtract(Vector256.Create(1F), destination)), ShuffleAlphaControl); + Vector256 alpha = Avx.Permute(source * (Vector256.Create(1F) - destination), 
ShuffleAlphaControl); // premultiply - Vector256 color = Avx.Multiply(source, alpha); + Vector256 color = source * alpha; // unpremultiply return Numerics.UnPremultiply(color, alpha); @@ -475,12 +476,12 @@ public static Vector256 Xor(Vector256 destination, Vector256 dW = Avx.Shuffle(destination, destination, ShuffleAlphaControl); Vector256 vOne = Vector256.Create(1F); - Vector256 srcW = Avx.Subtract(vOne, dW); - Vector256 dstW = Avx.Subtract(vOne, sW); + Vector256 srcW = vOne - dW; + Vector256 dstW = vOne - sW; // calculate alpha - Vector256 alpha = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); - Vector256 color = SimdUtils.HwIntrinsics.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); + Vector256 alpha = Vector256_.MultiplyAdd(Avx.Multiply(dW, dstW), sW, srcW); + Vector256 color = Vector256_.MultiplyAdd(Avx.Multiply(Avx.Multiply(dW, destination), dstW), Avx.Multiply(sW, source), srcW); // unpremultiply return Numerics.UnPremultiply(color, alpha); From 5e4f3ef1ffb738240ccffda755d5d1be3ab80385 Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 8 Apr 2026 14:48:45 +1000 Subject: [PATCH 2/3] Use operators in generated functions --- .../PorterDuffFunctions.Generated.cs | 198 +++++++++--------- .../PorterDuffFunctions.Generated.tt | 22 +- .../FeatureTesting/FeatureTestRunner.cs | 2 + 3 files changed, 112 insertions(+), 110 deletions(-) diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index 255bafc798..f0635230ca 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -37,7 +37,7 @@ public static Vector4 NormalSrc(Vector4 backdrop, Vector4 source, float opacity) /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "NormalSrcAtop" compositing equation. @@ -64,7 +64,7 @@ public static Vector4 NormalSrcAtop(Vector4 backdrop, Vector4 source, float opac [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Normal(backdrop, source)); } @@ -94,7 +94,7 @@ public static Vector4 NormalSrcOver(Vector4 backdrop, Vector4 source, float opac [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Normal(backdrop, source)); } @@ -123,7 +123,7 @@ public static Vector4 NormalSrcIn(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "NormalSrcOut" compositing equation. @@ -149,7 +149,7 @@ public static Vector4 NormalSrcOut(Vector4 backdrop, Vector4 source, float opaci /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "NormalDest" compositing equation. @@ -202,7 +202,7 @@ public static Vector4 NormalDestAtop(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Normal(source, backdrop)); } @@ -232,7 +232,7 @@ public static Vector4 NormalDestOver(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Normal(source, backdrop)); } @@ -261,7 +261,7 @@ public static Vector4 NormalDestIn(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "NormalDestOut" compositing equation. @@ -287,7 +287,7 @@ public static Vector4 NormalDestOut(Vector4 backdrop, Vector4 source, float opac /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "NormalXor" compositing equation. @@ -313,7 +313,7 @@ public static Vector4 NormalXor(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "NormalClear" compositing equation. @@ -339,7 +339,7 @@ public static Vector4 NormalClear(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -558,7 +558,7 @@ public static Vector4 MultiplySrc(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "MultiplySrcAtop" compositing equation. 
@@ -585,7 +585,7 @@ public static Vector4 MultiplySrcAtop(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Multiply(backdrop, source)); } @@ -615,7 +615,7 @@ public static Vector4 MultiplySrcOver(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Multiply(backdrop, source)); } @@ -644,7 +644,7 @@ public static Vector4 MultiplySrcIn(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "MultiplySrcOut" compositing equation. @@ -670,7 +670,7 @@ public static Vector4 MultiplySrcOut(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "MultiplyDest" compositing equation. 
@@ -723,7 +723,7 @@ public static Vector4 MultiplyDestAtop(Vector4 backdrop, Vector4 source, float o [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Multiply(source, backdrop)); } @@ -753,7 +753,7 @@ public static Vector4 MultiplyDestOver(Vector4 backdrop, Vector4 source, float o [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Multiply(source, backdrop)); } @@ -782,7 +782,7 @@ public static Vector4 MultiplyDestIn(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "MultiplyDestOut" compositing equation. @@ -808,7 +808,7 @@ public static Vector4 MultiplyDestOut(Vector4 backdrop, Vector4 source, float op /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "MultiplyXor" compositing equation. 
@@ -834,7 +834,7 @@ public static Vector4 MultiplyXor(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "MultiplyClear" compositing equation. @@ -860,7 +860,7 @@ public static Vector4 MultiplyClear(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -1079,7 +1079,7 @@ public static Vector4 AddSrc(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "AddSrcAtop" compositing equation. 
@@ -1106,7 +1106,7 @@ public static Vector4 AddSrcAtop(Vector4 backdrop, Vector4 source, float opacity [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Add(backdrop, source)); } @@ -1136,7 +1136,7 @@ public static Vector4 AddSrcOver(Vector4 backdrop, Vector4 source, float opacity [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Add(backdrop, source)); } @@ -1165,7 +1165,7 @@ public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "AddSrcOut" compositing equation. @@ -1191,7 +1191,7 @@ public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "AddDest" compositing equation. 
@@ -1244,7 +1244,7 @@ public static Vector4 AddDestAtop(Vector4 backdrop, Vector4 source, float opacit [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Add(source, backdrop)); } @@ -1274,7 +1274,7 @@ public static Vector4 AddDestOver(Vector4 backdrop, Vector4 source, float opacit [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Add(source, backdrop)); } @@ -1303,7 +1303,7 @@ public static Vector4 AddDestIn(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "AddDestOut" compositing equation. @@ -1329,7 +1329,7 @@ public static Vector4 AddDestOut(Vector4 backdrop, Vector4 source, float opacity /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "AddXor" compositing equation. @@ -1355,7 +1355,7 @@ public static Vector4 AddXor(Vector4 backdrop, Vector4 source, float opacity) /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "AddClear" compositing equation. @@ -1381,7 +1381,7 @@ public static Vector4 AddClear(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -1600,7 +1600,7 @@ public static Vector4 SubtractSrc(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "SubtractSrcAtop" compositing equation. 
@@ -1627,7 +1627,7 @@ public static Vector4 SubtractSrcAtop(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Subtract(backdrop, source)); } @@ -1657,7 +1657,7 @@ public static Vector4 SubtractSrcOver(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Subtract(backdrop, source)); } @@ -1686,7 +1686,7 @@ public static Vector4 SubtractSrcIn(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "SubtractSrcOut" compositing equation. @@ -1712,7 +1712,7 @@ public static Vector4 SubtractSrcOut(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "SubtractDest" compositing equation. 
@@ -1765,7 +1765,7 @@ public static Vector4 SubtractDestAtop(Vector4 backdrop, Vector4 source, float o [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Subtract(source, backdrop)); } @@ -1795,7 +1795,7 @@ public static Vector4 SubtractDestOver(Vector4 backdrop, Vector4 source, float o [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Subtract(source, backdrop)); } @@ -1824,7 +1824,7 @@ public static Vector4 SubtractDestIn(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "SubtractDestOut" compositing equation. @@ -1850,7 +1850,7 @@ public static Vector4 SubtractDestOut(Vector4 backdrop, Vector4 source, float op /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "SubtractXor" compositing equation. 
@@ -1876,7 +1876,7 @@ public static Vector4 SubtractXor(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "SubtractClear" compositing equation. @@ -1902,7 +1902,7 @@ public static Vector4 SubtractClear(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -2121,7 +2121,7 @@ public static Vector4 ScreenSrc(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "ScreenSrcAtop" compositing equation. 
@@ -2148,7 +2148,7 @@ public static Vector4 ScreenSrcAtop(Vector4 backdrop, Vector4 source, float opac [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Screen(backdrop, source)); } @@ -2178,7 +2178,7 @@ public static Vector4 ScreenSrcOver(Vector4 backdrop, Vector4 source, float opac [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Screen(backdrop, source)); } @@ -2207,7 +2207,7 @@ public static Vector4 ScreenSrcIn(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "ScreenSrcOut" compositing equation. @@ -2233,7 +2233,7 @@ public static Vector4 ScreenSrcOut(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "ScreenDest" compositing equation. 
@@ -2286,7 +2286,7 @@ public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Screen(source, backdrop)); } @@ -2316,7 +2316,7 @@ public static Vector4 ScreenDestOver(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Screen(source, backdrop)); } @@ -2345,7 +2345,7 @@ public static Vector4 ScreenDestIn(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "ScreenDestOut" compositing equation. @@ -2371,7 +2371,7 @@ public static Vector4 ScreenDestOut(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "ScreenXor" compositing equation. 
@@ -2397,7 +2397,7 @@ public static Vector4 ScreenXor(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "ScreenClear" compositing equation. @@ -2423,7 +2423,7 @@ public static Vector4 ScreenClear(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -2642,7 +2642,7 @@ public static Vector4 DarkenSrc(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "DarkenSrcAtop" compositing equation. 
@@ -2669,7 +2669,7 @@ public static Vector4 DarkenSrcAtop(Vector4 backdrop, Vector4 source, float opac [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Darken(backdrop, source)); } @@ -2699,7 +2699,7 @@ public static Vector4 DarkenSrcOver(Vector4 backdrop, Vector4 source, float opac [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Darken(backdrop, source)); } @@ -2728,7 +2728,7 @@ public static Vector4 DarkenSrcIn(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "DarkenSrcOut" compositing equation. @@ -2754,7 +2754,7 @@ public static Vector4 DarkenSrcOut(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "DarkenDest" compositing equation. 
@@ -2807,7 +2807,7 @@ public static Vector4 DarkenDestAtop(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Darken(source, backdrop)); } @@ -2837,7 +2837,7 @@ public static Vector4 DarkenDestOver(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Darken(source, backdrop)); } @@ -2866,7 +2866,7 @@ public static Vector4 DarkenDestIn(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "DarkenDestOut" compositing equation. @@ -2892,7 +2892,7 @@ public static Vector4 DarkenDestOut(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "DarkenXor" compositing equation. 
@@ -2918,7 +2918,7 @@ public static Vector4 DarkenXor(Vector4 backdrop, Vector4 source, float opacity) /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "DarkenClear" compositing equation. @@ -2944,7 +2944,7 @@ public static Vector4 DarkenClear(Vector4 backdrop, Vector4 source, float opacit /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -3163,7 +3163,7 @@ public static Vector4 LightenSrc(Vector4 backdrop, Vector4 source, float opacity /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "LightenSrcAtop" compositing equation. 
@@ -3190,7 +3190,7 @@ public static Vector4 LightenSrcAtop(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Lighten(backdrop, source)); } @@ -3220,7 +3220,7 @@ public static Vector4 LightenSrcOver(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Lighten(backdrop, source)); } @@ -3249,7 +3249,7 @@ public static Vector4 LightenSrcIn(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "LightenSrcOut" compositing equation. @@ -3275,7 +3275,7 @@ public static Vector4 LightenSrcOut(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "LightenDest" compositing equation. 
@@ -3328,7 +3328,7 @@ public static Vector4 LightenDestAtop(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Lighten(source, backdrop)); } @@ -3358,7 +3358,7 @@ public static Vector4 LightenDestOver(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Lighten(source, backdrop)); } @@ -3387,7 +3387,7 @@ public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenDestOut" compositing equation. @@ -3413,7 +3413,7 @@ public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "LightenXor" compositing equation. 
@@ -3439,7 +3439,7 @@ public static Vector4 LightenXor(Vector4 backdrop, Vector4 source, float opacity /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "LightenClear" compositing equation. @@ -3465,7 +3465,7 @@ public static Vector4 LightenClear(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -3684,7 +3684,7 @@ public static Vector4 OverlaySrc(Vector4 backdrop, Vector4 source, float opacity /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "OverlaySrcAtop" compositing equation. 
@@ -3711,7 +3711,7 @@ public static Vector4 OverlaySrcAtop(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, Overlay(backdrop, source)); } @@ -3741,7 +3741,7 @@ public static Vector4 OverlaySrcOver(Vector4 backdrop, Vector4 source, float opa [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, Overlay(backdrop, source)); } @@ -3770,7 +3770,7 @@ public static Vector4 OverlaySrcIn(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "OverlaySrcOut" compositing equation. @@ -3796,7 +3796,7 @@ public static Vector4 OverlaySrcOut(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "OverlayDest" compositing equation. 
@@ -3849,7 +3849,7 @@ public static Vector4 OverlayDestAtop(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, Overlay(source, backdrop)); } @@ -3879,7 +3879,7 @@ public static Vector4 OverlayDestOver(Vector4 backdrop, Vector4 source, float op [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, Overlay(source, backdrop)); } @@ -3908,7 +3908,7 @@ public static Vector4 OverlayDestIn(Vector4 backdrop, Vector4 source, float opac /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "OverlayDestOut" compositing equation. @@ -3934,7 +3934,7 @@ public static Vector4 OverlayDestOut(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "OverlayXor" compositing equation. 
@@ -3960,7 +3960,7 @@ public static Vector4 OverlayXor(Vector4 backdrop, Vector4 source, float opacity /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "OverlayClear" compositing equation. @@ -3986,7 +3986,7 @@ public static Vector4 OverlayClear(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// @@ -4205,7 +4205,7 @@ public static Vector4 HardLightSrc(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "HardLightSrcAtop" compositing equation. 
@@ -4232,7 +4232,7 @@ public static Vector4 HardLightSrcAtop(Vector4 backdrop, Vector4 source, float o [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, HardLight(backdrop, source)); } @@ -4262,7 +4262,7 @@ public static Vector4 HardLightSrcOver(Vector4 backdrop, Vector4 source, float o [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, HardLight(backdrop, source)); } @@ -4291,7 +4291,7 @@ public static Vector4 HardLightSrcIn(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "HardLightSrcOut" compositing equation. @@ -4317,7 +4317,7 @@ public static Vector4 HardLightSrcOut(Vector4 backdrop, Vector4 source, float op /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "HardLightDest" compositing equation. 
@@ -4370,7 +4370,7 @@ public static Vector4 HardLightDestAtop(Vector4 backdrop, Vector4 source, float [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, HardLight(source, backdrop)); } @@ -4400,7 +4400,7 @@ public static Vector4 HardLightDestOver(Vector4 backdrop, Vector4 source, float [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, HardLight(source, backdrop)); } @@ -4429,7 +4429,7 @@ public static Vector4 HardLightDestIn(Vector4 backdrop, Vector4 source, float op /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "HardLightDestOut" compositing equation. @@ -4455,7 +4455,7 @@ public static Vector4 HardLightDestOut(Vector4 backdrop, Vector4 source, float o /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "HardLightXor" compositing equation. 
@@ -4481,7 +4481,7 @@ public static Vector4 HardLightXor(Vector4 backdrop, Vector4 source, float opaci /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "HardLightClear" compositing equation. @@ -4507,7 +4507,7 @@ public static Vector4 HardLightClear(Vector4 backdrop, Vector4 source, float opa /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 150adb33a8..83bc055eff 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -47,7 +47,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + => Avx.Blend(source, source * opacity, BlendAlphaControl); /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. 
@@ -74,7 +74,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(backdrop, source, <#=blender#>(backdrop, source)); } @@ -104,7 +104,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(backdrop, source, <#=blender#>(backdrop, source)); } @@ -133,7 +133,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. @@ -159,7 +159,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "<#=blender#>Dest" compositing equation. 
@@ -212,7 +212,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Atop(source, backdrop, <#=blender#>(source, backdrop)); } @@ -242,7 +242,7 @@ internal static partial class PorterDuffFunctions [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestOver(Vector256 backdrop, Vector256 source, Vector256 opacity) { - source = Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl); + source = Avx.Blend(source, source * opacity, BlendAlphaControl); return Over(source, backdrop, <#=blender#>(source, backdrop)); } @@ -271,7 +271,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. @@ -297,7 +297,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Out(Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl), backdrop); + => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); /// /// Returns the result of the "<#=blender#>Xor" compositing equation. @@ -323,7 +323,7 @@ internal static partial class PorterDuffFunctions /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Xor(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); /// /// Returns the result of the "<#=blender#>Clear" compositing equation. @@ -349,7 +349,7 @@ internal static partial class PorterDuffFunctions /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) - => Clear(backdrop, Avx.Blend(source, Avx.Multiply(source, opacity), BlendAlphaControl)); + => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); <#} #> diff --git a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs index d3671abd47..be3e9ccd5d 100644 --- a/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs +++ b/tests/ImageSharp.Tests/TestUtilities/FeatureTesting/FeatureTestRunner.cs @@ -455,6 +455,7 @@ public enum HwIntrinsics : long DisableVAES = 1L << 17, DisableWAITPKG = 1L << 18, DisableX86Serialize = 1 << 19, + // Arm64 DisableArm64Aes = 1L << 20, DisableArm64Atomics = 1L << 21, @@ -466,6 +467,7 @@ public enum HwIntrinsics : long DisableArm64Sha256 = 1L << 27, DisableArm64Sve = 1L << 28, DisableArm64Sve2 = 1L << 29, + // RISC-V64 DisableRiscV64Zba = 1L << 30, DisableRiscV64Zbb = 1L << 31, From fd688db0eb1c011a2b41e879c6b83ba843b9136c Mon Sep 17 00:00:00 2001 From: James Jackson-South Date: Wed, 8 Apr 2026 15:32:55 +1000 Subject: [PATCH 3/3] Complete implementation and add tests/benchmark --- src/ImageSharp/Common/Helpers/Numerics.cs | 14 + .../Common/Helpers/SimdUtils.HwIntrinsics.cs | 22 - .../Common/Helpers/Vector512Utilities.cs | 15 + .../DefaultPixelBlenders.Generated.cs | 17056 +++++++++++++++- 
.../DefaultPixelBlenders.Generated.tt | 156 +- .../PorterDuffFunctions.Generated.cs | 1370 +- .../PorterDuffFunctions.Generated.tt | 150 + .../PixelBlenders/PorterDuffFunctions.cs | 242 + .../PorterDuffBulkVsSingleVector.cs | 23 +- .../PorterDuffCompositorTests.cs | 2 +- .../PixelBlenders/PorterDuffFunctionsTests.cs | 152 +- .../PorterDuffFunctionsTestsTPixel.cs | 108 +- .../TestUtilities/ApproximateFloatComparer.cs | 18 +- 13 files changed, 18723 insertions(+), 605 deletions(-) diff --git a/src/ImageSharp/Common/Helpers/Numerics.cs b/src/ImageSharp/Common/Helpers/Numerics.cs index 513eb7ab19..04ed48e210 100644 --- a/src/ImageSharp/Common/Helpers/Numerics.cs +++ b/src/ImageSharp/Common/Helpers/Numerics.cs @@ -643,6 +643,20 @@ public static Vector256 UnPremultiply(Vector256 source, Vector256< return Avx.Blend(result, alpha, BlendAlphaControl); } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 UnPremultiply(Vector512 source, Vector512 alpha) + { + // Check if alpha is zero to avoid division by zero + Vector512 zeroMask = Vector512.Equals(alpha, Vector512.Zero); + + // Divide source by alpha if alpha is nonzero, otherwise set all components to match the source value + Vector512 result = Vector512.ConditionalSelect(zeroMask, source, source / alpha); + + // Blend the result with the alpha vector to ensure that the alpha component is unchanged + Vector512 alphaMask = Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle(); + return Vector512.ConditionalSelect(alphaMask, alpha, result); + } + /// /// Permutes the given vector return a new instance with all the values set to . 
/// diff --git a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs index 154f0b5e22..022056deb0 100644 --- a/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs +++ b/src/ImageSharp/Common/Helpers/SimdUtils.HwIntrinsics.cs @@ -601,28 +601,6 @@ private static void Shuffle4Slice3( } } - /// - /// Performs a multiplication and a negated addition of the . - /// - /// ret = va - (vm0 * vm1) - /// The vector to add to the negated intermediate result. - /// The first vector to multiply. - /// The second vector to multiply. - /// The . - [MethodImpl(InliningOptions.ShortMethod)] - public static Vector256 MultiplyAddNegated( - Vector256 va, - Vector256 vm0, - Vector256 vm1) - { - if (Fma.IsSupported) - { - return Fma.MultiplyAddNegated(vm0, vm1, va); - } - - return Avx.Subtract(va, Avx.Multiply(vm0, vm1)); - } - /// /// Blend packed 8-bit integers from and using . /// The high bit of each corresponding byte determines the selection. diff --git a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs index 03ee4626cd..82a20158ae 100644 --- a/src/ImageSharp/Common/Helpers/Vector512Utilities.cs +++ b/src/ImageSharp/Common/Helpers/Vector512Utilities.cs @@ -87,6 +87,21 @@ public static Vector512 MultiplyAdd( Vector512 vm1) => Avx512F.FusedMultiplyAdd(vm0, vm1, va); + /// + /// Performs a multiplication and a negated addition of the . + /// + /// ret = va - (vm0 * vm1) + /// The vector to add to the negated intermediate result. + /// The first vector to multiply. + /// The second vector to multiply. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyAddNegated( + Vector512 va, + Vector512 vm0, + Vector512 vm1) + => Avx512F.FusedMultiplyAddNegated(vm0, vm1, va); + /// /// Restricts a vector between a minimum and a maximum value. 
/// diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs index 7cd9cc57ad..883693031e 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.cs @@ -46,7 +46,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -85,7 +112,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 
elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -121,7 +178,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -168,7 +269,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if 
(Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -233,7 +381,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref 
sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -272,7 +447,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); @@ -308,7 +513,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -355,7 +604,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -420,7 +716,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -459,7 +782,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = 
ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -495,7 +848,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -542,7 +939,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -607,7 +1051,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -646,7 +1117,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -682,7 +1183,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -729,7 +1274,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -794,15 +1386,15 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) { - // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 - ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); - ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 2u); + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref 
Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); - ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); - ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); - Vector256 opacity = Vector256.Create(amount); + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) { @@ -812,9 +1404,36 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + { + // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 + ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector256 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 2u); + + ref Vector256 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector256 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector256 opacity = Vector256.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + if (Numerics.Modulo2(destination.Length) != 0) + { + // Vector4 fits neatly in pairs. Any overlap has to be equal to 1. 
int i = destination.Length - 1; destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], amount); } @@ -833,7 +1452,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -869,7 +1518,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 
elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); @@ -916,7 +1609,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -981,7 +1721,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 
1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1020,7 +1787,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(destination)); @@ -1056,7 +1853,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1103,7 +1944,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1168,7 +2056,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1207,7 +2122,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1243,7 +2188,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1290,7 +2279,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1355,7 +2391,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1394,7 +2457,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1430,7 +2523,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1477,7 +2614,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1542,7 +2726,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1581,7 +2792,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); 
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1617,7 +2858,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1664,7 +2949,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrc(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrc(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1729,7 +3061,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1768,7 +3127,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1804,7 +3193,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1851,7 +3284,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1916,7 +3396,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1955,7 +3462,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -1991,7 +3528,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2038,7 +3619,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2103,7 +3731,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2142,7 +3797,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2178,7 +3863,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2225,7 +3954,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2290,7 +4066,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2329,7 +4132,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2365,7 +4198,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2412,7 +4289,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2477,7 +4401,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2516,7 +4467,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2552,7 +4533,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2599,7 +4624,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2664,7 +4736,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2703,7 +4802,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2739,7 +4868,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2786,7 +4959,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2851,7 +5071,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2890,7 +5137,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2926,7 +5203,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -2960,20 +5281,67 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3038,7 +5406,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref 
Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3077,7 +5472,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = 
Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3113,7 +5538,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four 
amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3160,7 +5629,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3225,7 +5741,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, 
(uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3264,7 +5807,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3300,7 +5873,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3347,7 +5964,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3412,7 +6076,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3451,7 +6142,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3487,7 +6208,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3534,7 +6299,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3599,7 +6411,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3638,7 +6477,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3674,7 +6543,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3721,7 +6634,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3786,7 +6746,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3825,7 +6812,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3861,7 +6878,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3908,7 +6969,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -3973,7 +7081,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4012,7 +7147,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4048,7 +7213,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4095,7 +7304,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4160,7 +7416,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4199,7 +7482,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4235,7 +7548,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4282,7 +7639,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4347,7 +7751,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4386,7 +7817,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4422,7 +7883,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4461,15 +7966,62 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4534,7 +8086,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 
destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4573,7 +8152,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4609,7 +8218,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4656,7 +8309,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4721,7 +8421,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4760,7 +8487,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4796,7 +8553,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4843,7 +8644,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4908,7 +8756,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4947,7 +8822,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -4983,7 +8888,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5030,7 +8979,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5095,7 +9091,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5134,7 +9157,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5170,7 +9223,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5217,7 +9314,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5282,7 +9426,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5321,7 +9492,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5357,7 +9558,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5404,7 +9649,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5469,7 +9761,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5508,7 +9827,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5544,7 +9893,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5591,7 +9984,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5656,7 +10096,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5695,7 +10162,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5731,7 +10228,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5778,7 +10319,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5843,7 +10431,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5882,7 +10497,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5918,7 +10563,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -5952,20 +10641,67 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6030,7 +10766,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 
destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6069,7 +10832,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); 
+ + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6105,7 +10898,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up 
each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6152,7 +10989,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6217,7 +11101,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref 
Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6256,7 +11167,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + 
destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6292,7 +11233,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6339,7 +11324,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6404,7 +11436,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6443,7 +11502,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); 
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6479,7 +11568,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6526,7 +11659,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6591,7 +11771,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6630,7 +11837,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6666,7 +11903,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6713,7 +11994,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6778,7 +12106,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6817,7 +12172,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); 
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6853,7 +12238,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6900,7 +12329,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -6965,7 +12441,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7004,7 +12507,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7040,7 +12573,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7087,7 +12664,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplySrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7152,7 +12776,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7191,7 +12842,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7227,7 +12908,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7274,7 +12999,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7339,7 +13111,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7378,7 +13177,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7414,7 +13243,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7453,15 +13326,62 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7526,7 +13446,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 
destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7565,7 +13512,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7601,7 +13578,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7648,7 +13669,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7713,7 +13781,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7752,7 +13847,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); 
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7788,7 +13913,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7835,7 +14004,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7900,7 +14116,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7939,7 +14182,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -7975,7 +14248,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8022,7 +14339,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8087,7 +14451,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8126,7 +14517,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8162,7 +14583,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8209,7 +14674,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlaySrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlaySrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8274,7 +14786,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8313,7 +14852,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8349,7 +14918,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8396,7 +15009,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightSrcOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightSrcOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8461,7 +15121,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8500,7 +15187,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8536,7 +15253,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8583,7 +15344,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8648,7 +15456,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8687,7 +15522,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); 
+ backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8723,7 +15588,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8770,7 +15679,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8835,7 +15791,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8874,7 +15857,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8910,7 +15923,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -8944,20 +16001,67 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne 
= Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.AddDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9022,7 +16126,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9061,7 +16192,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while 
(Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9097,7 +16258,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each 
quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9144,7 +16349,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9209,7 +16461,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 
backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9248,7 +16527,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9284,7 +16593,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9331,7 +16684,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9396,7 +16796,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9435,7 +16862,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9471,7 +16928,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9518,7 +17019,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9583,7 +17131,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9622,7 +17197,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9658,7 +17263,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9705,7 +17354,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9770,7 +17466,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9809,7 +17532,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9845,7 +17598,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9892,7 +17689,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9957,7 +17801,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -9996,7 +17867,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10032,7 +17933,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10079,7 +18024,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDest(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDest(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10144,7 +18136,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10183,7 +18202,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10219,7 +18268,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10266,7 +18359,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10331,7 +18471,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10370,7 +18537,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10406,7 +18603,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10445,15 +18686,62 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10518,7 +18806,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref 
Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10557,7 +18872,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10593,7 +18938,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10640,7 +19029,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10705,7 +19141,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10744,7 +19207,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10780,7 +19273,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10827,7 +19364,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10892,7 +19476,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10931,7 +19542,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -10967,7 +19608,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11014,7 +19699,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11079,7 +19811,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11118,7 +19877,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11154,7 +19943,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11201,7 +20034,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11266,7 +20146,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11305,7 +20212,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11341,7 +20278,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11388,7 +20369,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11453,7 +20481,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11492,7 +20547,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11528,7 +20613,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11575,7 +20704,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11640,7 +20816,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11679,7 +20882,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11715,7 +20948,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11762,7 +21039,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestAtop(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestAtop(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11827,7 +21151,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11866,7 +21217,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11902,7 +21283,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -11936,20 +21361,67 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12014,7 +21486,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref 
Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12053,7 +21552,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = 
Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12089,7 +21618,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current 
four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12136,7 +21709,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12201,7 +21821,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12240,7 +21887,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12276,7 +21953,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12323,7 +22044,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12388,7 +22156,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12427,7 +22222,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12463,7 +22288,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12510,7 +22379,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12575,7 +22491,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12614,7 +22557,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12650,7 +22623,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12697,7 +22714,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12762,7 +22826,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12801,7 +22892,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12837,7 +22958,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12884,7 +23049,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12949,7 +23161,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -12988,7 +23227,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13024,7 +23293,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13071,7 +23384,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13136,7 +23496,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13175,7 +23562,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13211,7 +23628,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13258,7 +23719,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13323,7 +23831,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13362,7 +23897,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13398,7 +23963,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13437,15 +24046,62 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOver(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOver(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13510,7 +24166,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref 
Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13549,7 +24232,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { 
+ destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13585,7 +24298,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13632,7 +24389,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13697,7 +24501,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13736,7 +24567,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13772,7 +24633,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13819,7 +24724,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13884,7 +24836,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13923,7 +24902,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -13959,7 +24968,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14006,7 +25059,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14071,7 +25171,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14110,7 +25237,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14146,7 +25303,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14193,7 +25394,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14258,7 +25506,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14297,7 +25572,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14333,7 +25638,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14380,7 +25729,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14445,7 +25841,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14484,7 +25907,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14520,7 +25973,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14567,7 +26064,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14632,7 +26176,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14671,7 +26242,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14707,7 +26308,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14754,7 +26399,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14819,7 +26511,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14858,7 +26577,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14894,7 +26643,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -14928,20 +26721,67 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15006,7 +26846,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref 
Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15045,7 +26912,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = 
Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15081,7 +26978,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current 
four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15128,7 +27069,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestIn(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestIn(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15193,7 +27181,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref 
destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15232,7 +27247,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15268,7 +27313,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15315,7 +27404,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15380,7 +27516,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15419,7 +27582,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15455,7 +27648,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15502,7 +27739,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15567,7 +27851,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15606,7 +27917,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15642,7 +27983,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15689,7 +28074,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15754,7 +28186,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15793,7 +28252,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15829,7 +28318,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15876,7 +28409,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15941,7 +28521,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -15980,7 +28587,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16016,7 +28653,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16063,7 +28744,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16128,7 +28856,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16167,7 +28922,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16203,7 +28988,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16250,7 +29079,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16315,7 +29191,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16354,7 +29257,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16390,7 +29323,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16429,15 +29406,62 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16502,7 +29526,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 
destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16541,7 +29592,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + 
destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16577,7 +29658,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16624,7 +29749,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16689,7 +29861,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16728,7 +29927,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref 
Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16764,7 +29993,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16811,7 +30084,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightDestOut(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightDestOut(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16876,7 +30196,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16915,7 +30262,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16951,7 +30328,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -16998,7 +30419,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17063,7 +30531,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17102,7 +30597,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17138,7 +30663,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17185,7 +30754,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17250,7 +30866,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17289,7 +30932,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17325,7 +30998,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17372,7 +31089,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17437,7 +31201,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17476,7 +31267,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17512,7 +31333,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17559,7 +31424,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, 
+ source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17624,7 +31536,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17663,7 +31602,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17699,7 +31668,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17746,7 +31759,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17811,7 +31871,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17850,7 +31937,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17886,7 +32003,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17920,20 +32081,67 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + 
Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) { - destination[i] = PorterDuffFunctions.DarkenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -17998,7 +32206,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 
destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18037,7 +32272,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = 
Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18073,7 +32338,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four 
amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18120,7 +32429,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18185,7 +32541,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, 
(uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18224,7 +32607,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18260,7 +32673,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18307,7 +32764,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18372,7 +32876,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18411,7 +32942,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18447,7 +33008,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18494,7 +33099,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, 
source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightClear(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightClear(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18559,7 +33211,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref 
Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18598,7 +33277,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref 
destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18634,7 +33343,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18681,7 +33434,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.NormalXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.NormalXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18746,7 +33546,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18785,7 +33612,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18821,7 +33678,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18868,7 +33769,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.MultiplyXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.MultiplyXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18933,7 +33881,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -18972,7 +33947,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase 
= ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19008,7 +34013,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19055,7 +34104,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, 
source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.AddXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.AddXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19120,7 +34216,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19159,7 +34282,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19195,7 +34348,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19242,7 +34439,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.SubtractXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.SubtractXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19307,7 +34551,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19346,7 +34617,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19382,7 +34683,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19421,15 +34766,62 @@ protected override void BlendFunction(Span destination, ReadOnlySpan + protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) + { + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 
vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.ScreenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.ScreenXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } } } - } - - /// - protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) - { - if (Avx2.IsSupported && destination.Length >= 2) + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19494,7 +34886,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref 
Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19533,7 +34952,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = 
PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19569,7 +35018,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19616,7 +35109,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.DarkenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.DarkenXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19681,7 +35221,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19720,7 +35287,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19756,7 +35353,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19803,7 +35444,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.LightenXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.LightenXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19868,7 +35556,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19907,7 +35622,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + 
backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19943,7 +35688,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -19990,7 +35779,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.OverlayXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.OverlayXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20055,7 +35891,34 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref 
MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20094,7 +35957,37 @@ protected override void BlendFunction(Span destination, ReadOnlySpan= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 
1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20130,7 +36023,51 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. 
+ Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -20177,7 +36114,54 @@ protected override void BlendFunction(Span destination, ReadOnlySpan protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + 
source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.HardLightXor(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.HardLightXor(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt index 3b885826b8..c2439c24cc 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/DefaultPixelBlenders.Generated.tt @@ -89,7 +89,34 @@ var blenders = new []{ { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && 
destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -128,7 +155,37 @@ var blenders = new []{ { amount = Numerics.Clamp(amount, 0, 1); - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + Vector512 sourceBase = 
Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 opacity = Vector512.Create(amount); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source, amount); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -164,7 +221,51 @@ var blenders = new []{ /// protected override void BlendFunction(Span destination, ReadOnlySpan background, ReadOnlySpan source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref Vector512 sourceBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(source)); + ref float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref 
amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + sourceBase = ref Unsafe.Add(ref sourceBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source[i], Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); @@ -211,7 +312,54 @@ var blenders = new []{ /// protected override void BlendFunction(Span destination, ReadOnlySpan background, Vector4 source, ReadOnlySpan amount) { - if (Avx2.IsSupported && destination.Length >= 2) + if (Avx512F.IsSupported && destination.Length >= 4) + { + // Divide by 4 as 4 elements per Vector4 and 16 per Vector512 + ref Vector512 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); + ref Vector512 destinationLast = ref Unsafe.Add(ref destinationBase, (uint)destination.Length / 4u); + + ref Vector512 backgroundBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(background)); + ref 
float amountBase = ref MemoryMarshal.GetReference(amount); + + Vector512 sourceBase = Vector512.Create( + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W, + source.X, source.Y, source.Z, source.W); + Vector512 vOne = Vector512.Create(1F); + + while (Unsafe.IsAddressLessThan(ref destinationBase, ref destinationLast)) + { + float amount0 = amountBase; + float amount1 = Unsafe.Add(ref amountBase, 1); + float amount2 = Unsafe.Add(ref amountBase, 2); + float amount3 = Unsafe.Add(ref amountBase, 3); + + // We need to create a Vector512 containing the current four amount values + // taking up each quarter of the Vector512 and then clamp them. + Vector512 opacity = Vector512.Create( + amount0, amount0, amount0, amount0, + amount1, amount1, amount1, amount1, + amount2, amount2, amount2, amount2, + amount3, amount3, amount3, amount3); + opacity = Vector512.Min(Vector512.Max(Vector512.Zero, opacity), vOne); + + destinationBase = PorterDuffFunctions.<#=blender_composer#>(backgroundBase, sourceBase, opacity); + destinationBase = ref Unsafe.Add(ref destinationBase, 1); + backgroundBase = ref Unsafe.Add(ref backgroundBase, 1); + amountBase = ref Unsafe.Add(ref amountBase, 4); + } + + int remainder = Numerics.Modulo4(destination.Length); + if (remainder != 0) + { + for (int i = destination.Length - remainder; i < destination.Length; i++) + { + destination[i] = PorterDuffFunctions.<#=blender_composer#>(background[i], source, Numerics.Clamp(amount[i], 0, 1F)); + } + } + } + else if (Avx2.IsSupported && destination.Length >= 2) { // Divide by 2 as 4 elements per Vector4 and 8 per Vector256 ref Vector256 destinationBase = ref Unsafe.As>(ref MemoryMarshal.GetReference(destination)); diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs index f0635230ca..d32966c24e 100644 --- 
a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.cs @@ -39,6 +39,17 @@ public static Vector4 NormalSrc(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 NormalSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "NormalSrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "NormalSrcAtop" compositing equation. /// @@ -69,6 +80,21 @@ public static Vector256 NormalSrcAtop(Vector256 backdrop, Vector25 return Atop(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcOver" compositing equation. /// @@ -99,6 +125,21 @@ public static Vector256 NormalSrcOver(Vector256 backdrop, Vector25 return Over(backdrop, source, Normal(backdrop, source)); } + /// + /// Returns the result of the "NormalSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Normal(backdrop, source)); + } + /// /// Returns the result of the "NormalSrcIn" compositing equation. /// @@ -125,6 +166,17 @@ public static Vector4 NormalSrcIn(Vector4 backdrop, Vector4 source, float opacit public static Vector256 NormalSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalSrcOut" compositing equation. /// @@ -151,6 +203,17 @@ public static Vector4 NormalSrcOut(Vector4 backdrop, Vector4 source, float opaci public static Vector256 NormalSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalDest" compositing equation. 
/// @@ -177,6 +240,19 @@ public static Vector256 NormalDest(Vector256 backdrop, Vector256 + /// Returns the result of the "NormalDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "NormalDestAtop" compositing equation. /// @@ -207,6 +283,21 @@ public static Vector256 NormalDestAtop(Vector256 backdrop, Vector2 return Atop(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestOver" compositing equation. /// @@ -237,6 +328,21 @@ public static Vector256 NormalDestOver(Vector256 backdrop, Vector2 return Over(source, backdrop, Normal(source, backdrop)); } + /// + /// Returns the result of the "NormalDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Normal(source, backdrop)); + } + /// /// Returns the result of the "NormalDestIn" compositing equation. 
/// @@ -263,6 +369,17 @@ public static Vector4 NormalDestIn(Vector4 backdrop, Vector4 source, float opaci public static Vector256 NormalDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "NormalDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "NormalDestOut" compositing equation. /// @@ -289,6 +406,17 @@ public static Vector4 NormalDestOut(Vector4 backdrop, Vector4 source, float opac public static Vector256 NormalDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "NormalDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "NormalXor" compositing equation. /// @@ -315,6 +443,17 @@ public static Vector4 NormalXor(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 NormalXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalClear" compositing equation. /// @@ -341,6 +480,17 @@ public static Vector4 NormalClear(Vector4 backdrop, Vector4 source, float opacit public static Vector256 NormalClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "NormalClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 NormalClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "NormalSrc" compositing equation. @@ -560,6 +710,17 @@ public static Vector4 MultiplySrc(Vector4 backdrop, Vector4 source, float opacit public static Vector256 MultiplySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "MultiplySrc compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "MultiplySrcAtop" compositing equation. 
/// @@ -590,6 +751,21 @@ public static Vector256 MultiplySrcAtop(Vector256 backdrop, Vector return Atop(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcOver" compositing equation. /// @@ -620,6 +796,21 @@ public static Vector256 MultiplySrcOver(Vector256 backdrop, Vector return Over(backdrop, source, Multiply(backdrop, source)); } + /// + /// Returns the result of the "MultiplySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Multiply(backdrop, source)); + } + /// /// Returns the result of the "MultiplySrcIn" compositing equation. /// @@ -646,6 +837,17 @@ public static Vector4 MultiplySrcIn(Vector4 backdrop, Vector4 source, float opac public static Vector256 MultiplySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplySrcOut" compositing equation. /// @@ -672,6 +874,17 @@ public static Vector4 MultiplySrcOut(Vector4 backdrop, Vector4 source, float opa public static Vector256 MultiplySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplySrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplyDest" compositing equation. /// @@ -698,6 +911,19 @@ public static Vector256 MultiplyDest(Vector256 backdrop, Vector256 return backdrop; } + /// + /// Returns the result of the "MultiplyDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "MultiplyDestAtop" compositing equation. /// @@ -728,6 +954,21 @@ public static Vector256 MultiplyDestAtop(Vector256 backdrop, Vecto return Atop(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestOver" compositing equation. /// @@ -758,6 +999,21 @@ public static Vector256 MultiplyDestOver(Vector256 backdrop, Vecto return Over(source, backdrop, Multiply(source, backdrop)); } + /// + /// Returns the result of the "MultiplyDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Multiply(source, backdrop)); + } + /// /// Returns the result of the "MultiplyDestIn" compositing equation. /// @@ -784,6 +1040,17 @@ public static Vector4 MultiplyDestIn(Vector4 backdrop, Vector4 source, float opa public static Vector256 MultiplyDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "MultiplyDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "MultiplyDestOut" compositing equation. 
/// @@ -810,6 +1077,17 @@ public static Vector4 MultiplyDestOut(Vector4 backdrop, Vector4 source, float op public static Vector256 MultiplyDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "MultiplyDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "MultiplyXor" compositing equation. /// @@ -836,6 +1114,17 @@ public static Vector4 MultiplyXor(Vector4 backdrop, Vector4 source, float opacit public static Vector256 MultiplyXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplyXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplyClear" compositing equation. /// @@ -862,6 +1151,17 @@ public static Vector4 MultiplyClear(Vector4 backdrop, Vector4 source, float opac public static Vector256 MultiplyClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "MultiplyClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 MultiplyClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "MultiplySrc" compositing equation. @@ -1081,6 +1381,17 @@ public static Vector4 AddSrc(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 AddSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "AddSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "AddSrcAtop" compositing equation. /// @@ -1111,6 +1422,21 @@ public static Vector256 AddSrcAtop(Vector256 backdrop, Vector256 + /// Returns the result of the "AddSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Add(backdrop, source)); + } + /// /// Returns the result of the "AddSrcOver" compositing equation. /// @@ -1142,18 +1468,33 @@ public static Vector256 AddSrcOver(Vector256 backdrop, Vector256 - /// Returns the result of the "AddSrcIn" compositing equation. + /// Returns the result of the "AddSrcOver" compositing equation. /// /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + public static Vector512 AddSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) { - source = Numerics.WithW(source, source * opacity); + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); - return In(backdrop, source); + return Over(backdrop, source, Add(backdrop, source)); + } + + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) + { + source = Numerics.WithW(source, source * opacity); + + return In(backdrop, source); } /// @@ -1167,6 +1508,17 @@ public static Vector4 AddSrcIn(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 AddSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddSrcOut" compositing equation. /// @@ -1193,6 +1545,17 @@ public static Vector4 AddSrcOut(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 AddSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddSrcOut" compositing equation. + /// + /// The backdrop vector. 
+ /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddDest" compositing equation. /// @@ -1219,6 +1582,19 @@ public static Vector256 AddDest(Vector256 backdrop, Vector256 + /// Returns the result of the "AddDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "AddDestAtop" compositing equation. /// @@ -1249,6 +1625,21 @@ public static Vector256 AddDestAtop(Vector256 backdrop, Vector256< return Atop(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestOver" compositing equation. /// @@ -1279,6 +1670,21 @@ public static Vector256 AddDestOver(Vector256 backdrop, Vector256< return Over(source, backdrop, Add(source, backdrop)); } + /// + /// Returns the result of the "AddDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Add(source, backdrop)); + } + /// /// Returns the result of the "AddDestIn" compositing equation. /// @@ -1305,6 +1711,17 @@ public static Vector4 AddDestIn(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 AddDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "AddDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "AddDestOut" compositing equation. /// @@ -1331,6 +1748,17 @@ public static Vector4 AddDestOut(Vector4 backdrop, Vector4 source, float opacity public static Vector256 AddDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "AddDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "AddXor" compositing equation. 
/// @@ -1357,6 +1785,17 @@ public static Vector4 AddXor(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 AddXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddClear" compositing equation. /// @@ -1383,6 +1822,17 @@ public static Vector4 AddClear(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 AddClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "AddClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 AddClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "AddSrc" compositing equation. @@ -1602,6 +2052,17 @@ public static Vector4 SubtractSrc(Vector4 backdrop, Vector4 source, float opacit public static Vector256 SubtractSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "SubtractSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "SubtractSrcAtop" compositing equation. /// @@ -1632,6 +2093,21 @@ public static Vector256 SubtractSrcAtop(Vector256 backdrop, Vector return Atop(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcOver" compositing equation. /// @@ -1662,6 +2138,21 @@ public static Vector256 SubtractSrcOver(Vector256 backdrop, Vector return Over(backdrop, source, Subtract(backdrop, source)); } + /// + /// Returns the result of the "SubtractSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Subtract(backdrop, source)); + } + /// /// Returns the result of the "SubtractSrcIn" compositing equation. 
/// @@ -1688,6 +2179,17 @@ public static Vector4 SubtractSrcIn(Vector4 backdrop, Vector4 source, float opac public static Vector256 SubtractSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractSrcOut" compositing equation. /// @@ -1714,6 +2216,17 @@ public static Vector4 SubtractSrcOut(Vector4 backdrop, Vector4 source, float opa public static Vector256 SubtractSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractDest" compositing equation. /// @@ -1740,6 +2253,19 @@ public static Vector256 SubtractDest(Vector256 backdrop, Vector256 return backdrop; } + /// + /// Returns the result of the "SubtractDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "SubtractDestAtop" compositing equation. /// @@ -1770,6 +2296,21 @@ public static Vector256 SubtractDestAtop(Vector256 backdrop, Vecto return Atop(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestOver" compositing equation. /// @@ -1800,6 +2341,21 @@ public static Vector256 SubtractDestOver(Vector256 backdrop, Vecto return Over(source, backdrop, Subtract(source, backdrop)); } + /// + /// Returns the result of the "SubtractDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Subtract(source, backdrop)); + } + /// /// Returns the result of the "SubtractDestIn" compositing equation. 
/// @@ -1826,6 +2382,17 @@ public static Vector4 SubtractDestIn(Vector4 backdrop, Vector4 source, float opa public static Vector256 SubtractDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "SubtractDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "SubtractDestOut" compositing equation. /// @@ -1852,6 +2419,17 @@ public static Vector4 SubtractDestOut(Vector4 backdrop, Vector4 source, float op public static Vector256 SubtractDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "SubtractDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "SubtractXor" compositing equation. /// @@ -1878,6 +2456,17 @@ public static Vector4 SubtractXor(Vector4 backdrop, Vector4 source, float opacit public static Vector256 SubtractXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. 
Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractClear" compositing equation. /// @@ -1904,6 +2493,17 @@ public static Vector4 SubtractClear(Vector4 backdrop, Vector4 source, float opac public static Vector256 SubtractClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "SubtractClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 SubtractClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "SubtractSrc" compositing equation. @@ -2123,6 +2723,17 @@ public static Vector4 ScreenSrc(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 ScreenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "ScreenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "ScreenSrcAtop" compositing equation. 
/// @@ -2153,6 +2764,21 @@ public static Vector256 ScreenSrcAtop(Vector256 backdrop, Vector25 return Atop(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcOver" compositing equation. /// @@ -2183,6 +2809,21 @@ public static Vector256 ScreenSrcOver(Vector256 backdrop, Vector25 return Over(backdrop, source, Screen(backdrop, source)); } + /// + /// Returns the result of the "ScreenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Screen(backdrop, source)); + } + /// /// Returns the result of the "ScreenSrcIn" compositing equation. /// @@ -2209,6 +2850,17 @@ public static Vector4 ScreenSrcIn(Vector4 backdrop, Vector4 source, float opacit public static Vector256 ScreenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenSrcOut" compositing equation. /// @@ -2235,6 +2887,17 @@ public static Vector4 ScreenSrcOut(Vector4 backdrop, Vector4 source, float opaci public static Vector256 ScreenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenDest" compositing equation. /// @@ -2261,6 +2924,19 @@ public static Vector256 ScreenDest(Vector256 backdrop, Vector256 + /// Returns the result of the "ScreenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "ScreenDestAtop" compositing equation. /// @@ -2282,11 +2958,26 @@ public static Vector4 ScreenDestAtop(Vector4 backdrop, Vector4 source, float opa /// The backdrop vector. /// The source vector. /// The source opacity. Range 0..1 - /// The . + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + { + source = Avx.Blend(source, source * opacity, BlendAlphaControl); + + return Atop(source, backdrop, Screen(source, backdrop)); + } + + /// + /// Returns the result of the "ScreenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 ScreenDestAtop(Vector256 backdrop, Vector256 source, Vector256 opacity) + public static Vector512 ScreenDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) { - source = Avx.Blend(source, source * opacity, BlendAlphaControl); + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); return Atop(source, backdrop, Screen(source, backdrop)); } @@ -2321,6 +3012,21 @@ public static Vector256 ScreenDestOver(Vector256 backdrop, Vector2 return Over(source, backdrop, Screen(source, backdrop)); } + /// + /// Returns the result of the "ScreenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Screen(source, backdrop)); + } + /// /// Returns the result of the "ScreenDestIn" compositing equation. /// @@ -2347,6 +3053,17 @@ public static Vector4 ScreenDestIn(Vector4 backdrop, Vector4 source, float opaci public static Vector256 ScreenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "ScreenDestIn" compositing equation. 
+ /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "ScreenDestOut" compositing equation. /// @@ -2373,6 +3090,17 @@ public static Vector4 ScreenDestOut(Vector4 backdrop, Vector4 source, float opac public static Vector256 ScreenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "ScreenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "ScreenXor" compositing equation. /// @@ -2399,6 +3127,17 @@ public static Vector4 ScreenXor(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 ScreenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenClear" compositing equation. 
/// @@ -2425,6 +3164,17 @@ public static Vector4 ScreenClear(Vector4 backdrop, Vector4 source, float opacit public static Vector256 ScreenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "ScreenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 ScreenClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "ScreenSrc" compositing equation. @@ -2644,6 +3394,17 @@ public static Vector4 DarkenSrc(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 DarkenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "DarkenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "DarkenSrcAtop" compositing equation. /// @@ -2674,6 +3435,21 @@ public static Vector256 DarkenSrcAtop(Vector256 backdrop, Vector25 return Atop(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcOver" compositing equation. /// @@ -2704,6 +3480,21 @@ public static Vector256 DarkenSrcOver(Vector256 backdrop, Vector25 return Over(backdrop, source, Darken(backdrop, source)); } + /// + /// Returns the result of the "DarkenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Darken(backdrop, source)); + } + /// /// Returns the result of the "DarkenSrcIn" compositing equation. /// @@ -2730,6 +3521,17 @@ public static Vector4 DarkenSrcIn(Vector4 backdrop, Vector4 source, float opacit public static Vector256 DarkenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenSrcOut" compositing equation. 
/// @@ -2756,6 +3558,17 @@ public static Vector4 DarkenSrcOut(Vector4 backdrop, Vector4 source, float opaci public static Vector256 DarkenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenDest" compositing equation. /// @@ -2782,6 +3595,19 @@ public static Vector256 DarkenDest(Vector256 backdrop, Vector256 + /// Returns the result of the "DarkenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "DarkenDestAtop" compositing equation. /// @@ -2812,6 +3638,21 @@ public static Vector256 DarkenDestAtop(Vector256 backdrop, Vector2 return Atop(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestOver" compositing equation. 
/// @@ -2842,6 +3683,21 @@ public static Vector256 DarkenDestOver(Vector256 backdrop, Vector2 return Over(source, backdrop, Darken(source, backdrop)); } + /// + /// Returns the result of the "DarkenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Darken(source, backdrop)); + } + /// /// Returns the result of the "DarkenDestIn" compositing equation. /// @@ -2868,6 +3724,17 @@ public static Vector4 DarkenDestIn(Vector4 backdrop, Vector4 source, float opaci public static Vector256 DarkenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "DarkenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "DarkenDestOut" compositing equation. /// @@ -2894,6 +3761,17 @@ public static Vector4 DarkenDestOut(Vector4 backdrop, Vector4 source, float opac public static Vector256 DarkenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "DarkenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "DarkenXor" compositing equation. /// @@ -2920,6 +3798,17 @@ public static Vector4 DarkenXor(Vector4 backdrop, Vector4 source, float opacity) public static Vector256 DarkenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenClear" compositing equation. /// @@ -2946,6 +3835,17 @@ public static Vector4 DarkenClear(Vector4 backdrop, Vector4 source, float opacit public static Vector256 DarkenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "DarkenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 DarkenClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "DarkenSrc" compositing equation. 
@@ -3165,6 +4065,17 @@ public static Vector4 LightenSrc(Vector4 backdrop, Vector4 source, float opacity public static Vector256 LightenSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "LightenSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "LightenSrcAtop" compositing equation. /// @@ -3195,6 +4106,21 @@ public static Vector256 LightenSrcAtop(Vector256 backdrop, Vector2 return Atop(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcOver" compositing equation. /// @@ -3225,6 +4151,21 @@ public static Vector256 LightenSrcOver(Vector256 backdrop, Vector2 return Over(backdrop, source, Lighten(backdrop, source)); } + /// + /// Returns the result of the "LightenSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Lighten(backdrop, source)); + } + /// /// Returns the result of the "LightenSrcIn" compositing equation. /// @@ -3251,6 +4192,17 @@ public static Vector4 LightenSrcIn(Vector4 backdrop, Vector4 source, float opaci public static Vector256 LightenSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenSrcOut" compositing equation. /// @@ -3277,6 +4229,17 @@ public static Vector4 LightenSrcOut(Vector4 backdrop, Vector4 source, float opac public static Vector256 LightenSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenDest" compositing equation. 
/// @@ -3303,6 +4266,19 @@ public static Vector256 LightenDest(Vector256 backdrop, Vector256< return backdrop; } + /// + /// Returns the result of the "LightenDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "LightenDestAtop" compositing equation. /// @@ -3333,6 +4309,21 @@ public static Vector256 LightenDestAtop(Vector256 backdrop, Vector return Atop(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Lighten(source, backdrop)); + } + /// /// Returns the result of the "LightenDestOver" compositing equation. /// @@ -3363,6 +4354,21 @@ public static Vector256 LightenDestOver(Vector256 backdrop, Vector return Over(source, backdrop, Lighten(source, backdrop)); } + /// + /// Returns the result of the "LightenDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Lighten(source, backdrop)); + } + /// /// Returns the result of the "LightenDestIn" compositing equation. 
/// @@ -3386,8 +4392,19 @@ public static Vector4 LightenDestIn(Vector4 backdrop, Vector4 source, float opac /// The source opacity. Range 0..1 /// The . [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) - => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + public static Vector256 LightenDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) + => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + + /// + /// Returns the result of the "LightenDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); /// /// Returns the result of the "LightenDestOut" compositing equation. @@ -3415,6 +4432,17 @@ public static Vector4 LightenDestOut(Vector4 backdrop, Vector4 source, float opa public static Vector256 LightenDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "LightenDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "LightenXor" compositing equation. 
/// @@ -3441,6 +4469,17 @@ public static Vector4 LightenXor(Vector4 backdrop, Vector4 source, float opacity public static Vector256 LightenXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenClear" compositing equation. /// @@ -3467,6 +4506,17 @@ public static Vector4 LightenClear(Vector4 backdrop, Vector4 source, float opaci public static Vector256 LightenClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "LightenClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 LightenClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "LightenSrc" compositing equation. @@ -3686,6 +4736,17 @@ public static Vector4 OverlaySrc(Vector4 backdrop, Vector4 source, float opacity public static Vector256 OverlaySrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "OverlaySrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "OverlaySrcAtop" compositing equation. /// @@ -3716,6 +4777,21 @@ public static Vector256 OverlaySrcAtop(Vector256 backdrop, Vector2 return Atop(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcOver" compositing equation. /// @@ -3746,6 +4822,21 @@ public static Vector256 OverlaySrcOver(Vector256 backdrop, Vector2 return Over(backdrop, source, Overlay(backdrop, source)); } + /// + /// Returns the result of the "OverlaySrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, Overlay(backdrop, source)); + } + /// /// Returns the result of the "OverlaySrcIn" compositing equation. 
/// @@ -3772,6 +4863,17 @@ public static Vector4 OverlaySrcIn(Vector4 backdrop, Vector4 source, float opaci public static Vector256 OverlaySrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlaySrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlaySrcOut" compositing equation. /// @@ -3798,6 +4900,17 @@ public static Vector4 OverlaySrcOut(Vector4 backdrop, Vector4 source, float opac public static Vector256 OverlaySrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlaySrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlaySrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlayDest" compositing equation. /// @@ -3824,6 +4937,19 @@ public static Vector256 OverlayDest(Vector256 backdrop, Vector256< return backdrop; } + /// + /// Returns the result of the "OverlayDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "OverlayDestAtop" compositing equation. /// @@ -3854,6 +4980,21 @@ public static Vector256 OverlayDestAtop(Vector256 backdrop, Vector return Atop(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestOver" compositing equation. /// @@ -3884,6 +5025,21 @@ public static Vector256 OverlayDestOver(Vector256 backdrop, Vector return Over(source, backdrop, Overlay(source, backdrop)); } + /// + /// Returns the result of the "OverlayDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, Overlay(source, backdrop)); + } + /// /// Returns the result of the "OverlayDestIn" compositing equation. 
/// @@ -3910,6 +5066,17 @@ public static Vector4 OverlayDestIn(Vector4 backdrop, Vector4 source, float opac public static Vector256 OverlayDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "OverlayDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "OverlayDestOut" compositing equation. /// @@ -3936,6 +5103,17 @@ public static Vector4 OverlayDestOut(Vector4 backdrop, Vector4 source, float opa public static Vector256 OverlayDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "OverlayDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "OverlayXor" compositing equation. /// @@ -3962,6 +5140,17 @@ public static Vector4 OverlayXor(Vector4 backdrop, Vector4 source, float opacity public static Vector256 OverlayXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlayXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlayClear" compositing equation. /// @@ -3988,6 +5177,17 @@ public static Vector4 OverlayClear(Vector4 backdrop, Vector4 source, float opaci public static Vector256 OverlayClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "OverlayClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "OverlaySrc" compositing equation. @@ -4207,6 +5407,17 @@ public static Vector4 HardLightSrc(Vector4 backdrop, Vector4 source, float opaci public static Vector256 HardLightSrc(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "HardLightSrc" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrc(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "HardLightSrcAtop" compositing equation. 
/// @@ -4237,6 +5448,21 @@ public static Vector256 HardLightSrcAtop(Vector256 backdrop, Vecto return Atop(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcOver" compositing equation. /// @@ -4267,6 +5493,21 @@ public static Vector256 HardLightSrcOver(Vector256 backdrop, Vecto return Over(backdrop, source, HardLight(backdrop, source)); } + /// + /// Returns the result of the "HardLightSrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, HardLight(backdrop, source)); + } + /// /// Returns the result of the "HardLightSrcIn" compositing equation. /// @@ -4293,6 +5534,17 @@ public static Vector4 HardLightSrcIn(Vector4 backdrop, Vector4 source, float opa public static Vector256 HardLightSrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightSrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightSrcOut" compositing equation. /// @@ -4319,6 +5571,17 @@ public static Vector4 HardLightSrcOut(Vector4 backdrop, Vector4 source, float op public static Vector256 HardLightSrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightSrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightSrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightDest" compositing equation. /// @@ -4345,6 +5608,19 @@ public static Vector256 HardLightDest(Vector256 backdrop, Vector25 return backdrop; } + /// + /// Returns the result of the "HardLightDest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "HardLightDestAtop" compositing equation. /// @@ -4375,6 +5651,21 @@ public static Vector256 HardLightDestAtop(Vector256 backdrop, Vect return Atop(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestOver" compositing equation. /// @@ -4405,6 +5696,21 @@ public static Vector256 HardLightDestOver(Vector256 backdrop, Vect return Over(source, backdrop, HardLight(source, backdrop)); } + /// + /// Returns the result of the "HardLightDestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, HardLight(source, backdrop)); + } + /// /// Returns the result of the "HardLightDestIn" compositing equation. /// @@ -4431,6 +5737,17 @@ public static Vector4 HardLightDestIn(Vector4 backdrop, Vector4 source, float op public static Vector256 HardLightDestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "HardLightDestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "HardLightDestOut" compositing equation. 
/// @@ -4457,6 +5774,17 @@ public static Vector4 HardLightDestOut(Vector4 backdrop, Vector4 source, float o public static Vector256 HardLightDestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "HardLightDestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightDestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "HardLightXor" compositing equation. /// @@ -4483,6 +5811,17 @@ public static Vector4 HardLightXor(Vector4 backdrop, Vector4 source, float opaci public static Vector256 HardLightXor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightXor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightXor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightClear" compositing equation. /// @@ -4509,6 +5848,17 @@ public static Vector4 HardLightClear(Vector4 backdrop, Vector4 source, float opa public static Vector256 HardLightClear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "HardLightClear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. 
Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLightClear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "HardLightSrc" compositing equation. diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt index 83bc055eff..7cb007bcae 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.Generated.tt @@ -49,6 +49,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>Src(Vector256 backdrop, Vector256 source, Vector256 opacity) => Avx.Blend(source, source * opacity, BlendAlphaControl); + /// + /// Returns the result of the "<#=blender#>Src" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Src(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + /// /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. /// @@ -79,6 +90,21 @@ internal static partial class PorterDuffFunctions return Atop(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcOver" compositing equation. /// @@ -109,6 +135,21 @@ internal static partial class PorterDuffFunctions return Over(backdrop, source, <#=blender#>(backdrop, source)); } + /// + /// Returns the result of the "<#=blender#>SrcOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(backdrop, source, <#=blender#>(backdrop, source)); + } + /// /// Returns the result of the "<#=blender#>SrcIn" compositing equation. /// @@ -135,6 +176,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>SrcIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>SrcIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "<#=blender#>SrcOut" compositing equation. 
/// @@ -161,6 +213,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>SrcOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>SrcOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>SrcOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "<#=blender#>Dest" compositing equation. /// @@ -187,6 +250,19 @@ internal static partial class PorterDuffFunctions return backdrop; } + /// + /// Returns the result of the "<#=blender#>Dest" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Dest(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + return backdrop; + } + /// /// Returns the result of the "<#=blender#>DestAtop" compositing equation. /// @@ -217,6 +293,21 @@ internal static partial class PorterDuffFunctions return Atop(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestAtop" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestAtop(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Atop(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestOver" compositing equation. /// @@ -247,6 +338,21 @@ internal static partial class PorterDuffFunctions return Over(source, backdrop, <#=blender#>(source, backdrop)); } + /// + /// Returns the result of the "<#=blender#>DestOver" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestOver(Vector512 backdrop, Vector512 source, Vector512 opacity) + { + source = Avx512F.BlendVariable(source, source * opacity, AlphaMask512()); + + return Over(source, backdrop, <#=blender#>(source, backdrop)); + } + /// /// Returns the result of the "<#=blender#>DestIn" compositing equation. /// @@ -273,6 +379,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>DestIn(Vector256 backdrop, Vector256 source, Vector256 opacity) => In(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "<#=blender#>DestIn" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestIn(Vector512 backdrop, Vector512 source, Vector512 opacity) + => In(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "<#=blender#>DestOut" compositing equation. 
/// @@ -299,6 +416,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>DestOut(Vector256 backdrop, Vector256 source, Vector256 opacity) => Out(Avx.Blend(source, source * opacity, BlendAlphaControl), backdrop); + /// + /// Returns the result of the "<#=blender#>DestOut" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>DestOut(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Out(Avx512F.BlendVariable(source, source * opacity, AlphaMask512()), backdrop); + /// /// Returns the result of the "<#=blender#>Xor" compositing equation. /// @@ -325,6 +453,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>Xor(Vector256 backdrop, Vector256 source, Vector256 opacity) => Xor(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>Xor" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Xor(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Xor(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + /// /// Returns the result of the "<#=blender#>Clear" compositing equation. /// @@ -351,6 +490,17 @@ internal static partial class PorterDuffFunctions public static Vector256 <#=blender#>Clear(Vector256 backdrop, Vector256 source, Vector256 opacity) => Clear(backdrop, Avx.Blend(source, source * opacity, BlendAlphaControl)); + /// + /// Returns the result of the "<#=blender#>Clear" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The source opacity. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 <#=blender#>Clear(Vector512 backdrop, Vector512 source, Vector512 opacity) + => Clear(backdrop, Avx512F.BlendVariable(source, source * opacity, AlphaMask512())); + <#} #> <# void GenerateGenericPixelBlender(string blender, string composer) { #> diff --git a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs index 45c4aade7b..948076fa32 100644 --- a/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs +++ b/src/ImageSharp/PixelFormats/PixelBlenders/PorterDuffFunctions.cs @@ -45,6 +45,16 @@ public static Vector4 Normal(Vector4 backdrop, Vector4 source) public static Vector256 Normal(Vector256 backdrop, Vector256 source) => source; + /// + /// Returns the result of the "Normal" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Normal(Vector512 backdrop, Vector512 source) + => source; + /// /// Returns the result of the "Multiply" compositing equation. /// @@ -65,6 +75,16 @@ public static Vector4 Multiply(Vector4 backdrop, Vector4 source) public static Vector256 Multiply(Vector256 backdrop, Vector256 source) => backdrop * source; + /// + /// Returns the result of the "Multiply" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Multiply(Vector512 backdrop, Vector512 source) + => backdrop * source; + /// /// Returns the result of the "Add" compositing equation. /// @@ -85,6 +105,16 @@ public static Vector4 Add(Vector4 backdrop, Vector4 source) public static Vector256 Add(Vector256 backdrop, Vector256 source) => Vector256.Min(Vector256.Create(1F), backdrop + source); + /// + /// Returns the result of the "Add" compositing equation. + /// + /// The backdrop vector. 
+ /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Add(Vector512 backdrop, Vector512 source) + => Vector512.Min(Vector512.Create(1F), backdrop + source); + /// /// Returns the result of the "Subtract" compositing equation. /// @@ -105,6 +135,16 @@ public static Vector4 Subtract(Vector4 backdrop, Vector4 source) public static Vector256 Subtract(Vector256 backdrop, Vector256 source) => Vector256.Max(Vector256.Zero, backdrop - source); + /// + /// Returns the result of the "Subtract" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Subtract(Vector512 backdrop, Vector512 source) + => Vector512.Max(Vector512.Zero, backdrop - source); + /// /// Returns the result of the "Screen" compositing equation. /// @@ -128,6 +168,19 @@ public static Vector256 Screen(Vector256 backdrop, Vector256 + /// Returns the result of the "Screen" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Screen(Vector512 backdrop, Vector512 source) + { + Vector512 vOne = Vector512.Create(1F); + return Vector512_.MultiplyAddNegated(vOne, vOne - backdrop, vOne - source); + } + /// /// Returns the result of the "Darken" compositing equation. /// @@ -148,6 +201,16 @@ public static Vector4 Darken(Vector4 backdrop, Vector4 source) public static Vector256 Darken(Vector256 backdrop, Vector256 source) => Vector256.Min(backdrop, source); + /// + /// Returns the result of the "Darken" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Darken(Vector512 backdrop, Vector512 source) + => Vector512.Min(backdrop, source); + /// /// Returns the result of the "Lighten" compositing equation. 
/// @@ -167,6 +230,16 @@ public static Vector256 Darken(Vector256 backdrop, Vector256 Lighten(Vector256 backdrop, Vector256 source) => Vector256.Max(backdrop, source); + /// + /// Returns the result of the "Lighten" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Lighten(Vector512 backdrop, Vector512 source) + => Vector512.Max(backdrop, source); + /// /// Returns the result of the "Overlay" compositing equation. /// @@ -196,6 +269,19 @@ public static Vector256 Overlay(Vector256 backdrop, Vector256.Zero, BlendAlphaControl)); } + /// + /// Returns the result of the "Overlay" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Overlay(Vector512 backdrop, Vector512 source) + { + Vector512 color = OverlayValueFunction(backdrop, source); + return Vector512.Min(Vector512.Create(1F), Vector512.ConditionalSelect(AlphaMask512(), Vector512.Zero, color)); + } + /// /// Returns the result of the "HardLight" compositing equation. /// @@ -225,6 +311,19 @@ public static Vector256 HardLight(Vector256 backdrop, Vector256.Zero, BlendAlphaControl)); } + /// + /// Returns the result of the "HardLight" compositing equation. + /// + /// The backdrop vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 HardLight(Vector512 backdrop, Vector512 source) + { + Vector512 color = OverlayValueFunction(source, backdrop); + return Vector512.Min(Vector512.Create(1F), Vector512.ConditionalSelect(AlphaMask512(), Vector512.Zero, color)); + } + /// /// Helper function for Overlay and HardLight modes /// @@ -253,6 +352,24 @@ public static Vector256 OverlayValueFunction(Vector256 backdrop, V return Avx.BlendVariable(left, right, cmp); } + /// + /// Helper function for Overlay and HardLight modes + /// + /// Backdrop color element + /// Source color element + /// Overlay value + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 OverlayValueFunction(Vector512 backdrop, Vector512 source) + { + Vector512 vOne = Vector512.Create(1F); + Vector512 left = (backdrop + backdrop) * source; + + Vector512 vOneMinusSource = vOne - source; + Vector512 right = Vector512_.MultiplyAddNegated(vOne, vOneMinusSource + vOneMinusSource, vOne - backdrop); + Vector512 cmp = Avx512F.CompareGreaterThan(backdrop, Vector512.Create(.5F)); + return Vector512.ConditionalSelect(cmp, right, left); + } + /// /// Returns the result of the "Over" compositing equation. /// @@ -312,6 +429,36 @@ public static Vector256 Over(Vector256 destination, Vector256 + /// Returns the result of the "Over" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Over(Vector512 destination, Vector512 source, Vector512 blend) + { + // calculate weights + Vector512 sW = Vector512_.ShuffleNative(source, ShuffleAlphaControl); + Vector512 dW = Vector512_.ShuffleNative(destination, ShuffleAlphaControl); + + Vector512 blendW = sW * dW; + Vector512 dstW = dW - blendW; + Vector512 srcW = sW - blendW; + + // calculate final alpha + Vector512 alpha = dstW + sW; + + // calculate final color + Vector512 color = destination * dstW; + color = Vector512_.MultiplyAdd(color, source, srcW); + color = Vector512_.MultiplyAdd(color, blend, blendW); + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "Atop" compositing equation. /// @@ -365,6 +512,31 @@ public static Vector256 Atop(Vector256 destination, Vector256 + /// Returns the result of the "Atop" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The amount to blend. Range 0..1 + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Atop(Vector512 destination, Vector512 source, Vector512 blend) + { + // calculate final alpha + Vector512 alpha = Vector512_.ShuffleNative(destination, ShuffleAlphaControl); + + // calculate weights + Vector512 sW = Vector512_.ShuffleNative(source, ShuffleAlphaControl); + Vector512 blendW = sW * alpha; + Vector512 dstW = alpha - blendW; + + // calculate final color + Vector512 color = Vector512_.MultiplyAdd(blend * blendW, destination, dstW); + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "In" compositing equation. /// @@ -402,6 +574,25 @@ public static Vector256 In(Vector256 destination, Vector256 return Numerics.UnPremultiply(color, alpha); } + /// + /// Returns the result of the "In" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 In(Vector512 destination, Vector512 source) + { + // calculate alpha + Vector512 alpha = Vector512_.ShuffleNative(source * destination, ShuffleAlphaControl); + + // premultiply + Vector512 color = source * alpha; + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "Out" compositing equation. /// @@ -439,6 +630,25 @@ public static Vector256 Out(Vector256 destination, Vector256 + /// Returns the result of the "Out" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Out(Vector512 destination, Vector512 source) + { + // calculate alpha + Vector512 alpha = Vector512_.ShuffleNative(source * (Vector512.Create(1F) - destination), ShuffleAlphaControl); + + // premultiply + Vector512 color = source * alpha; + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + /// /// Returns the result of the "XOr" compositing equation. /// @@ -487,9 +697,41 @@ public static Vector256 Xor(Vector256 destination, Vector256 + /// Returns the result of the "XOr" compositing equation. + /// + /// The destination vector. + /// The source vector. + /// The . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static Vector512 Xor(Vector512 destination, Vector512 source) + { + // calculate weights + Vector512 sW = Vector512_.ShuffleNative(source, ShuffleAlphaControl); + Vector512 dW = Vector512_.ShuffleNative(destination, ShuffleAlphaControl); + + Vector512 vOne = Vector512.Create(1F); + Vector512 srcW = vOne - dW; + Vector512 dstW = vOne - sW; + + // calculate alpha + Vector512 alpha = Vector512_.MultiplyAdd(dW * dstW, sW, srcW); + Vector512 color = Vector512_.MultiplyAdd((dW * destination) * dstW, sW * source, srcW); + + // unpremultiply + return Numerics.UnPremultiply(color, alpha); + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector4 Clear(Vector4 backdrop, Vector4 source) => Vector4.Zero; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector256 Clear(Vector256 backdrop, Vector256 source) => Vector256.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector512 Clear(Vector512 backdrop, Vector512 source) => Vector512.Zero; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector512 AlphaMask512() + => Vector512.Create(0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1, 0, 0, 0, -1).AsSingle(); } diff --git a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs index ecf8b125f7..fe43ce5e79 100644 --- a/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs +++ b/tests/ImageSharp.Benchmarks/PixelBlenders/PorterDuffBulkVsSingleVector.cs @@ -18,8 +18,8 @@ public class PorterDuffBulkVsSingleVector [GlobalSetup] public void Setup() { - this.backdrop = new Vector4[8 * 20]; - this.source = new Vector4[8 * 20]; + this.backdrop = new Vector4[8 * 40]; + this.source = new Vector4[8 * 40]; FillRandom(this.backdrop); FillRandom(this.source); @@ -49,7 +49,7 @@ public Vector4 OverlayValueFunction_Scalar() return result; } - 
[Benchmark(Description = "Avx")] + [Benchmark(Description = "Avx2")] public Vector256 OverlayValueFunction_Avx() { ref Vector256 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); @@ -65,4 +65,21 @@ public Vector256 OverlayValueFunction_Avx() return result; } + + [Benchmark(Description = "Avx512")] + public Vector512 OverlayValueFunction_Avx512() + { + ref Vector512 backdrop = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.backdrop)); + ref Vector512 source = ref Unsafe.As>(ref MemoryMarshal.GetReference(this.source)); + + Vector512 result = default; + Vector512 opacity = Vector512.Create(.5F); + int count = this.backdrop.Length / 4; + for (nuint i = 0; i < (uint)count; i++) + { + result = PorterDuffFunctions.NormalSrcOver(Unsafe.Add(ref backdrop, i), Unsafe.Add(ref source, i), opacity); + } + + return result; + } } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs index 1086afe76d..994b7d02ee 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffCompositorTests.cs @@ -59,7 +59,7 @@ TestImageProvider provider FeatureTestRunner.RunWithHwIntrinsicsFeature( RunTest, - HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX, + HwIntrinsics.AllowAll | HwIntrinsics.DisableAVX512 | HwIntrinsics.DisableAVX, provider, mode.ToString()); } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs index 976a272ebf..0def097881 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTests.cs @@ -4,7 +4,6 @@ using System.Numerics; using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; -using 
Castle.Components.DictionaryAdapter; using SixLabors.ImageSharp.PixelFormats.PixelBlenders; using SixLabors.ImageSharp.Tests.TestUtilities; @@ -45,6 +44,22 @@ public void NormalBlendFunction256(TestVector4 back, TestVector4 source, float a Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(NormalBlendFunctionData))] + public void NormalBlendFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.NormalSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData MultiplyFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -77,6 +92,22 @@ public void MultiplyFunction256(TestVector4 back, TestVector4 source, float amou Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(MultiplyFunctionData))] + public void MultiplyFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.MultiplySrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData AddFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -109,6 +140,22 @@ public void AddFunction256(TestVector4 back, TestVector4 source, float amount, T Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + 
[MemberData(nameof(AddFunctionData))] + public void AddFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.AddSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData SubtractFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(0, 0, 0, 1) }, @@ -141,6 +188,22 @@ public void SubtractFunction256(TestVector4 back, TestVector4 source, float amou Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(SubtractFunctionData))] + public void SubtractFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.SubtractSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData ScreenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -173,6 +236,22 @@ public void ScreenFunction256(TestVector4 back, TestVector4 source, float amount Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(ScreenFunctionData))] + public void ScreenFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = 
CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.ScreenSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData DarkenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -205,6 +284,22 @@ public void DarkenFunction256(TestVector4 back, TestVector4 source, float amount Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(DarkenFunctionData))] + public void DarkenFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.DarkenSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData LightenFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -237,6 +332,22 @@ public void LightenFunction256(TestVector4 back, TestVector4 source, float amoun Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(LightenFunctionData))] + public void LightenFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.LightenSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData OverlayFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 
1, 1) }, @@ -269,6 +380,22 @@ public void OverlayFunction256(TestVector4 back, TestVector4 source, float amoun Assert.Equal(expected256, actual, FloatComparer); } + [Theory] + [MemberData(nameof(OverlayFunctionData))] + public void OverlayFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.OverlaySrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + public static TheoryData HardLightFunctionData { get; } = new() { { new TestVector4(1, 1, 1, 1), new TestVector4(1, 1, 1, 1), 1, new TestVector4(1, 1, 1, 1) }, @@ -300,4 +427,27 @@ public void HardLightFunction256(TestVector4 back, TestVector4 source, float amo Vector256 actual = PorterDuffFunctions.HardLightSrcOver(back256, source256, Vector256.Create(amount)); Assert.Equal(expected256, actual, FloatComparer); } + + [Theory] + [MemberData(nameof(HardLightFunctionData))] + public void HardLightFunction512(TestVector4 back, TestVector4 source, float amount, TestVector4 expected) + { + if (!Avx512F.IsSupported) + { + return; + } + + Vector512 back512 = CreateVector512(back); + Vector512 source512 = CreateVector512(source); + Vector512 expected512 = CreateVector512(expected); + Vector512 actual = PorterDuffFunctions.HardLightSrcOver(back512, source512, Vector512.Create(amount)); + Assert.Equal(expected512, actual, FloatComparer); + } + + private static Vector512 CreateVector512(TestVector4 vector) + => Vector512.Create( + vector.X, vector.Y, vector.Z, vector.W, + vector.X, vector.Y, vector.Z, vector.W, + vector.X, vector.Y, vector.Z, vector.W, + vector.X, vector.Y, vector.Z, vector.W); } diff --git a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs 
b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs index 2c97cbde07..153a9ac487 100644 --- a/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs +++ b/tests/ImageSharp.Tests/PixelFormats/PixelBlenders/PorterDuffFunctionsTestsTPixel.cs @@ -9,12 +9,21 @@ namespace SixLabors.ImageSharp.Tests.PixelFormats.PixelBlenders; public class PorterDuffFunctionsTestsTPixel { + private const int BulkBlendCount = 4; + private static Span AsSpan(T value) where T : struct { return new Span(new[] { value }); } + private static T[] CreateFilledArray(T value) + { + T[] values = new T[BulkBlendCount]; + values.AsSpan().Fill(value); + return values; + } + public static TheoryData NormalBlendFunctionData = new() { { new TestPixel(1, 1, 1, 1), new TestPixel(1, 1, 1, 1), 1, new TestPixel(1, 1, 1, 1) }, @@ -46,9 +55,14 @@ public void NormalBlendFunctionBlender(TestPixel back, TestPixel public void NormalBlendFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.NormalSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.NormalSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData MultiplyFunctionData = new() @@ -86,9 +100,14 @@ public void MultiplyFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.MultiplySrcOver().Blend(this.Configuration, 
dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.MultiplySrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData AddFunctionData = new() @@ -136,9 +155,14 @@ public void AddFunctionBlender(TestPixel back, TestPixel public void AddFunctionBlenderBulk(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.AddSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.AddSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData SubtractFunctionData = new() @@ -176,9 +200,14 @@ public void SubtractFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.SubtractSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.SubtractSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel 
= expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData ScreenFunctionData = new() @@ -216,9 +245,14 @@ public void ScreenFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.ScreenSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.ScreenSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData DarkenFunctionData = new() @@ -256,9 +290,14 @@ public void DarkenFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.DarkenSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.DarkenSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData LightenFunctionData = new() @@ -296,9 +335,14 @@ public void LightenFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - 
new DefaultPixelBlenders.LightenSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.LightenSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData OverlayFunctionData = new() @@ -336,9 +380,14 @@ public void OverlayFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.OverlaySrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.OverlaySrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } public static TheoryData HardLightFunctionData = new() @@ -376,8 +425,13 @@ public void HardLightFunctionBlender(TestPixel back, TestPixel(TestPixel back, TestPixel source, float amount, TestPixel expected) where TPixel : unmanaged, IPixel { - Span dest = new(new TPixel[1]); - new DefaultPixelBlenders.HardLightSrcOver().Blend(this.Configuration, dest, back.AsSpan(), source.AsSpan(), AsSpan(amount)); - VectorAssert.Equal(expected.AsPixel(), dest[0], 2); + TPixel[] dest = new TPixel[BulkBlendCount]; + new DefaultPixelBlenders.HardLightSrcOver().Blend(this.Configuration, dest, CreateFilledArray(back.AsPixel()), CreateFilledArray(source.AsPixel()), 
CreateFilledArray(amount)); + + TPixel expectedPixel = expected.AsPixel(); + foreach (TPixel pixel in dest) + { + VectorAssert.Equal(expectedPixel, pixel, 2); + } } } diff --git a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs index 21ac6966b8..7c45dd047c 100644 --- a/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs +++ b/tests/ImageSharp.Tests/TestUtilities/ApproximateFloatComparer.cs @@ -15,7 +15,8 @@ namespace SixLabors.ImageSharp.Tests; IEqualityComparer, IEqualityComparer, IEqualityComparer, - IEqualityComparer> + IEqualityComparer>, + IEqualityComparer> { private readonly float epsilon; @@ -78,4 +79,19 @@ public bool Equals(Vector256 x, Vector256 y) && this.Equals(x.GetElement(7), y.GetElement(7)); public int GetHashCode([DisallowNull] Vector256 obj) => obj.GetHashCode(); + + public bool Equals(Vector512 x, Vector512 y) + { + for (int i = 0; i < Vector512.Count; i++) + { + if (!this.Equals(x.GetElement(i), y.GetElement(i))) + { + return false; + } + } + + return true; + } + + public int GetHashCode([DisallowNull] Vector512 obj) => obj.GetHashCode(); }