Skip to content

Commit 6fc1cc4

Browse files
Merge pull request #3113 from SixLabors/js/parallel-cleanup
Replace parallel row iteration with sequential loops in Crop/Convolution Processors
2 parents f5238e9 + 7b28e25 commit 6fc1cc4

31 files changed

Lines changed: 88 additions & 172 deletions

File tree

src/ImageSharp/Advanced/ParallelExecutionSettings.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ public readonly struct ParallelExecutionSettings
2020
/// </summary>
2121
/// <param name="maxDegreeOfParallelism">
2222
/// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
23-
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
23+
/// If set to <c>-1</c>, there is no limit on the number of concurrently running operations.
2424
/// </param>
2525
/// <param name="minimumPixelsProcessedPerTask">The value for <see cref="MinimumPixelsProcessedPerTask"/>.</param>
2626
/// <param name="memoryAllocator">The <see cref="MemoryAllocator"/>.</param>
@@ -31,7 +31,7 @@ public ParallelExecutionSettings(
3131
{
3232
// Shall be compatible with ParallelOptions.MaxDegreeOfParallelism:
3333
// https://docs.microsoft.com/en-us/dotnet/api/system.threading.tasks.paralleloptions.maxdegreeofparallelism
34-
if (maxDegreeOfParallelism == 0 || maxDegreeOfParallelism < -1)
34+
if (maxDegreeOfParallelism is 0 or < -1)
3535
{
3636
throw new ArgumentOutOfRangeException(nameof(maxDegreeOfParallelism));
3737
}
@@ -49,7 +49,7 @@ public ParallelExecutionSettings(
4949
/// </summary>
5050
/// <param name="maxDegreeOfParallelism">
5151
/// The value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
52-
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
52+
/// If set to <c>-1</c>, there is no limit on the number of concurrently running operations.
5353
/// </param>
5454
/// <param name="memoryAllocator">The <see cref="MemoryAllocator"/>.</param>
5555
public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator memoryAllocator)
@@ -64,7 +64,7 @@ public ParallelExecutionSettings(int maxDegreeOfParallelism, MemoryAllocator mem
6464

6565
/// <summary>
6666
/// Gets the value used for initializing <see cref="ParallelOptions.MaxDegreeOfParallelism"/> when using TPL.
67-
/// A value of <c>-1</c> leaves the degree of parallelism unbounded.
67+
/// A value of <c>-1</c> means there is no limit on the number of concurrently running operations.
6868
/// </summary>
6969
public int MaxDegreeOfParallelism { get; }
7070

@@ -93,12 +93,10 @@ public ParallelExecutionSettings MultiplyMinimumPixelsPerTask(int multiplier)
9393
}
9494

9595
/// <summary>
96-
/// Get the default <see cref="SixLabors.ImageSharp.Advanced.ParallelExecutionSettings"/> for a <see cref="SixLabors.ImageSharp.Configuration"/>
96+
/// Get the default <see cref="ParallelExecutionSettings"/> for a <see cref="Configuration"/>
9797
/// </summary>
9898
/// <param name="configuration">The <see cref="Configuration"/>.</param>
9999
/// <returns>The <see cref="ParallelExecutionSettings"/>.</returns>
100100
public static ParallelExecutionSettings FromConfiguration(Configuration configuration)
101-
{
102-
return new ParallelExecutionSettings(configuration.MaxDegreeOfParallelism, configuration.MemoryAllocator);
103-
}
101+
=> new(configuration.MaxDegreeOfParallelism, configuration.MemoryAllocator);
104102
}

src/ImageSharp/Advanced/ParallelRowIterator.cs

Lines changed: 0 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@ public static void IterateRows<T>(
4444
where T : struct, IRowOperation
4545
{
4646
ValidateRectangle(rectangle);
47-
ValidateSettings(parallelSettings);
4847

4948
int top = rectangle.Top;
5049
int bottom = rectangle.Bottom;
@@ -109,7 +108,6 @@ public static void IterateRows<T, TBuffer>(
109108
where TBuffer : unmanaged
110109
{
111110
ValidateRectangle(rectangle);
112-
ValidateSettings(parallelSettings);
113111

114112
int top = rectangle.Top;
115113
int bottom = rectangle.Bottom;
@@ -174,7 +172,6 @@ public static void IterateRowIntervals<T>(
174172
where T : struct, IRowIntervalOperation
175173
{
176174
ValidateRectangle(rectangle);
177-
ValidateSettings(parallelSettings);
178175

179176
int top = rectangle.Top;
180177
int bottom = rectangle.Bottom;
@@ -236,7 +233,6 @@ public static void IterateRowIntervals<T, TBuffer>(
236233
where TBuffer : unmanaged
237234
{
238235
ValidateRectangle(rectangle);
239-
ValidateSettings(parallelSettings);
240236

241237
int top = rectangle.Top;
242238
int bottom = rectangle.Bottom;
@@ -315,35 +311,4 @@ private static void ValidateRectangle(Rectangle rectangle)
315311
0,
316312
$"{nameof(rectangle)}.{nameof(rectangle.Height)}");
317313
}
318-
319-
/// <summary>
320-
/// Validates the supplied <see cref="ParallelExecutionSettings"/>.
321-
/// </summary>
322-
/// <param name="parallelSettings">The execution settings.</param>
323-
/// <exception cref="ArgumentOutOfRangeException">
324-
/// Thrown when <see cref="ParallelExecutionSettings.MaxDegreeOfParallelism"/> or
325-
/// <see cref="ParallelExecutionSettings.MinimumPixelsProcessedPerTask"/> is invalid.
326-
/// </exception>
327-
/// <exception cref="ArgumentNullException">
328-
/// Thrown when <see cref="ParallelExecutionSettings.MemoryAllocator"/> is null.
329-
/// This also guards the public <see cref="ParallelExecutionSettings"/> default value, which bypasses constructor validation.
330-
/// </exception>
331-
private static void ValidateSettings(in ParallelExecutionSettings parallelSettings)
332-
{
333-
// ParallelExecutionSettings is a public struct, so callers can pass default and bypass constructor validation.
334-
if (parallelSettings.MaxDegreeOfParallelism is 0 or < -1)
335-
{
336-
throw new ArgumentOutOfRangeException(
337-
$"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MaxDegreeOfParallelism)}");
338-
}
339-
340-
Guard.MustBeGreaterThan(
341-
parallelSettings.MinimumPixelsProcessedPerTask,
342-
0,
343-
$"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MinimumPixelsProcessedPerTask)}");
344-
345-
Guard.NotNull(
346-
parallelSettings.MemoryAllocator,
347-
$"{nameof(parallelSettings)}.{nameof(ParallelExecutionSettings.MemoryAllocator)}");
348-
}
349314
}

src/ImageSharp/Configuration.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,9 @@ public Configuration(params IImageFormatConfigurationModule[] configurationModul
6565
/// <summary>
6666
/// Gets or sets the maximum number of concurrent tasks enabled in ImageSharp algorithms
6767
/// configured with this <see cref="Configuration"/> instance.
68-
/// Set to <c>-1</c> to leave the degree of parallelism unbounded.
69-
/// Initialized with <see cref="Environment.ProcessorCount"/> by default.
68+
/// A positive value limits the number of concurrent operations to the set value.
69+
/// If set to <c>-1</c>, there is no limit on the number of concurrently running operations.
70+
/// Defaults to <see cref="Environment.ProcessorCount"/>.
7071
/// </summary>
7172
public int MaxDegreeOfParallelism
7273
{

src/ImageSharp/Processing/Processors/Convolution/BokehBlurProcessor{TPixel}.cs

Lines changed: 14 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Runtime.CompilerServices;
66
using System.Runtime.InteropServices;
77
using SixLabors.ImageSharp.Advanced;
8+
using SixLabors.ImageSharp.ColorProfiles.Companding;
89
using SixLabors.ImageSharp.Memory;
910
using SixLabors.ImageSharp.PixelFormats;
1011
using SixLabors.ImageSharp.Processing.Processors.Convolution.Parameters;
@@ -112,7 +113,7 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
112113
}
113114
else
114115
{
115-
ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, 1 / this.gamma);
116+
ApplyInverseGammaExposureRowOperation operation = new(sourceRectangle, source.PixelBuffer, processingBuffer, this.Configuration, this.gamma);
116117
ParallelRowIterator.IterateRows(
117118
this.Configuration,
118119
sourceRectangle,
@@ -305,15 +306,9 @@ public void Invoke(int y, Span<Vector4> span)
305306
{
306307
Span<TPixel> targetRowSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..];
307308
PixelOperations<TPixel>.Instance.ToVector4(this.configuration, targetRowSpan[..span.Length], span, PixelConversionModifiers.Premultiply);
308-
ref Vector4 baseRef = ref MemoryMarshal.GetReference(span);
309309

310-
for (int x = 0; x < this.bounds.Width; x++)
311-
{
312-
ref Vector4 v = ref Unsafe.Add(ref baseRef, (uint)x);
313-
v.X = MathF.Pow(v.X, this.gamma);
314-
v.Y = MathF.Pow(v.Y, this.gamma);
315-
v.Z = MathF.Pow(v.Z, this.gamma);
316-
}
310+
// Input is premultiplied [0,1] so the LUT is safe here.
311+
GammaCompanding.Expand(span[..this.bounds.Width], this.gamma);
317312

318313
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, span, targetRowSpan);
319314
}
@@ -367,44 +362,34 @@ public void Invoke(int y, Span<Vector4> span)
367362
private readonly Buffer2D<TPixel> targetPixels;
368363
private readonly Buffer2D<Vector4> sourceValues;
369364
private readonly Configuration configuration;
370-
private readonly float inverseGamma;
365+
private readonly float gamma;
371366

372367
[MethodImpl(InliningOptions.ShortMethod)]
373368
public ApplyInverseGammaExposureRowOperation(
374369
Rectangle bounds,
375370
Buffer2D<TPixel> targetPixels,
376371
Buffer2D<Vector4> sourceValues,
377372
Configuration configuration,
378-
float inverseGamma)
373+
float gamma)
379374
{
380375
this.bounds = bounds;
381376
this.targetPixels = targetPixels;
382377
this.sourceValues = sourceValues;
383378
this.configuration = configuration;
384-
this.inverseGamma = inverseGamma;
379+
this.gamma = gamma;
385380
}
386381

387382
/// <inheritdoc/>
388383
[MethodImpl(InliningOptions.ShortMethod)]
389384
public void Invoke(int y)
390385
{
391-
Vector4 low = Vector4.Zero;
392-
Vector4 high = new(float.PositiveInfinity, float.PositiveInfinity, float.PositiveInfinity, float.PositiveInfinity);
393-
394386
Span<TPixel> targetPixelSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..];
395-
Span<Vector4> sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y)[this.bounds.X..];
396-
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceRowSpan);
387+
Span<Vector4> sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y).Slice(this.bounds.X, this.bounds.Width);
397388

398-
for (int x = 0; x < this.bounds.Width; x++)
399-
{
400-
ref Vector4 v = ref Unsafe.Add(ref sourceRef, (uint)x);
401-
Vector4 clamp = Numerics.Clamp(v, low, high);
402-
v.X = MathF.Pow(clamp.X, this.inverseGamma);
403-
v.Y = MathF.Pow(clamp.Y, this.inverseGamma);
404-
v.Z = MathF.Pow(clamp.Z, this.inverseGamma);
405-
}
389+
Numerics.Clamp(MemoryMarshal.Cast<Vector4, float>(sourceRowSpan), 0, 1F);
390+
GammaCompanding.Compress(sourceRowSpan, this.gamma);
406391

407-
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan[..this.bounds.Width], targetPixelSpan, PixelConversionModifiers.Premultiply);
392+
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan, targetPixelSpan, PixelConversionModifiers.Premultiply);
408393
}
409394
}
410395

@@ -433,17 +418,16 @@ public ApplyInverseGamma3ExposureRowOperation(
433418

434419
/// <inheritdoc/>
435420
[MethodImpl(InliningOptions.ShortMethod)]
436-
public unsafe void Invoke(int y)
421+
public void Invoke(int y)
437422
{
438423
Span<Vector4> sourceRowSpan = this.sourceValues.DangerousGetRowSpan(y).Slice(this.bounds.X, this.bounds.Width);
439-
ref Vector4 sourceRef = ref MemoryMarshal.GetReference(sourceRowSpan);
440424

441-
Numerics.Clamp(MemoryMarshal.Cast<Vector4, float>(sourceRowSpan), 0, float.PositiveInfinity);
425+
Numerics.Clamp(MemoryMarshal.Cast<Vector4, float>(sourceRowSpan), 0, 1F);
442426
Numerics.CubeRootOnXYZ(sourceRowSpan);
443427

444428
Span<TPixel> targetPixelSpan = this.targetPixels.DangerousGetRowSpan(y)[this.bounds.X..];
445429

446-
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan[..this.bounds.Width], targetPixelSpan, PixelConversionModifiers.Premultiply);
430+
PixelOperations<TPixel>.Instance.FromVector4Destructive(this.configuration, sourceRowSpan, targetPixelSpan, PixelConversionModifiers.Premultiply);
447431
}
448432
}
449433
}

src/ImageSharp/Processing/Processors/Convolution/Convolution2DProcessor{TPixel}.cs

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
// Copyright (c) Six Labors.
22
// Licensed under the Six Labors Split License.
33

4+
using System.Buffers;
45
using System.Numerics;
5-
using SixLabors.ImageSharp.Advanced;
66
using SixLabors.ImageSharp.Memory;
77
using SixLabors.ImageSharp.PixelFormats;
88

@@ -79,10 +79,16 @@ protected override void OnFrameApply(ImageFrame<TPixel> source)
7979
this.Configuration,
8080
this.PreserveAlpha);
8181

82-
ParallelRowIterator.IterateRows<Convolution2DRowOperation<TPixel>, Vector4>(
83-
this.Configuration,
84-
interest,
85-
in operation);
82+
// Convolution is memory-bandwidth-bound with low arithmetic intensity.
83+
// Parallelization degrades performance due to cache line contention from
84+
// overlapping source row reads. See #3111.
85+
using IMemoryOwner<Vector4> buffer = allocator.Allocate<Vector4>(operation.GetRequiredBufferLength(interest));
86+
Span<Vector4> span = buffer.Memory.Span;
87+
88+
for (int y = interest.Top; y < interest.Bottom; y++)
89+
{
90+
operation.Invoke(y, span);
91+
}
8692
}
8793

8894
Buffer2D<TPixel>.SwapOrCopyContent(source.PixelBuffer, targetPixels);

src/ImageSharp/Processing/Processors/Transforms/CropProcessor{TPixel}.cs

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -59,16 +59,14 @@ protected override void OnFrameApply(ImageFrame<TPixel> source, ImageFrame<TPixe
5959

6060
Rectangle bounds = this.cropRectangle;
6161

62-
// Copying is cheap, we should process more pixels per task:
63-
ParallelExecutionSettings parallelSettings =
64-
ParallelExecutionSettings.FromConfiguration(this.Configuration).MultiplyMinimumPixelsPerTask(4);
65-
62+
// Copying is too cheap to benefit from parallelization;
63+
// the overhead exceeds the work per task. See #3111.
6664
RowOperation operation = new(bounds, source.PixelBuffer, destination.PixelBuffer);
6765

68-
ParallelRowIterator.IterateRows(
69-
bounds,
70-
in parallelSettings,
71-
in operation);
66+
for (int y = bounds.Top; y < bounds.Bottom; y++)
67+
{
68+
operation.Invoke(y);
69+
}
7270
}
7371

7472
/// <summary>

tests/ImageSharp.Tests/Helpers/ParallelRowIteratorTests.cs

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -224,24 +224,6 @@ public void IterateRows_MaxDegreeOfParallelismMinusOne_ShouldVisitAllRows()
224224
Assert.Equal(Enumerable.Repeat(1, rectangle.Height), actualData);
225225
}
226226

227-
[Fact]
228-
public void IterateRowsWithTempBuffer_DefaultSettingsRequireInitialization()
229-
{
230-
ParallelExecutionSettings parallelSettings = default;
231-
Rectangle rect = new(0, 0, 10, 10);
232-
233-
void RowAction(int y, Span<Rgba32> memory)
234-
{
235-
}
236-
237-
TestRowOperation<Rgba32> operation = new(RowAction);
238-
239-
ArgumentOutOfRangeException ex = Assert.Throws<ArgumentOutOfRangeException>(
240-
() => ParallelRowIterator.IterateRows<TestRowOperation<Rgba32>, Rgba32>(rect, in parallelSettings, in operation));
241-
242-
Assert.Contains(nameof(ParallelExecutionSettings.MaxDegreeOfParallelism), ex.Message);
243-
}
244-
245227
public static TheoryData<int, int, int, int, int, int, int> IterateRows_WithEffectiveMinimumPixelsLimit_Data =
246228
new()
247229
{
@@ -367,24 +349,6 @@ void RowAction(RowInterval rows, Span<Vector4> buffer)
367349
Assert.Equal(Enumerable.Repeat(1, rectangle.Height), actualData);
368350
}
369351

370-
[Fact]
371-
public void IterateRows_DefaultSettingsRequireInitialization()
372-
{
373-
ParallelExecutionSettings parallelSettings = default;
374-
Rectangle rect = new(0, 0, 10, 10);
375-
376-
void RowAction(int y)
377-
{
378-
}
379-
380-
TestRowActionOperation operation = new(RowAction);
381-
382-
ArgumentOutOfRangeException ex = Assert.Throws<ArgumentOutOfRangeException>(
383-
() => ParallelRowIterator.IterateRows(rect, in parallelSettings, in operation));
384-
385-
Assert.Contains(nameof(ParallelExecutionSettings.MaxDegreeOfParallelism), ex.Message);
386-
}
387-
388352
public static readonly TheoryData<int, int, int, int, int, int, int> IterateRectangularBuffer_Data =
389353
new()
390354
{
Lines changed: 2 additions & 2 deletions
Loading
Lines changed: 2 additions & 2 deletions
Loading
Lines changed: 2 additions & 2 deletions
Loading

0 commit comments

Comments
 (0)