Skip to content

Commit dc0982f

Browse files
Faster Linear Transforms (#1591)
* Attempt to use same weight generation algorithm as resize. * tests pass * Identical output * Update LinearTransformKernelFactory{TResampler}.cs * Use new low allocation iterator * Migrate projective transforms. * Optimizations * Smaller kernel * Fix sampling accuracy * Finalize and update refs * Revert unnecessary changes * Remove enumerator * Actually save output for debugging. * Use custom test png encoder for reduced memory environments * Convolution should use scaled vectors * Update TestEnvironmentTests.cs * Try using doubles * Moar double precision * Fix radius calculation * Test if issue is SIMD related. * Detect runtime to determine pipeline. * Fix stack overflow * fix condition * Try simplified scalar run * Simplify unpremultiply scalar * Update Numerics.cs * Fix runtime environment * Update ImageSharp.csproj * Duplicate the caller with scalar versions * Update method name, exclude from coverage. * Don't save output during coverage tests for perf. * Update src/ImageSharp/Common/Helpers/RuntimeEnvironment.cs Co-authored-by: Anton Firszov <[email protected]> Co-authored-by: Anton Firszov <[email protected]>
1 parent 97dde7f commit dc0982f

File tree

32 files changed

+604
-351
lines changed

32 files changed

+604
-351
lines changed

.gitattributes

+2-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,6 @@
8686
*.dll binary
8787
*.eot binary
8888
*.exe binary
89-
*.ktx binary
9089
*.otf binary
9190
*.pbm binary
9291
*.pdf binary
@@ -125,3 +124,5 @@
125124
*.tga filter=lfs diff=lfs merge=lfs -text
126125
*.webp filter=lfs diff=lfs merge=lfs -text
127126
*.dds filter=lfs diff=lfs merge=lfs -text
127+
*.ktx filter=lfs diff=lfs merge=lfs -text
128+
*.ktx2 filter=lfs diff=lfs merge=lfs -text
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Copyright (c) Six Labors.
2+
// Licensed under the Apache License, Version 2.0.
3+
4+
using System;
5+
using System.Runtime.InteropServices;
6+
7+
namespace SixLabors.ImageSharp
8+
{
9+
/// <summary>
10+
/// Provides information about the .NET runtime installation.
11+
/// Many methods defer to <see cref="RuntimeInformation"/> when available.
12+
/// </summary>
13+
internal static class RuntimeEnvironment
14+
{
15+
private static readonly Lazy<bool> IsNetCoreLazy = new Lazy<bool>(() => FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase));
16+
17+
/// <summary>
18+
/// Gets a value indicating whether the .NET installation is .NET Core 3.1 or lower.
19+
/// </summary>
20+
public static bool IsNetCore => IsNetCoreLazy.Value;
21+
22+
/// <summary>
23+
/// Gets the name of the .NET installation on which an app is running.
24+
/// </summary>
25+
public static string FrameworkDescription => RuntimeInformation.FrameworkDescription;
26+
27+
/// <summary>
28+
/// Indicates whether the current application is running on the specified platform.
29+
/// </summary>
30+
public static bool IsOSPlatform(OSPlatform osPlatform) => RuntimeInformation.IsOSPlatform(osPlatform);
31+
}
32+
}

src/ImageSharp/Processing/Processors/Transforms/Linear/AffineTransformProcessor{TPixel}.cs

+145-65
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
// Licensed under the Apache License, Version 2.0.
33

44
using System;
5+
using System.Buffers;
6+
using System.Diagnostics.CodeAnalysis;
57
using System.Numerics;
68
using System.Runtime.CompilerServices;
79
using System.Runtime.InteropServices;
@@ -80,32 +82,14 @@ public void ApplyTransform<TResampler>(in TResampler sampler)
8082
return;
8183
}
8284

83-
int yRadius = LinearTransformUtils.GetSamplingRadius(in sampler, source.Height, destination.Height);
84-
int xRadius = LinearTransformUtils.GetSamplingRadius(in sampler, source.Width, destination.Width);
85-
var radialExtents = new Vector2(xRadius, yRadius);
86-
int yLength = (yRadius * 2) + 1;
87-
int xLength = (xRadius * 2) + 1;
88-
89-
// We use 2D buffers so that we can access the weight spans per row in parallel.
90-
using Buffer2D<float> yKernelBuffer = configuration.MemoryAllocator.Allocate2D<float>(yLength, destination.Height);
91-
using Buffer2D<float> xKernelBuffer = configuration.MemoryAllocator.Allocate2D<float>(xLength, destination.Height);
92-
93-
int maxX = source.Width - 1;
94-
int maxY = source.Height - 1;
95-
var maxSourceExtents = new Vector4(maxX, maxY, maxX, maxY);
96-
9785
var operation = new AffineOperation<TResampler>(
9886
configuration,
9987
source,
10088
destination,
101-
yKernelBuffer,
102-
xKernelBuffer,
10389
in sampler,
104-
matrix,
105-
radialExtents,
106-
maxSourceExtents);
90+
matrix);
10791

108-
ParallelRowIterator.IterateRows<AffineOperation<TResampler>, Vector4>(
92+
ParallelRowIterator.IterateRowIntervals<AffineOperation<TResampler>, Vector4>(
10993
configuration,
11094
destination.Bounds(),
11195
in operation);
@@ -117,7 +101,6 @@ public void ApplyTransform<TResampler>(in TResampler sampler)
117101
private readonly ImageFrame<TPixel> destination;
118102
private readonly Rectangle bounds;
119103
private readonly Matrix3x2 matrix;
120-
private readonly int maxX;
121104

122105
[MethodImpl(InliningOptions.ShortMethod)]
123106
public NNAffineOperation(
@@ -129,100 +112,197 @@ public NNAffineOperation(
129112
this.destination = destination;
130113
this.bounds = source.Bounds();
131114
this.matrix = matrix;
132-
this.maxX = destination.Width;
133115
}
134116

135117
[MethodImpl(InliningOptions.ShortMethod)]
136118
public void Invoke(int y)
137119
{
120+
Buffer2D<TPixel> sourceBuffer = this.source.PixelBuffer;
138121
Span<TPixel> destRow = this.destination.GetPixelRowSpan(y);
139122

140-
for (int x = 0; x < this.maxX; x++)
123+
for (int x = 0; x < destRow.Length; x++)
141124
{
142125
var point = Vector2.Transform(new Vector2(x, y), this.matrix);
143126
int px = (int)MathF.Round(point.X);
144127
int py = (int)MathF.Round(point.Y);
145128

146129
if (this.bounds.Contains(px, py))
147130
{
148-
destRow[x] = this.source[px, py];
131+
destRow[x] = sourceBuffer.GetElementUnsafe(px, py);
149132
}
150133
}
151134
}
152135
}
153136

154-
private readonly struct AffineOperation<TResampler> : IRowOperation<Vector4>
137+
private readonly struct AffineOperation<TResampler> : IRowIntervalOperation<Vector4>
155138
where TResampler : struct, IResampler
156139
{
157140
private readonly Configuration configuration;
158141
private readonly ImageFrame<TPixel> source;
159142
private readonly ImageFrame<TPixel> destination;
160-
private readonly Buffer2D<float> yKernelBuffer;
161-
private readonly Buffer2D<float> xKernelBuffer;
162143
private readonly TResampler sampler;
163144
private readonly Matrix3x2 matrix;
164-
private readonly Vector2 radialExtents;
165-
private readonly Vector4 maxSourceExtents;
166-
private readonly int maxX;
145+
private readonly float yRadius;
146+
private readonly float xRadius;
167147

168148
[MethodImpl(InliningOptions.ShortMethod)]
169149
public AffineOperation(
170150
Configuration configuration,
171151
ImageFrame<TPixel> source,
172152
ImageFrame<TPixel> destination,
173-
Buffer2D<float> yKernelBuffer,
174-
Buffer2D<float> xKernelBuffer,
175153
in TResampler sampler,
176-
Matrix3x2 matrix,
177-
Vector2 radialExtents,
178-
Vector4 maxSourceExtents)
154+
Matrix3x2 matrix)
179155
{
180156
this.configuration = configuration;
181157
this.source = source;
182158
this.destination = destination;
183-
this.yKernelBuffer = yKernelBuffer;
184-
this.xKernelBuffer = xKernelBuffer;
185159
this.sampler = sampler;
186160
this.matrix = matrix;
187-
this.radialExtents = radialExtents;
188-
this.maxSourceExtents = maxSourceExtents;
189-
this.maxX = destination.Width;
161+
162+
this.yRadius = LinearTransformUtility.GetSamplingRadius(in sampler, source.Height, destination.Height);
163+
this.xRadius = LinearTransformUtility.GetSamplingRadius(in sampler, source.Width, destination.Width);
190164
}
191165

192166
[MethodImpl(InliningOptions.ShortMethod)]
193-
public void Invoke(int y, Span<Vector4> span)
167+
public void Invoke(in RowInterval rows, Span<Vector4> span)
194168
{
195-
Buffer2D<TPixel> sourceBuffer = this.source.PixelBuffer;
169+
if (RuntimeEnvironment.IsOSPlatform(OSPlatform.OSX)
170+
&& RuntimeEnvironment.IsNetCore)
171+
{
172+
// There's something wrong with the JIT in .NET Core 3.1 on certain
173+
// MacOSX machines so we have to use different pipelines.
174+
// It's:
175+
// - Not reproducible locally
176+
// - Doesn't seem to be triggered by the bulk Numerics.UnPremultiply method but by its caller.
177+
// https://github.com/SixLabors/ImageSharp/pull/1591
178+
this.InvokeMacOSX(in rows, span);
179+
return;
180+
}
196181

197-
PixelOperations<TPixel>.Instance.ToVector4(
198-
this.configuration,
199-
this.destination.GetPixelRowSpan(y),
200-
span);
182+
Matrix3x2 matrix = this.matrix;
183+
TResampler sampler = this.sampler;
184+
float yRadius = this.yRadius;
185+
float xRadius = this.xRadius;
186+
int maxY = this.source.Height - 1;
187+
int maxX = this.source.Width - 1;
201188

202-
ref float yKernelSpanRef = ref MemoryMarshal.GetReference(this.yKernelBuffer.GetRowSpan(y));
203-
ref float xKernelSpanRef = ref MemoryMarshal.GetReference(this.xKernelBuffer.GetRowSpan(y));
189+
Buffer2D<TPixel> sourceBuffer = this.source.PixelBuffer;
204190

205-
for (int x = 0; x < this.maxX; x++)
191+
for (int y = rows.Min; y < rows.Max; y++)
206192
{
207-
// Use the single precision position to calculate correct bounding pixels
208-
// otherwise we get rogue pixels outside of the bounds.
209-
var point = Vector2.Transform(new Vector2(x, y), this.matrix);
210-
LinearTransformUtils.Convolve(
211-
in this.sampler,
212-
point,
213-
sourceBuffer,
193+
Span<TPixel> rowSpan = this.destination.GetPixelRowSpan(y);
194+
PixelOperations<TPixel>.Instance.ToVector4(
195+
this.configuration,
196+
rowSpan,
197+
span,
198+
PixelConversionModifiers.Scale);
199+
200+
for (int x = 0; x < span.Length; x++)
201+
{
202+
var point = Vector2.Transform(new Vector2(x, y), matrix);
203+
float pY = point.Y;
204+
float pX = point.X;
205+
206+
int top = LinearTransformUtility.GetRangeStart(yRadius, pY, maxY);
207+
int bottom = LinearTransformUtility.GetRangeEnd(yRadius, pY, maxY);
208+
int left = LinearTransformUtility.GetRangeStart(xRadius, pX, maxX);
209+
int right = LinearTransformUtility.GetRangeEnd(xRadius, pX, maxX);
210+
211+
if (bottom == top || right == left)
212+
{
213+
continue;
214+
}
215+
216+
Vector4 sum = Vector4.Zero;
217+
for (int yK = top; yK <= bottom; yK++)
218+
{
219+
float yWeight = sampler.GetValue(yK - pY);
220+
221+
for (int xK = left; xK <= right; xK++)
222+
{
223+
float xWeight = sampler.GetValue(xK - pX);
224+
225+
Vector4 current = sourceBuffer.GetElementUnsafe(xK, yK).ToScaledVector4();
226+
Numerics.Premultiply(ref current);
227+
sum += current * xWeight * yWeight;
228+
}
229+
}
230+
231+
span[x] = sum;
232+
}
233+
234+
Numerics.UnPremultiply(span);
235+
PixelOperations<TPixel>.Instance.FromVector4Destructive(
236+
this.configuration,
214237
span,
215-
x,
216-
ref yKernelSpanRef,
217-
ref xKernelSpanRef,
218-
this.radialExtents,
219-
this.maxSourceExtents);
238+
rowSpan,
239+
PixelConversionModifiers.Scale);
220240
}
241+
}
242+
243+
[ExcludeFromCodeCoverage]
244+
[MethodImpl(InliningOptions.ShortMethod)]
245+
private void InvokeMacOSX(in RowInterval rows, Span<Vector4> span)
246+
{
247+
Matrix3x2 matrix = this.matrix;
248+
TResampler sampler = this.sampler;
249+
float yRadius = this.yRadius;
250+
float xRadius = this.xRadius;
251+
int maxY = this.source.Height - 1;
252+
int maxX = this.source.Width - 1;
221253

222-
PixelOperations<TPixel>.Instance.FromVector4Destructive(
223-
this.configuration,
224-
span,
225-
this.destination.GetPixelRowSpan(y));
254+
Buffer2D<TPixel> sourceBuffer = this.source.PixelBuffer;
255+
256+
for (int y = rows.Min; y < rows.Max; y++)
257+
{
258+
Span<TPixel> rowSpan = this.destination.GetPixelRowSpan(y);
259+
PixelOperations<TPixel>.Instance.ToVector4(
260+
this.configuration,
261+
rowSpan,
262+
span,
263+
PixelConversionModifiers.Scale);
264+
265+
for (int x = 0; x < span.Length; x++)
266+
{
267+
var point = Vector2.Transform(new Vector2(x, y), matrix);
268+
float pY = point.Y;
269+
float pX = point.X;
270+
271+
int top = LinearTransformUtility.GetRangeStart(yRadius, pY, maxY);
272+
int bottom = LinearTransformUtility.GetRangeEnd(yRadius, pY, maxY);
273+
int left = LinearTransformUtility.GetRangeStart(xRadius, pX, maxX);
274+
int right = LinearTransformUtility.GetRangeEnd(xRadius, pX, maxX);
275+
276+
if (bottom == top || right == left)
277+
{
278+
continue;
279+
}
280+
281+
Vector4 sum = Vector4.Zero;
282+
for (int yK = top; yK <= bottom; yK++)
283+
{
284+
float yWeight = sampler.GetValue(yK - pY);
285+
286+
for (int xK = left; xK <= right; xK++)
287+
{
288+
float xWeight = sampler.GetValue(xK - pX);
289+
290+
Vector4 current = sourceBuffer.GetElementUnsafe(xK, yK).ToScaledVector4();
291+
Numerics.Premultiply(ref current);
292+
sum += current * xWeight * yWeight;
293+
}
294+
}
295+
296+
Numerics.UnPremultiply(ref sum);
297+
span[x] = sum;
298+
}
299+
300+
PixelOperations<TPixel>.Instance.FromVector4Destructive(
301+
this.configuration,
302+
span,
303+
rowSpan,
304+
PixelConversionModifiers.Scale);
305+
}
226306
}
227307
}
228308
}

0 commit comments

Comments
 (0)