Skip to content

[WIP] Improve performance of Utf8Parser.TryParseInt32D #32843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,27 @@ public static bool IsDigit(int i)
return (uint)(i - '0') <= ('9' - '0');
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have to force the inlining of this method? I would assume that JIT is going to do this even without it

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW I've used https://godbolt.org/z/imxPHx to see if we could use some magic bit operation to get the same effect, but it looks like no ;)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The codegen turns this into three instructions: add, cmp, jcc. I'd say that's fairly efficient. :)

public static bool TryConvertToDigit(int byteValue, out int digit)
{
digit = byteValue - '0';
return (uint)digit <= 9;
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool TryGetDigitAt(ReadOnlySpan<byte> source, IntPtr index, out int digit)
{
if (source.TryGetElementAt(index, out byte tempByte) && TryConvertToDigit(tempByte, out digit))
{
return true;
}
else
{
digit = default;
return false;
}
}

//
// Enable use of ThrowHelper from TryParse() routines without introducing dozens of non-code-coveraged "value= default; bytesConsumed = 0; return false" boilerplate.
//
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;

namespace System.Buffers.Text
{
public static partial class Utf8Parser
Expand Down Expand Up @@ -194,144 +196,128 @@ private static bool TryParseInt16D(ReadOnlySpan<byte> source, out short value, o

private static bool TryParseInt32D(ReadOnlySpan<byte> source, out int value, out int bytesConsumed)
{
if (source.Length < 1)
if (source.IsEmpty)
goto FalseExit;

int sign = 1;
int index = 0;
int num = source[index];
if (num == '-')
{
sign = -1;
index++;
if ((uint)index >= (uint)source.Length)
goto FalseExit;
num = source[index];
}
else if (num == '+')
{
index++;
if ((uint)index >= (uint)source.Length)
goto FalseExit;
num = source[index];
}
int sign = 0;
IntPtr index = IntPtr.Zero;
Copy link
Member

@jkotas jkotas Feb 26, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
IntPtr index = IntPtr.Zero;
nint index = 0;

int answer = source[0];

int answer = 0;
TryAgain:

if (ParserHelpers.IsDigit(num))
if (ParserHelpers.TryConvertToDigit(answer, out answer))
{
if (num == '0')
if (answer == 0)
{
do
{
index++;
if ((uint)index >= (uint)source.Length)
goto Done;
num = source[index];
} while (num == '0');
if (!ParserHelpers.IsDigit(num))
goto Done;
index += 1;
if (!source.TryGetElementAt(index, out byte thisByte))
goto ReturnZero;
answer = thisByte;
} while (answer == '0');
if (!ParserHelpers.TryConvertToDigit(answer, out answer))
goto ReturnZero;
}

answer = num - '0';
index++;
index += 1;

if ((uint)index >= (uint)source.Length)
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
if (!ParserHelpers.TryGetDigitAt(source, index, out int num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

if ((uint)index >= (uint)source.Length)
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
goto Done;
index++;
answer = 10 * answer + num - '0';
index += 1;
answer = 10 * answer + num;

// Potential overflow
if ((uint)index >= (uint)source.Length)
goto Done;
num = source[index];
if (!ParserHelpers.IsDigit(num))
if (!ParserHelpers.TryGetDigitAt(source, index, out num))
goto Done;
index++;
index += 1;
if (answer > int.MaxValue / 10)
goto FalseExit; // Overflow
answer = answer * 10 + num - '0';
// if sign < 0, (-1 * sign + 1) / 2 = 1
// else, (-1 * sign + 1) / 2 = 0
if ((uint)answer > (uint)int.MaxValue + (-1 * sign + 1) / 2)
answer = answer * 10 + num;

// if sign = 0, checks answer against 0x7FFFFFFF (int.MaxValue)
// if sign = -1, checks answer against 0x80000000 (int.MinValue)
if ((uint)answer > (uint)(int.MaxValue - sign))
goto FalseExit; // Overflow

if ((uint)index >= (uint)source.Length)
goto Done;
if (!ParserHelpers.IsDigit(source[index]))

if (!ParserHelpers.TryGetDigitAt(source, index, out _))
goto Done;

// Guaranteed overflow
goto FalseExit;
}
else
{
if (answer == '-' - '0')
{
sign = -1;
}
else if (answer != '+' - '0')
{
goto FalseExit;
}

if (index != IntPtr.Zero || (uint)1 >= (uint)source.Length)
{
goto FalseExit;
}

answer = source[1];
index += 1;
goto TryAgain;
}

FalseExit:
bytesConsumed = default;
value = default;
return false;

ReturnZero:
answer = 0;

Done:
bytesConsumed = index;
value = answer * sign;
bytesConsumed = index.ToInt32Unchecked();

// If sign = 0, value = (answer ^ 0) - 0 = answer
// If sign = -1, value = (answer ^ -1) - (-1) = ~answer + 1 = -answer
Debug.Assert(sign == 0 || sign == -1);
value = (answer ^ sign) - sign;
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,27 +127,27 @@ public static bool TryParse(ReadOnlySpan<byte> source, out short value, out int
/// </exceptions>
public static bool TryParse(ReadOnlySpan<byte> source, out int value, out int bytesConsumed, char standardFormat = default)
{
switch (standardFormat)
{
case default(char):
case 'g':
case 'G':
case 'd':
case 'D':
return TryParseInt32D(source, out value, out bytesConsumed);
int standardFormatLowercase;

case 'n':
case 'N':
return TryParseInt32N(source, out value, out bytesConsumed);
if (standardFormat == default
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helps, but it would be even better to have overload of the API that does not have the standardFormat argument at all.

|| ((standardFormatLowercase = standardFormat | 0x20) == 'g')
|| (standardFormatLowercase == 'd'))
{
return TryParseInt32D(source, out value, out bytesConsumed);
}

case 'x':
case 'X':
value = default;
return TryParseUInt32X(source, out Unsafe.As<int, uint>(ref value), out bytesConsumed);
if (standardFormatLowercase == 'n')
{
return TryParseInt32N(source, out value, out bytesConsumed);
}

default:
return ParserHelpers.TryParseThrowFormatException(out value, out bytesConsumed);
if (standardFormatLowercase != 'x')
{
ThrowHelper.ThrowFormatException_BadFormatSpecifier();
}

value = default;
return TryParseUInt32X(source, out Unsafe.As<int, uint>(ref value), out bytesConsumed);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

after moving from the previous switch to this if statement it became much harder to read. Did we get any noticeable performance gain by doing it?

}

/// <summary>
Expand Down
6 changes: 6 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/IntPtr.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ public unsafe int ToInt32()
#endif
}

/// <summary>
/// Returns the low 32 bits of this <see cref="IntPtr"/> instance.
/// No arithmetic overflow check is performed.
/// </summary>
internal unsafe int ToInt32Unchecked() => (int)_value;

[NonVersionable]
public unsafe long ToInt64() =>
(nint)_value;
Expand Down
20 changes: 20 additions & 0 deletions src/libraries/System.Private.CoreLib/src/System/ReadOnlySpan.cs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,26 @@ public bool TryCopyTo(Span<T> destination)
return retVal;
}

/// <summary>
/// If <paramref name="index"/> is within the span, dereferences the element at
/// that index, outs it via <paramref name="value"/>, and returns <see langword="true"/>.
/// Otherwise returns <see langword="false"/>.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe bool TryGetElementAt(IntPtr index, out T value)
{
if ((nuint)(void*)index >= (uint)_length)
{
value = default!;
return false;
}
else
{
value = Unsafe.Add(ref _pointer.Value, index);
return true;
}
}

/// <summary>
/// Returns true if left and right point at the same memory and have the same length. Note that
/// this does *not* check to see if the *contents* are equal.
Expand Down