Skip to content

Commit ce06592

Browse files
authored
Use IndexOfAnyValues in Xml (#78664)
* Use IndexOfAnyValues in Xml
* Avoid checking whitespace char twice
* More spans
1 parent 2b87d85 commit ce06592

File tree

6 files changed

+98
-103
lines changed

6 files changed

+98
-103
lines changed

src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseReader.cs

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1514,20 +1514,14 @@ private int ReadBytes(Encoding encoding, int byteBlock, int charBlock, byte[] bu
15141514
catch (FormatException exception)
15151515
{
15161516
// Something was wrong with the format, see if we can strip the spaces
1517-
int i = 0;
1518-
int j = 0;
1519-
while (true)
1517+
int newCount = XmlConverter.StripWhitespace(chars.AsSpan(0, charCount));
1518+
if (newCount == charCount)
15201519
{
1521-
while (j < charCount && XmlConverter.IsWhitespace(chars[j]))
1522-
j++;
1523-
if (j == charCount)
1524-
break;
1525-
chars[i++] = chars[j++];
1526-
}
1527-
// No spaces, so don't try again
1528-
if (i == charCount)
1520+
// No spaces, so don't try again
15291521
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new XmlException(exception.Message, exception.InnerException));
1530-
charCount = i;
1522+
}
1523+
1524+
charCount = newCount;
15311525
}
15321526
}
15331527
}

src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBaseWriter.cs

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -750,27 +750,20 @@ protected void StartContent(char[] chars, int offset, int count)
750750

751751
private static void VerifyWhitespace(char ch)
752752
{
753-
if (!IsWhitespace(ch))
754-
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
753+
if (!XmlConverter.IsWhitespace(ch))
754+
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
755755
}
756756

757757
private static void VerifyWhitespace(string s)
758758
{
759-
for (int i = 0; i < s.Length; i++)
760-
if (!IsWhitespace(s[i]))
761-
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
759+
if (!XmlConverter.IsWhitespace(s))
760+
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
762761
}
763762

764763
private static void VerifyWhitespace(char[] chars, int offset, int count)
765764
{
766-
for (int i = 0; i < count; i++)
767-
if (!IsWhitespace(chars[offset + i]))
768-
throw System.Runtime.Serialization.DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
769-
}
770-
771-
private static bool IsWhitespace(char ch)
772-
{
773-
return (ch == ' ' || ch == '\n' || ch == '\r' || ch == 't');
765+
if (!XmlConverter.IsWhitespace(chars.AsSpan(offset, count)))
766+
throw DiagnosticUtility.ExceptionUtility.ThrowHelperError(new InvalidOperationException(SR.XmlIllegalOutsideRoot));
774767
}
775768

776769
protected static void EndContent()

src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlBufferReader.cs

Lines changed: 2 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -768,23 +768,12 @@ public int GetCharEntity(int offset, int length)
768768
public bool IsWhitespaceKey(int key)
769769
{
770770
string s = GetDictionaryString(key).Value;
771-
for (int i = 0; i < s.Length; i++)
772-
{
773-
if (!XmlConverter.IsWhitespace(s[i]))
774-
return false;
775-
}
776-
return true;
771+
return XmlConverter.IsWhitespace(s);
777772
}
778773

779774
public bool IsWhitespaceUTF8(int offset, int length)
780775
{
781-
byte[] buffer = _buffer;
782-
for (int i = 0; i < length; i++)
783-
{
784-
if (!XmlConverter.IsWhitespace((char)buffer[offset + i]))
785-
return false;
786-
}
787-
return true;
776+
return XmlConverter.IsWhitespace(_buffer.AsSpan(offset, length));
788777
}
789778

790779
public bool IsWhitespaceUnicode(int offset, int length)

src/libraries/System.Private.DataContractSerialization/src/System/Xml/XmlConverter.cs

Lines changed: 41 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
using System.Runtime.Serialization;
1515
using System.Collections.Generic;
1616
using System.Collections.ObjectModel;
17-
17+
using System.Buffers;
1818

1919
namespace System.Xml
2020
{
@@ -30,6 +30,10 @@ internal static class XmlConverter
3030
public const int MaxUInt64Chars = 32;
3131
public const int MaxPrimitiveChars = MaxDateTimeChars;
3232

33+
// Matches IsWhitespace below
34+
private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(" \t\r\n");
35+
private static readonly IndexOfAnyValues<byte> s_whitespaceBytes = IndexOfAnyValues.Create(" \t\r\n"u8);
36+
3337
public static bool ToBoolean(string value)
3438
{
3539
try
@@ -1082,45 +1086,62 @@ public static int ToChars(DateTime value, byte[] chars, int offset)
10821086
return offset - offsetMin;
10831087
}
10841088

1085-
public static bool IsWhitespace(string s)
1089+
public static bool IsWhitespace(ReadOnlySpan<char> chars) =>
1090+
chars.IndexOfAnyExcept(s_whitespaceChars) < 0;
1091+
1092+
public static bool IsWhitespace(ReadOnlySpan<byte> bytes) =>
1093+
bytes.IndexOfAnyExcept(s_whitespaceBytes) < 0;
1094+
1095+
public static bool IsWhitespace(char ch) =>
1096+
ch is <= ' ' and (' ' or '\t' or '\r' or '\n');
1097+
1098+
public static int StripWhitespace(Span<char> chars)
10861099
{
1087-
for (int i = 0; i < s.Length; i++)
1100+
int count = chars.IndexOfAny(s_whitespaceChars);
1101+
if (count < 0)
10881102
{
1089-
if (!IsWhitespace(s[i]))
1090-
return false;
1103+
return chars.Length;
10911104
}
1092-
return true;
1093-
}
10941105

1095-
public static bool IsWhitespace(char ch)
1096-
{
1097-
return (ch <= ' ' && (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'));
1106+
foreach (char c in chars.Slice(count + 1))
1107+
{
1108+
if (!IsWhitespace(c))
1109+
{
1110+
chars[count++] = c;
1111+
}
1112+
}
1113+
1114+
return count;
10981115
}
10991116

11001117
public static string StripWhitespace(string s)
11011118
{
1102-
int count = s.Length;
1103-
for (int i = 0; i < s.Length; i++)
1119+
int indexOfWhitespace = s.AsSpan().IndexOfAny(s_whitespaceChars);
1120+
if (indexOfWhitespace < 0)
11041121
{
1105-
if (IsWhitespace(s[i]))
1122+
return s;
1123+
}
1124+
1125+
int count = s.Length - 1;
1126+
foreach (char c in s.AsSpan(indexOfWhitespace + 1))
1127+
{
1128+
if (IsWhitespace(c))
11061129
{
11071130
count--;
11081131
}
11091132
}
1110-
if (count == s.Length)
1111-
return s;
11121133

1113-
return string.Create(count, s, (chars, s) =>
1134+
return string.Create(count, s, static (chars, s) =>
11141135
{
11151136
int count = 0;
1116-
for (int i = 0; i < s.Length; i++)
1137+
foreach (char c in s)
11171138
{
1118-
char ch = s[i];
1119-
if (!IsWhitespace(ch))
1139+
if (!IsWhitespace(c))
11201140
{
1121-
chars[count++] = ch;
1141+
chars[count++] = c;
11221142
}
11231143
}
1144+
Debug.Assert(count == chars.Length);
11241145
});
11251146
}
11261147
}

src/libraries/System.Private.Xml/src/System/Xml/XmlCharType.cs

Lines changed: 41 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4+
using System.Buffers;
45
using System.Diagnostics;
56
using System.Runtime.CompilerServices;
67
using System.Runtime.InteropServices;
@@ -13,6 +14,19 @@ namespace System.Xml
1314
/// </summary>
1415
internal static class XmlCharType
1516
{
17+
#if DEBUG
18+
static XmlCharType()
19+
{
20+
for (int i = 0; i < 128; i++)
21+
{
22+
char c = (char)i;
23+
Debug.Assert(PublicIdChars.Contains(c) == IsPubidChar(c));
24+
Debug.Assert(AsciiCharDataChars.Contains(c) == IsCharData(c));
25+
Debug.Assert(WhiteSpaceChars.Contains(c) == IsWhiteSpace(c));
26+
}
27+
}
28+
#endif
29+
1630
// Surrogate constants
1731
internal const int SurHighStart = 0xd800; // 1101 10xx
1832
internal const int SurHighEnd = 0xdbff;
@@ -39,6 +53,13 @@ internal static class XmlCharType
3953
// bitmap for public ID characters - 1 bit per character 0x0 - 0x80; no character > 0x80 is a PUBLIC ID char
4054
private const string PublicIdBitmap = "\u2400\u0000\uffbb\uafff\uffff\u87ff\ufffe\u07ff";
4155

56+
private const string PublicIdChars = "\n\r !#$%'()*+,-./0123456789:;=?@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz";
57+
private const string AsciiCharDataChars = "\t\n\r !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
58+
private const string WhiteSpaceChars = "\t\n\r ";
59+
60+
private static readonly IndexOfAnyValues<char> s_publicIdChars = IndexOfAnyValues.Create(PublicIdChars);
61+
private static readonly IndexOfAnyValues<char> s_asciiCharDataChars = IndexOfAnyValues.Create(AsciiCharDataChars);
62+
private static readonly IndexOfAnyValues<char> s_whitespaceChars = IndexOfAnyValues.Create(WhiteSpaceChars);
4263

4364
[MethodImpl(MethodImplOptions.AggressiveInlining)]
4465
public static bool IsWhiteSpace(char ch) => (GetCharProperties(ch) & Whitespace) != 0u;
@@ -109,46 +130,36 @@ internal static void SplitSurrogateChar(int combinedChar, out char lowChar, out
109130
highChar = (char)(SurHighStart + v / 1024);
110131
}
111132

112-
internal static bool IsOnlyWhitespace(string? str)
113-
{
114-
return IsOnlyWhitespaceWithPos(str) == -1;
115-
}
133+
internal static bool IsOnlyWhitespace(ReadOnlySpan<char> str) =>
134+
IsOnlyWhitespaceWithPos(str) < 0;
116135

117136
// Character checking on strings
118-
internal static int IsOnlyWhitespaceWithPos(string? str)
137+
internal static int IsOnlyWhitespaceWithPos(ReadOnlySpan<char> str) =>
138+
str.IndexOfAnyExcept(s_whitespaceChars);
139+
140+
internal static int IsOnlyCharData(ReadOnlySpan<char> str)
119141
{
120-
if (str != null)
142+
int i = str.IndexOfAnyExcept(s_asciiCharDataChars);
143+
if (i < 0)
121144
{
122-
for (int i = 0; i < str.Length; i++)
123-
{
124-
if ((GetCharProperties(str[i]) & Whitespace) == 0u)
125-
{
126-
return i;
127-
}
128-
}
145+
// Fast-path: All ASCII CharData chars
146+
return -1;
129147
}
130-
return -1;
131-
}
132148

133-
internal static int IsOnlyCharData(string str)
134-
{
135-
if (str != null)
149+
for (; (uint)i < (uint)str.Length; i++)
136150
{
137-
for (int i = 0; i < str.Length; i++)
151+
char c = str[i];
152+
if (!IsCharData(c))
138153
{
139-
if ((GetCharProperties(str[i]) & CharData) == 0u)
154+
if ((uint)(i + 1) >= (uint)str.Length || !char.IsSurrogatePair(c, str[i + 1]))
140155
{
141-
if (i + 1 >= str.Length || !(XmlCharType.IsHighSurrogate(str[i]) && XmlCharType.IsLowSurrogate(str[i + 1])))
142-
{
143-
return i;
144-
}
145-
else
146-
{
147-
i++;
148-
}
156+
return i;
149157
}
158+
159+
i++;
150160
}
151161
}
162+
152163
return -1;
153164
}
154165

@@ -161,20 +172,8 @@ internal static bool IsOnlyDigits(string str, int startPos, int len)
161172
return str.AsSpan(startPos, len).IndexOfAnyExceptInRange('0', '9') < 0;
162173
}
163174

164-
internal static int IsPublicId(string str)
165-
{
166-
if (str != null)
167-
{
168-
for (int i = 0; i < str.Length; i++)
169-
{
170-
if (!IsPubidChar(str[i]))
171-
{
172-
return i;
173-
}
174-
}
175-
}
176-
return -1;
177-
}
175+
internal static int IsPublicId(string str) =>
176+
str.AsSpan().IndexOfAnyExcept(s_publicIdChars);
178177

179178
// This method tests whether a value is in a given range with just one test; start and end should be constants
180179
private static bool InRange(int value, int start, int end)
@@ -4286,6 +4285,5 @@ private static bool InRange(int value, int start, int end)
42864285
/* FFE0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0,
42874286
/* FFF0 */ 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0xD0, 0x00, 0x00,
42884287
};
4289-
42904288
}
42914289
}

src/libraries/System.Private.Xml/src/System/Xml/XmlConvert.cs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -516,7 +516,7 @@ public static string VerifyPublicId(string publicId)
516516

517517
// returns the position of invalid character or -1
518518
int pos = XmlCharType.IsPublicId(publicId);
519-
if (pos != -1)
519+
if (pos >= 0)
520520
{
521521
throw CreateInvalidCharException(publicId, pos, ExceptionType.XmlException);
522522
}
@@ -572,7 +572,7 @@ public static bool IsXmlSurrogatePair(char lowChar, char highChar)
572572
return XmlCharType.IsHighSurrogate(highChar) && XmlCharType.IsLowSurrogate(lowChar);
573573
}
574574

575-
// Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PublidChar
575+
// Valid PUBLIC ID character - as defined in XML 1.0 spec (fifth edition) production [13] PubidChar
576576
public static bool IsPublicIdChar(char ch)
577577
{
578578
return XmlCharType.IsPubidChar(ch);

0 commit comments

Comments
 (0)