Skip to content

Commit 1157294

Browse files
committed
Refactor Utf8Formatter for safety and clarity
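Reordered the modifiers on several methods from `unsafe static` to `static unsafe`; the methods are still declared unsafe, so this is a style-only change. In ConvertUtf16ToUtf8, the early-exit paths (output buffer too small, or the surrogate case falling through) now return 0 instead of the number of bytes written so far (the pointer difference `utf8Bytes - start`), so callers can no longer read a partial byte count from those paths. The surrogate case label was narrowed from the full surrogate range (0xD800–0xDFFF) to high surrogates only (0xD800–0xDBFF), so code units in 0xDC00–0xDFFF are now handled by the default 3-byte branch.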
1 parent 08e1ab7 commit 1157294

File tree

1 file changed

+16
-14
lines changed

1 file changed

+16
-14
lines changed

Hexa.NET.Utilities/Text/Utf8Formatter.cs

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public static unsafe bool Is<T>(this TypedReference reference, [MaybeNullWhen(fa
4343
return false;
4444
}
4545
}
46+
4647
#endif
4748

4849
/// <summary>
@@ -1019,7 +1020,7 @@ public static unsafe int ConvertUtf16ToUtf8(char* utf16Chars, int utf16Length, b
10191020
for (int i = 0; i < utf16Length; i++)
10201021
{
10211022
if (utf8Bytes >= utf8BytesEnd)
1022-
return (int)(utf8Bytes - start);
1023+
return 0;
10231024

10241025
char utf16Char = utf16Chars[i];
10251026

@@ -1035,15 +1036,15 @@ public static unsafe int ConvertUtf16ToUtf8(char* utf16Chars, int utf16Length, b
10351036
case <= 0x7FF:
10361037
// 2-byte UTF-8
10371038
if (utf8Bytes + 1 >= utf8BytesEnd)
1038-
return (int)(utf8Bytes - start);
1039+
return 0;
10391040

10401041
*utf8Bytes = (byte)(0xC0 | (codePoint >> 6));
10411042
utf8Bytes++;
10421043
*utf8Bytes = (byte)(0x80 | (codePoint & 0x3F));
10431044
utf8Bytes++;
10441045
break;
10451046

1046-
case >= 0xD800 and <= 0xDFFF:
1047+
case >= 0xD800 and <= 0xDBFF:
10471048
if (i + 1 < utf16Length)
10481049
{
10491050
char lowSurrogate = utf16Chars[i + 1];
@@ -1054,7 +1055,7 @@ public static unsafe int ConvertUtf16ToUtf8(char* utf16Chars, int utf16Length, b
10541055

10551056
// This results in a 4-byte UTF-8 sequence
10561057
if (utf8Bytes + 3 >= utf8BytesEnd)
1057-
return (int)(utf8Bytes - start);
1058+
return 0;
10581059

10591060
*utf8Bytes = (byte)(0xF0 | (codePointSurrogate >> 18));
10601061
utf8Bytes++;
@@ -1071,12 +1072,12 @@ public static unsafe int ConvertUtf16ToUtf8(char* utf16Chars, int utf16Length, b
10711072
}
10721073
}
10731074

1074-
return (int)(utf8Bytes - start);
1075+
return 0;
10751076

10761077
default:
10771078
// 3-byte UTF-8
10781079
if (utf8Bytes + 2 >= utf8BytesEnd)
1079-
return (int)(utf8Bytes - start);
1080+
return 0;
10801081

10811082
*utf8Bytes = (byte)(0xE0 | (codePoint >> 12));
10821083
utf8Bytes++;
@@ -1520,35 +1521,36 @@ public static string GetTimeSpanPattern(string formatSpecifier = "G")
15201521
_ => throw new FormatException("Unknown format specifier")
15211522
};
15221523
}
1523-
public unsafe static int Format(TimeSpan timeSpan, Span<byte> buf)
1524+
1525+
public static unsafe int Format(TimeSpan timeSpan, Span<byte> buf)
15241526
{
15251527
fixed (byte* pBuf = buf)
15261528
return Format(timeSpan, pBuf, buf.Length, TimeSpanDefaultPattern, CultureInfo.CurrentCulture);
15271529
}
15281530

1529-
public unsafe static int Format(TimeSpan timeSpan, Span<byte> buf, string format)
1531+
public static unsafe int Format(TimeSpan timeSpan, Span<byte> buf, string format)
15301532
{
15311533
fixed (byte* pBuf = buf)
15321534
return Format(timeSpan, pBuf, buf.Length, format, CultureInfo.CurrentCulture);
15331535
}
15341536

1535-
public unsafe static int Format(TimeSpan timeSpan, Span<byte> buf, string format, CultureInfo cultureInfo)
1537+
public static unsafe int Format(TimeSpan timeSpan, Span<byte> buf, string format, CultureInfo cultureInfo)
15361538
{
15371539
fixed (byte* pBuf = buf)
15381540
return Format(timeSpan, pBuf, buf.Length, format, cultureInfo);
15391541
}
15401542

1541-
public unsafe static int Format(TimeSpan timeSpan, byte* buf, int bufSize)
1543+
public static unsafe int Format(TimeSpan timeSpan, byte* buf, int bufSize)
15421544
{
15431545
return Format(timeSpan, buf, bufSize, TimeSpanDefaultPattern, CultureInfo.CurrentCulture);
15441546
}
15451547

1546-
public unsafe static int Format(TimeSpan timeSpan, byte* buf, int bufSize, string format)
1548+
public static unsafe int Format(TimeSpan timeSpan, byte* buf, int bufSize, string format)
15471549
{
15481550
return Format(timeSpan, buf, bufSize, format, CultureInfo.CurrentCulture);
15491551
}
15501552

1551-
public unsafe static int Format(TimeSpan timeSpan, byte* buf, int bufSize, string format, CultureInfo cultureInfo)
1553+
public static unsafe int Format(TimeSpan timeSpan, byte* buf, int bufSize, string format, CultureInfo cultureInfo)
15521554
{
15531555
if (bufSize == 0)
15541556
{
@@ -1752,7 +1754,7 @@ private static unsafe int WriteTwoDigitInt(byte* buf, int bufSize, int padding,
17521754
}
17531755

17541756
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1755-
private unsafe static int IndexOf(char* str, char* strEnd, char target)
1757+
private static unsafe int IndexOf(char* str, char* strEnd, char target)
17561758
{
17571759
char* start = str;
17581760
while (str != strEnd && *str != target)
@@ -1843,7 +1845,7 @@ private static unsafe bool Format(TimeSpan timeSpan, byte* buf, int* idx, int ma
18431845
}
18441846

18451847
[MethodImpl(MethodImplOptions.AggressiveInlining)]
1846-
private unsafe static int CountAhead(char** format, char* formatEnd, char target, int max)
1848+
private static unsafe int CountAhead(char** format, char* formatEnd, char target, int max)
18471849
{
18481850
int count = 0;
18491851
char* pChar = *format;

0 commit comments

Comments
 (0)