Skip to content

Commit 19ff978

Browse files
authored
Add internal Encoding.TryGetBytes (#84609)
1 parent aa27a07 commit 19ff978

File tree

7 files changed

+118
-15
lines changed

7 files changed

+118
-15
lines changed

src/libraries/System.Private.CoreLib/src/System/Text/ASCIIEncoding.cs

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -322,8 +322,31 @@ public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
322322
}
323323
}
324324

325+
// TODO https://github.com/dotnet/runtime/issues/84425: Make this public.
326+
/// <summary>Encodes into a span of bytes a set of characters from the specified read-only span if the destination is large enough.</summary>
327+
/// <param name="chars">The span containing the set of characters to encode.</param>
328+
/// <param name="bytes">The byte span to hold the encoded bytes.</param>
329+
/// <param name="bytesWritten">Upon successful completion of the operation, the number of bytes encoded into <paramref name="bytes"/>.</param>
330+
/// <returns><see langword="true"/> if all of the characters were encoded into the destination; <see langword="false"/> if the destination was too small to contain all the encoded bytes.</returns>
331+
internal override unsafe bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten)
{
    fixed (char* pChars = &MemoryMarshal.GetReference(chars))
    fixed (byte* pBytes = &MemoryMarshal.GetReference(bytes))
    {
        // Overflow throwing is switched off for this call, so a negative result
        // (rather than an exception) is what reports failure here.
        int result = GetBytesCommon(pChars, chars.Length, pBytes, bytes.Length, throwForDestinationOverflow: false);
        bool succeeded = result >= 0;

        // On failure the caller always observes zero bytes written.
        bytesWritten = succeeded ? result : 0;
        return succeeded;
    }
}
347+
325348
[MethodImpl(MethodImplOptions.AggressiveInlining)]
326-
private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount)
349+
private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount, bool throwForDestinationOverflow = true)
327350
{
328351
// Common helper method for all non-EncoderNLS entry points to GetBytes.
329352
// A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
@@ -347,7 +370,7 @@ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int
347370
{
348371
// Simple narrowing conversion couldn't operate on entire buffer - invoke fallback.
349372

350-
return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten);
373+
return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten, throwForDestinationOverflow);
351374
}
352375
}
353376

@@ -360,7 +383,7 @@ private protected sealed override unsafe int GetBytesFast(char* pChars, int char
360383
return bytesWritten;
361384
}
362385

363-
private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder)
386+
private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder, bool throwForDestinationOverflow = true)
364387
{
365388
// We special-case EncoderReplacementFallback if it's telling us to write a single ASCII char,
366389
// since we believe this to be relatively common and we can handle it more efficiently than
@@ -406,7 +429,7 @@ private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<c
406429
}
407430
else
408431
{
409-
return base.GetBytesWithFallback(chars, originalCharsLength, bytes, originalBytesLength, encoder);
432+
return base.GetBytesWithFallback(chars, originalCharsLength, bytes, originalBytesLength, encoder, throwForDestinationOverflow);
410433
}
411434
}
412435

src/libraries/System.Private.CoreLib/src/System/Text/Encoding.Internal.cs

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ private protected virtual unsafe int GetBytesFast(char* pChars, int charsLength,
485485
/// If the destination buffer is not large enough to hold the entirety of the transcoded data.
486486
/// </exception>
487487
[MethodImpl(MethodImplOptions.NoInlining)]
488-
private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar)
488+
private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar, bool throwForDestinationOverflow = true)
489489
{
490490
// This is a stub method that's marked "no-inlining" so that we don't stack-spill spans
491491
// into our immediate caller. Doing so increases the method prolog in what's supposed to
@@ -499,7 +499,8 @@ private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int orig
499499
originalCharsLength: originalCharCount,
500500
bytes: new Span<byte>(pOriginalBytes, originalByteCount).Slice(bytesWrittenSoFar),
501501
originalBytesLength: originalByteCount,
502-
encoder: null);
502+
encoder: null,
503+
throwForDestinationOverflow);
503504
}
504505

505506
/// <summary>
@@ -508,7 +509,7 @@ private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int orig
508509
/// and <paramref name="bytesWrittenSoFar"/> signal where in the provided buffers the fallback loop
509510
/// should begin operating. The behavior of this method is to drain any leftover data in the
510511
/// <see cref="EncoderNLS"/> instance, then to invoke the <see cref="GetBytesFast"/> virtual method
511-
/// after data has been drained, then to call <see cref="GetBytesWithFallback(ReadOnlySpan{char}, int, Span{byte}, int, EncoderNLS)"/>.
512+
/// after data has been drained, then to call <see cref="GetBytesWithFallback(ReadOnlySpan{char}, int, Span{byte}, int, EncoderNLS, bool)"/>.
512513
/// </summary>
513514
/// <returns>
514515
/// The total number of bytes written to <paramref name="pOriginalBytes"/>, including <paramref name="bytesWrittenSoFar"/>.
@@ -582,7 +583,7 @@ private unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCo
582583
/// implementation, deferring to the base implementation if needed. This method calls <see cref="ThrowBytesOverflow"/>
583584
/// if necessary.
584585
/// </remarks>
585-
private protected virtual unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder)
586+
private protected virtual unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder, bool throwForDestinationOverflow = true)
586587
{
587588
Debug.Assert(!chars.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
588589
Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");
@@ -678,8 +679,15 @@ private protected virtual unsafe int GetBytesWithFallback(ReadOnlySpan<char> cha
678679
// The line below will also throw if the encoder couldn't make any progress at all
679680
// because the output buffer wasn't large enough to contain the result of even
680681
// a single scalar conversion or fallback.
681-
682-
ThrowBytesOverflow(encoder, nothingEncoded: bytes.Length == originalBytesLength);
682+
if (throwForDestinationOverflow)
683+
{
684+
ThrowBytesOverflow(encoder, nothingEncoded: bytes.Length == originalBytesLength);
685+
}
686+
else
687+
{
688+
Debug.Assert(encoder is null);
689+
return -1;
690+
}
683691
}
684692

685693
// If an EncoderNLS instance is active, update its "total consumed character count" value.

src/libraries/System.Private.CoreLib/src/System/Text/Encoding.cs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,25 @@ public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
733733
}
734734
}
735735

736+
// TODO https://github.com/dotnet/runtime/issues/84425: Make this public.
737+
/// <summary>Encodes into a span of bytes a set of characters from the specified read-only span if the destination is large enough.</summary>
738+
/// <param name="chars">The span containing the set of characters to encode.</param>
739+
/// <param name="bytes">The byte span to hold the encoded bytes.</param>
740+
/// <param name="bytesWritten">Upon successful completion of the operation, the number of bytes encoded into <paramref name="bytes"/>.</param>
741+
/// <returns><see langword="true"/> if all of the characters were encoded into the destination; <see langword="false"/> if the destination was too small to contain all the encoded bytes.</returns>
742+
internal virtual bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten)
{
    // Measure first: GetBytes is only invoked once the destination is known
    // to be able to hold the full encoded output.
    if (GetByteCount(chars) > bytes.Length)
    {
        bytesWritten = 0;
        return false;
    }

    bytesWritten = GetBytes(chars, bytes);
    return true;
}
754+
736755
// Returns the number of characters produced by decoding the given byte
737756
// array.
738757
//

src/libraries/System.Private.CoreLib/src/System/Text/Latin1Encoding.cs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,29 @@ public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
234234
}
235235
}
236236

237+
// TODO https://github.com/dotnet/runtime/issues/84425: Make this public.
238+
/// <summary>Encodes into a span of bytes a set of characters from the specified read-only span if the destination is large enough.</summary>
239+
/// <param name="chars">The span containing the set of characters to encode.</param>
240+
/// <param name="bytes">The byte span to hold the encoded bytes.</param>
241+
/// <param name="bytesWritten">Upon successful completion of the operation, the number of bytes encoded into <paramref name="bytes"/>.</param>
242+
/// <returns><see langword="true"/> if all of the characters were encoded into the destination; <see langword="false"/> if the destination was too small to contain all the encoded bytes.</returns>
243+
internal override unsafe bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten)
{
    fixed (char* pChars = &MemoryMarshal.GetReference(chars))
    fixed (byte* pBytes = &MemoryMarshal.GetReference(bytes))
    {
        // Overflow throwing is switched off for this call, so a negative result
        // (rather than an exception) is what reports failure here.
        int result = GetBytesCommon(pChars, chars.Length, pBytes, bytes.Length, throwForDestinationOverflow: false);
        bool succeeded = result >= 0;

        // On failure the caller always observes zero bytes written.
        bytesWritten = succeeded ? result : 0;
        return succeeded;
    }
}
259+
237260
public override unsafe int GetBytes(string s, int charIndex, int charCount, byte[] bytes, int byteIndex)
238261
{
239262
if (s is null || bytes is null)
@@ -269,7 +292,7 @@ public override unsafe int GetBytes(string s, int charIndex, int charCount, byte
269292

270293

271294
[MethodImpl(MethodImplOptions.AggressiveInlining)]
272-
private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount)
295+
private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount, bool throwForDestinationOverflow = true)
273296
{
274297
// Common helper method for all non-EncoderNLS entry points to GetBytes.
275298
// A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
@@ -293,7 +316,7 @@ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int
293316
{
294317
// Simple narrowing conversion couldn't operate on entire buffer - invoke fallback.
295318

296-
return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten);
319+
return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten, throwForDestinationOverflow);
297320
}
298321
}
299322

src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.Sealed.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,13 @@ private unsafe string GetStringForSmallInput(byte[] bytes)
146146

147147
return new string(new ReadOnlySpan<char>(ref *pDestination, charsWritten)); // this overload of ROS ctor doesn't validate length
148148
}
149+
150+
// TODO https://github.com/dotnet/runtime/issues/84425: Make this public.
151+
// TODO: Make this [Intrinsic] and handle JIT-time UTF8 encoding of literal `chars`.
152+
// Currently a pure pass-through: arguments and result are forwarded to the inherited implementation unchanged.
internal override unsafe bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten) =>
    base.TryGetBytes(chars, bytes, out bytesWritten);
149156
}
150157
}
151158
}

src/libraries/System.Private.CoreLib/src/System/Text/UTF8Encoding.cs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -369,8 +369,31 @@ public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
369369
}
370370
}
371371

372+
// TODO https://github.com/dotnet/runtime/issues/84425: Make this public.
373+
/// <summary>Encodes into a span of bytes a set of characters from the specified read-only span if the destination is large enough.</summary>
374+
/// <param name="chars">The span containing the set of characters to encode.</param>
375+
/// <param name="bytes">The byte span to hold the encoded bytes.</param>
376+
/// <param name="bytesWritten">Upon successful completion of the operation, the number of bytes encoded into <paramref name="bytes"/>.</param>
377+
/// <returns><see langword="true"/> if all of the characters were encoded into the destination; <see langword="false"/> if the destination was too small to contain all the encoded bytes.</returns>
378+
internal override unsafe bool TryGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int bytesWritten)
{
    fixed (char* pChars = &MemoryMarshal.GetReference(chars))
    fixed (byte* pBytes = &MemoryMarshal.GetReference(bytes))
    {
        // Overflow throwing is switched off for this call, so a negative result
        // (rather than an exception) is what reports failure here.
        int result = GetBytesCommon(pChars, chars.Length, pBytes, bytes.Length, throwForDestinationOverflow: false);
        bool succeeded = result >= 0;

        // On failure the caller always observes zero bytes written.
        bytesWritten = succeeded ? result : 0;
        return succeeded;
    }
}
394+
372395
[MethodImpl(MethodImplOptions.AggressiveInlining)]
373-
private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount)
396+
private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount, bool throwForDestinationOverflow = true)
374397
{
375398
// Common helper method for all non-EncoderNLS entry points to GetBytes.
376399
// A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
@@ -394,7 +417,7 @@ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int
394417
{
395418
// Simple narrowing conversion couldn't operate on entire buffer - invoke fallback.
396419

397-
return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten);
420+
return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten, throwForDestinationOverflow);
398421
}
399422
}
400423

src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ public bool AppendFormatted<T>(T value, int alignment, string? format)
441441
/// <param name="value">The span to write.</param>
442442
public bool AppendFormatted(scoped ReadOnlySpan<char> value)
443443
{
444-
if (FromUtf16(value, _destination.Slice(_pos), out _, out int bytesWritten) == OperationStatus.Done)
444+
if (Encoding.UTF8.TryGetBytes(value, _destination.Slice(_pos), out int bytesWritten))
445445
{
446446
_pos += bytesWritten;
447447
return true;

0 commit comments

Comments
 (0)