-
Notifications
You must be signed in to change notification settings - Fork 323
Dev/mdaigle/3974 nonascii tests #4008
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
3cc826e
e2dec42
9f44ae8
e10e2bc
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -224,9 +224,9 @@ private int ReadBytes(byte[] buffer, int offset, int count) | |||||||
|
|
||||||||
| // we are guaranteed that cb is < Int32.Max since we always pass in count which is of type Int32 to | ||||||||
| // our getbytes interface | ||||||||
| count -= (int)cb; | ||||||||
| offset += (int)cb; | ||||||||
| intCount += (int)cb; | ||||||||
| count -= cb; | ||||||||
| offset += cb; | ||||||||
| intCount += cb; | ||||||||
| } | ||||||||
| else | ||||||||
| { | ||||||||
|
|
@@ -387,9 +387,9 @@ public override int Read(byte[] buffer, int offset, int count) | |||||||
|
|
||||||||
| Buffer.BlockCopy(_cachedBytes[_currentArrayIndex], _currentPosition, buffer, offset, cb); | ||||||||
| _currentPosition += cb; | ||||||||
| count -= (int)cb; | ||||||||
| offset += (int)cb; | ||||||||
| intCount += (int)cb; | ||||||||
| count -= cb; | ||||||||
| offset += cb; | ||||||||
| intCount += cb; | ||||||||
| } | ||||||||
|
|
||||||||
| return intCount; | ||||||||
|
|
@@ -477,13 +477,19 @@ private long TotalLength | |||||||
|
|
||||||||
| sealed internal class SqlStreamingXml | ||||||||
| { | ||||||||
| private static readonly XmlWriterSettings s_writerSettings = new() { CloseOutput = true, ConformanceLevel = ConformanceLevel.Fragment }; | ||||||||
| private static readonly XmlWriterSettings s_writerSettings = new() { | ||||||||
| CloseOutput = true, | ||||||||
| ConformanceLevel = ConformanceLevel.Fragment, | ||||||||
| // Potentially limits XML to not supporting UTF-16 characters, but this is required to avoid writing | ||||||||
| // a byte order mark and is consistent with prior default used within StringWriter/StringBuilder. | ||||||||
| Encoding = new UTF8Encoding(false) }; | ||||||||
|
|
||||||||
| private readonly int _columnOrdinal; | ||||||||
| private SqlDataReader _reader; | ||||||||
| private XmlReader _xmlReader; | ||||||||
| private bool _canReadChunk; | ||||||||
| private XmlWriter _xmlWriter; | ||||||||
| private StringWriter _strWriter; | ||||||||
| private MemoryStream _memoryStream; | ||||||||
| private long _charsRemoved; | ||||||||
|
|
||||||||
| public SqlStreamingXml(int i, SqlDataReader reader) | ||||||||
|
|
@@ -495,11 +501,14 @@ public SqlStreamingXml(int i, SqlDataReader reader) | |||||||
| public void Close() | ||||||||
| { | ||||||||
| ((IDisposable)_xmlWriter).Dispose(); | ||||||||
| ((IDisposable)_memoryStream).Dispose(); | ||||||||
| ((IDisposable)_xmlReader).Dispose(); | ||||||||
| _reader = null; | ||||||||
| _xmlReader = null; | ||||||||
| _canReadChunk = false; | ||||||||
| _xmlWriter = null; | ||||||||
| _strWriter = null; | ||||||||
| _memoryStream = null; | ||||||||
| _charsRemoved = 0; | ||||||||
| } | ||||||||
|
|
||||||||
| public int ColumnOrdinal => _columnOrdinal; | ||||||||
|
|
@@ -508,96 +517,105 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length) | |||||||
| { | ||||||||
| if (_xmlReader == null) | ||||||||
| { | ||||||||
| SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows:false, advanceReader:false); | ||||||||
| SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows: false, advanceReader: false); | ||||||||
| _xmlReader = sqlStream.ToXmlReader(); | ||||||||
| _strWriter = new StringWriter((System.IFormatProvider)null); | ||||||||
| _xmlWriter = XmlWriter.Create(_strWriter, s_writerSettings); | ||||||||
| _canReadChunk = _xmlReader.CanReadValueChunk; | ||||||||
| _memoryStream = new MemoryStream(); | ||||||||
| _xmlWriter = XmlWriter.Create(_memoryStream, s_writerSettings); | ||||||||
| } | ||||||||
|
|
||||||||
| int charsToSkip = 0; | ||||||||
| int cnt = 0; | ||||||||
| long charsToSkip = 0; | ||||||||
| long cnt = 0; | ||||||||
| if (dataIndex < _charsRemoved) | ||||||||
| { | ||||||||
| throw ADP.NonSeqByteAccess(dataIndex, _charsRemoved, nameof(GetChars)); | ||||||||
| } | ||||||||
| else if (dataIndex > _charsRemoved) | ||||||||
| { | ||||||||
| charsToSkip = (int)(dataIndex - _charsRemoved); | ||||||||
| //dataIndex is zero-based, but _charsRemoved is one-based, so the difference is the number of chars to skip in the MemoryStream before we start copying data to the buffer | ||||||||
|
||||||||
| //dataIndex is zero-based, but _charsRemoved is one-based, so the difference is the number of chars to skip in the MemoryStream before we start copying data to the buffer | |
| // Both dataIndex and _charsRemoved are zero-based; _charsRemoved tracks how many chars have already been returned, | |
| // so their difference is the number of additional chars to skip in the MemoryStream before we start copying data to the buffer. |
Copilot
AI
Mar 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This treats UTF-8 encoded bytes in _memoryStream as if they were UTF-16 chars (1 byte == 1 char), which will corrupt non-ASCII data and makes length/charsToSkip comparisons incorrect. GetChars must operate on characters, so either keep a character store (e.g., StringBuilder/TextWriter) or decode bytes properly (e.g., read via a StreamReader with the same encoding and track positions in characters rather than bytes).
Copilot
AI
Mar 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This treats UTF-8 encoded bytes in _memoryStream as if they were UTF-16 chars (1 byte == 1 char), which will corrupt non-ASCII data and makes length/charsToSkip comparisons incorrect. GetChars must operate on characters, so either keep a character store (e.g., StringBuilder/TextWriter) or decode bytes properly (e.g., read via a StreamReader with the same encoding and track positions in characters rather than bytes).
Copilot
AI
Mar 5, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Allocating writeNodeBuffer on every XmlNodeType.Text element can create avoidable GC pressure when streaming larger XML. Consider storing a reusable buffer as a field (or renting from ArrayPool<char>) so repeated calls to WriteXmlElement() don't allocate a new array each time.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The comments are misleading/inaccurate: using UTF-8 does not 'limit XML to not supporting UTF-16 characters' (UTF-8 can represent the full Unicode range), and `_charsRemoved` is being used as a zero-based 'already-consumed' count (it starts at 0 and is compared directly to `dataIndex`). Please correct/remove these comments to match the actual semantics and avoid confusion for future maintainers.