Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -224,9 +224,9 @@ private int ReadBytes(byte[] buffer, int offset, int count)

// we are guaranteed that cb is < Int32.Max since we always pass in count which is of type Int32 to
// our getbytes interface
count -= (int)cb;
offset += (int)cb;
intCount += (int)cb;
count -= cb;
offset += cb;
intCount += cb;
}
else
{
Expand Down Expand Up @@ -387,9 +387,9 @@ public override int Read(byte[] buffer, int offset, int count)

Buffer.BlockCopy(_cachedBytes[_currentArrayIndex], _currentPosition, buffer, offset, cb);
_currentPosition += cb;
count -= (int)cb;
offset += (int)cb;
intCount += (int)cb;
count -= cb;
offset += cb;
intCount += cb;
}

return intCount;
Expand Down Expand Up @@ -477,13 +477,19 @@ private long TotalLength

sealed internal class SqlStreamingXml
{
private static readonly XmlWriterSettings s_writerSettings = new() { CloseOutput = true, ConformanceLevel = ConformanceLevel.Fragment };
private static readonly XmlWriterSettings s_writerSettings = new() {
CloseOutput = true,
ConformanceLevel = ConformanceLevel.Fragment,
// Potentially limits XML to not supporting UTF-16 characters, but this is required to avoid writing
// a byte order mark and is consistent with prior default used within StringWriter/StringBuilder.
Comment on lines +483 to +484
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comments are misleading/inaccurate: using UTF-8 does not 'limit XML to not supporting UTF-16 characters' (UTF-8 can represent the full Unicode range), and _charsRemoved is being used as a zero-based 'already-consumed' count (it starts at 0 and is compared directly to dataIndex). Please correct/remove these comments to match the actual semantics and avoid confusion for future maintainers.

Suggested change
// Potentially limits XML to not supporting UTF-16 characters, but this is required to avoid writing
// a byte order mark and is consistent with prior default used within StringWriter/StringBuilder.
// Use UTF-8 without BOM to avoid writing a byte order mark; this is consistent with the prior
// default used within StringWriter/StringBuilder.

Copilot uses AI. Check for mistakes.
Encoding = new UTF8Encoding(false) };

private readonly int _columnOrdinal;
private SqlDataReader _reader;
private XmlReader _xmlReader;
private bool _canReadChunk;
private XmlWriter _xmlWriter;
private StringWriter _strWriter;
private MemoryStream _memoryStream;
private long _charsRemoved;

public SqlStreamingXml(int i, SqlDataReader reader)
Expand All @@ -495,11 +501,14 @@ public SqlStreamingXml(int i, SqlDataReader reader)
public void Close()
{
((IDisposable)_xmlWriter).Dispose();
((IDisposable)_memoryStream).Dispose();
((IDisposable)_xmlReader).Dispose();
_reader = null;
_xmlReader = null;
_canReadChunk = false;
_xmlWriter = null;
_strWriter = null;
_memoryStream = null;
_charsRemoved = 0;
}

public int ColumnOrdinal => _columnOrdinal;
Expand All @@ -508,96 +517,105 @@ public long GetChars(long dataIndex, char[] buffer, int bufferIndex, int length)
{
if (_xmlReader == null)
{
SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows:false, advanceReader:false);
SqlStream sqlStream = new(_columnOrdinal, _reader, addByteOrderMark: true, processAllRows: false, advanceReader: false);
_xmlReader = sqlStream.ToXmlReader();
_strWriter = new StringWriter((System.IFormatProvider)null);
_xmlWriter = XmlWriter.Create(_strWriter, s_writerSettings);
_canReadChunk = _xmlReader.CanReadValueChunk;
_memoryStream = new MemoryStream();
_xmlWriter = XmlWriter.Create(_memoryStream, s_writerSettings);
}

int charsToSkip = 0;
int cnt = 0;
long charsToSkip = 0;
long cnt = 0;
if (dataIndex < _charsRemoved)
{
throw ADP.NonSeqByteAccess(dataIndex, _charsRemoved, nameof(GetChars));
}
else if (dataIndex > _charsRemoved)
{
charsToSkip = (int)(dataIndex - _charsRemoved);
//dataIndex is zero-based, but _charsRemoved is one-based, so the difference is the number of chars to skip in the MemoryStream before we start copying data to the buffer
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comments are misleading/inaccurate: using UTF-8 does not 'limit XML to not supporting UTF-16 characters' (UTF-8 can represent the full Unicode range), and _charsRemoved is being used as a zero-based 'already-consumed' count (it starts at 0 and is compared directly to dataIndex). Please correct/remove these comments to match the actual semantics and avoid confusion for future maintainers.

Suggested change
//dataIndex is zero-based, but _charsRemoved is one-based, so the difference is the number of chars to skip in the MemoryStream before we start copying data to the buffer
// Both dataIndex and _charsRemoved are zero-based; _charsRemoved tracks how many chars have already been returned,
// so their difference is the number of additional chars to skip in the MemoryStream before we start copying data to the buffer.

Copilot uses AI. Check for mistakes.
charsToSkip = dataIndex - _charsRemoved;
}

// If buffer parameter is null, we have to return -1 since there is no way for us to know the
// total size up front without reading and converting the XML.
if (buffer == null)
{
return (long)(-1);
return -1;
}

StringBuilder strBldr = _strWriter.GetStringBuilder();
while (!_xmlReader.EOF)
long memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position;
while (memoryStreamRemaining < (length + charsToSkip) && !_xmlReader.EOF)
Comment on lines +546 to +547
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This treats UTF-8 encoded bytes in _memoryStream as if they were UTF-16 chars (1 byte == 1 char), which will corrupt non-ASCII data and makes length/charsToSkip comparisons incorrect. GetChars must operate on characters, so either keep a character store (e.g., StringBuilder/TextWriter) or decode bytes properly (e.g., read via a StreamReader with the same encoding and track positions in characters rather than bytes).

Copilot uses AI. Check for mistakes.
{
if (strBldr.Length >= (length + charsToSkip))
// Check whether the MemoryStream has been fully read.
// If so, reset the MemoryStream for reuse and to avoid growing size too much.
if (_memoryStream.Length > 0 && memoryStreamRemaining == 0)
{
break;
// This also sets the Position back to 0.
_memoryStream.SetLength(0);
}
// Can't call _xmlWriter.WriteNode here, since it reads all of the data in before returning the first char.
// Do own implementation of WriteNode instead that reads just enough data to return the required number of chars
//_xmlWriter.WriteNode(_xmlReader, true);
// _xmlWriter.Flush();
WriteXmlElement();
// Update memoryStreamRemaining based on the number of bytes/chars just written to the MemoryStream
memoryStreamRemaining = _memoryStream.Length - _memoryStream.Position;
if (charsToSkip > 0)
{
// Aggressively remove the characters we want to skip to avoid growing StringBuilder size too much
cnt = strBldr.Length < charsToSkip ? strBldr.Length : charsToSkip;
strBldr.Remove(0, cnt);
cnt = memoryStreamRemaining < charsToSkip ? memoryStreamRemaining : charsToSkip;
// Move the Position forward
_memoryStream.Seek(cnt, SeekOrigin.Current);
memoryStreamRemaining -= cnt;
charsToSkip -= cnt;
_charsRemoved += (long)cnt;
_charsRemoved += cnt;
}
}

if (charsToSkip > 0)
{
cnt = strBldr.Length < charsToSkip ? strBldr.Length : charsToSkip;
strBldr.Remove(0, cnt);
cnt = memoryStreamRemaining < charsToSkip ? memoryStreamRemaining : charsToSkip;
// Move the Position forward
_memoryStream.Seek(cnt, SeekOrigin.Current);
memoryStreamRemaining -= cnt;
charsToSkip -= cnt;
_charsRemoved += (long)cnt;
_charsRemoved += cnt;
}

if (strBldr.Length == 0)
if (memoryStreamRemaining == 0)
{
return 0;
}
// At this point charsToSkip must be 0
Debug.Assert(charsToSkip == 0);

cnt = strBldr.Length < length ? strBldr.Length : length;
cnt = memoryStreamRemaining < length ? memoryStreamRemaining : length;
for (int i = 0; i < cnt; i++)
{
buffer[bufferIndex + i] = strBldr[i];
// ReadByte moves the Position forward
buffer[bufferIndex + i] = (char)_memoryStream.ReadByte();
}
Comment on lines +591 to 596
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This treats UTF-8 encoded bytes in _memoryStream as if they were UTF-16 chars (1 byte == 1 char), which will corrupt non-ASCII data and makes length/charsToSkip comparisons incorrect. GetChars must operate on characters, so either keep a character store (e.g., StringBuilder/TextWriter) or decode bytes properly (e.g., read via a StreamReader with the same encoding and track positions in characters rather than bytes).

Copilot uses AI. Check for mistakes.
// Remove the characters we have already returned
strBldr.Remove(0, cnt);
_charsRemoved += (long)cnt;
return (long)cnt;
_charsRemoved += cnt;
return cnt;
}

// This method duplicates the work of XmlWriter.WriteNode except that it reads one element at a time
// instead of reading the entire node like XmlWriter.
// Caller already ensures !_xmlReader.EOF
private void WriteXmlElement()
{
if (_xmlReader.EOF)
{
return;
}

bool canReadChunk = _xmlReader.CanReadValueChunk;
char[] writeNodeBuffer = null;

// Constants
const int WriteNodeBufferSize = 1024;

long memoryStreamPosition = _memoryStream.Position;
// Move the Position to the end of the MemoryStream since we are always appending.
_memoryStream.Seek(0, SeekOrigin.End);

_xmlReader.Read();
switch (_xmlReader.NodeType)
{
// Note: Whitespace, CDATA, EntityReference, XmlDeclaration, ProcessingInstruction, DocumentType, and Comment node types
// are not expected in the XML returned from SQL Server as it normalizes them out, but handle them just in case.
// SignificantWhitespace will occur when used with xml:space="preserve"
case XmlNodeType.Element:
_xmlWriter.WriteStartElement(_xmlReader.Prefix, _xmlReader.LocalName, _xmlReader.NamespaceURI);
_xmlWriter.WriteAttributes(_xmlReader, true);
Expand All @@ -608,12 +626,9 @@ private void WriteXmlElement()
}
break;
case XmlNodeType.Text:
if (canReadChunk)
if (_canReadChunk)
{
if (writeNodeBuffer == null)
{
writeNodeBuffer = new char[WriteNodeBufferSize];
}
char[] writeNodeBuffer = new char[WriteNodeBufferSize];
int read;
while ((read = _xmlReader.ReadValueChunk(writeNodeBuffer, 0, WriteNodeBufferSize)) > 0)
Comment on lines +629 to 633
Copy link

Copilot AI Mar 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Allocating writeNodeBuffer on every XmlNodeType.Text element can create avoidable GC pressure when streaming larger XML. Consider storing a reusable buffer as a field (or renting from ArrayPool<char>) so repeated calls to WriteXmlElement() don't allocate a new array each time.

Copilot uses AI. Check for mistakes.
{
Expand Down Expand Up @@ -650,6 +665,8 @@ private void WriteXmlElement()
break;
}
_xmlWriter.Flush();
// Reset the Position back to where it was before writing this element so that the caller can continue reading from the expected position.
_memoryStream.Position = memoryStreamPosition;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@
<Compile Include="SQL\SqlNotificationTest\SqlNotificationTest.cs" />
<Compile Include="SQL\SqlSchemaInfoTest\SqlSchemaInfoTest.cs" />
<Compile Include="SQL\SqlStatisticsTest\SqlStatisticsTest.cs" />
<Compile Include="SQL\SqlStreamingXmlTest\SqlStreamingXmlTest.cs" />
<Compile Include="SQL\TransactionTest\DistributedTransactionTest.cs" />
<Compile Include="SQL\TransactionTest\TransactionEnlistmentTest.cs" />
<Compile Include="SQL\TransactionTest\TransactionTest.cs" />
Expand Down
Loading
Loading