Skip to content

Commit f43d935

Browse files
committed
Add float32 BinaryVector serialization/deserialization with endian handling
Signed-off-by: Medha Tiwari <[email protected]>
1 parent 0ee1694 commit f43d935

File tree

2 files changed

+32
-46
lines changed

2 files changed

+32
-46
lines changed

src/MongoDB.Bson/Serialization/BinaryVectorReader.cs

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414
*/
1515

1616
using System;
17-
using System.Buffers.Binary;
1817
using System.Collections.Generic;
1918
using System.Linq;
2019
using System.Runtime.InteropServices;
20+
using MongoDB.Bson.IO;
2121

2222
namespace MongoDB.Bson.Serialization
2323
{
@@ -27,6 +27,7 @@ public static BinaryVector<TItem> ReadBinaryVector<TItem>(ReadOnlyMemory<byte> v
2727
where TItem : struct
2828
{
2929
var (items, padding, vectorDataType) = ReadBinaryVectorAsArray<TItem>(vectorData);
30+
3031
return CreateBinaryVector(items, padding, vectorDataType);
3132
}
3233

@@ -41,39 +42,24 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType)
4142
switch (vectorDataType)
4243
{
4344
case BinaryVectorDataType.Float32:
45+
4446
if ((vectorDataBytes.Span.Length & 3) != 0)
4547
{
4648
throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
4749
}
4850

49-
if (typeof(TItem) != typeof(float))
50-
{
51-
throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}.");
52-
}
53-
54-
int count = vectorDataBytes.Length / 4; // 4 bytes per float
55-
float[] floatArray = new float[count];
56-
57-
for (int i = 0; i < count; i++)
58-
{
59-
// Each float32 is 4 bytes. So to extract the i-th float, we slice 4 bytes from offset i * 4. Use little-endian or big-endian decoding based on platform.
60-
floatArray[i] = BitConverter.IsLittleEndian
61-
? MemoryMarshal.Read<float>(vectorDataBytes.Span.Slice(i * 4, 4)) // fast, unaligned read on little endian
62-
: BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4)); // correctly reassemble 4 bytes as big-endian float
63-
}
64-
51+
var floatArray = BitConverter.IsLittleEndian // This check is not strictly required here; it is kept only to leave the existing little-endian path intact.
52+
? MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span).ToArray()
53+
: ToFloatArrayBigEndian(vectorDataBytes.Span);
6554
items = (TItem[])(object)floatArray;
6655
break;
67-
6856
case BinaryVectorDataType.Int8:
6957
var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span);
70-
items = itemsSpan.ToArray();
58+
items = (TItem[])(object)itemsSpan.ToArray();
7159
break;
72-
7360
case BinaryVectorDataType.PackedBit:
7461
items = (TItem[])(object)vectorDataBytes.ToArray();
7562
break;
76-
7763
default:
7864
throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported.");
7965
}
@@ -157,6 +143,15 @@ private static void ValidateItemTypeForBinaryVector<TItem, TItemExpectedType, TB
157143
throw new NotSupportedException($"Expected {typeof(TItemExpectedType)} for {typeof(TBinaryVectorType)}, but found {typeof(TItem)}.");
158144
}
159145
}
146+
/// <summary>
/// Decodes a span of raw bytes into an array of floats, reading each consecutive 4-byte group
/// with <c>BinaryPrimitivesCompat.ReadSingleLittleEndian</c>.
/// </summary>
/// <remarks>
/// Despite the "BigEndian" suffix in the name, every element is decoded as a little-endian value.
/// NOTE(review): the suffix presumably refers to the big-endian host on which this path is selected — confirm.
/// Trailing bytes that do not fill a complete 4-byte group are dropped by the integer division.
/// </remarks>
/// <param name="span">The raw bytes to decode.</param>
/// <returns>A new array holding one float per complete 4-byte group in <paramref name="span"/>.</returns>
private static float[] ToFloatArrayBigEndian(ReadOnlySpan<byte> span)
{
    var floats = new float[span.Length / sizeof(float)];
    var offset = 0;

    for (var index = 0; index < floats.Length; index++)
    {
        // Slice out exactly one element's worth of bytes, then step to the next group.
        floats[index] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(offset, sizeof(float)));
        offset += sizeof(float);
    }

    return floats;
}
160156
}
161157
}
162-

src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs

Lines changed: 15 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
*/
1515

1616
using System;
17-
using System.Buffers.Binary;
1817
using System.Runtime.InteropServices;
18+
using MongoDB.Bson.IO;
1919

2020
namespace MongoDB.Bson.Serialization
2121
{
@@ -36,44 +36,35 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector)
3636
public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding)
3737
where TItem : struct
3838
{
39-
if (BitConverter.IsLittleEndian)
40-
{
41-
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
42-
byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
43-
return result;
44-
}
45-
4639
byte[] resultBytes;
40+
4741
switch (binaryVectorDataType)
4842
{
4943
case BinaryVectorDataType.Float32:
5044
int length = vectorData.Length * sizeof(float);
51-
resultBytes = new byte[2 + length]; // Allocate output buffer:
52-
resultBytes[0] = (byte)binaryVectorDataType; // - [0]: vector type
53-
resultBytes[1] = padding; // - [1]: padding
54-
var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);
55-
Span<byte> floatOutput = resultBytes.AsSpan(2); // - [2...]: actual float data , skipping header
56-
foreach (var value in floatSpan)
45+
resultBytes = new byte[2 + length];
46+
resultBytes[0] = (byte)binaryVectorDataType;
47+
resultBytes[1] = padding;
48+
49+
var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);
50+
Span<byte> floatOutput = resultBytes.AsSpan(2);
51+
52+
for (int i = 0; i < floatSpan.Length; i++)
5753
{
58-
// Each float is 4 bytes - write in Big Endian format
59-
BinaryPrimitives.WriteSingleBigEndian(floatOutput, value);
60-
floatOutput = floatOutput.Slice(4); // advance to next 4-byte block
54+
BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput, floatSpan[i]);
55+
floatOutput = floatOutput.Slice(4);
6156
}
57+
6258
return resultBytes;
6359

6460
case BinaryVectorDataType.Int8:
6561
case BinaryVectorDataType.PackedBit:
6662
var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
67-
resultBytes = new byte[2 + vectorDataBytes.Length];
68-
resultBytes[0] = (byte)binaryVectorDataType;
69-
resultBytes[1] = padding;
70-
vectorDataBytes.CopyTo(resultBytes.AsSpan(2));
71-
return resultBytes;
63+
return [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
7264

7365
default:
74-
throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet.");
66+
throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}.");
7567
}
7668
}
7769
}
7870
}
79-

0 commit comments

Comments
 (0)