Add float32 BinaryVector serialization/deserialization with endian handling

medhatiwari · medhatiwari · commit f43d935f961a · 2025-05-28T14:50:49.000+02:00
Signed-off-by: Medha Tiwari <medhavns1@gmail.com>
diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs b/src/MongoDB.Bson/Serialization/BinaryVectorReader.cs
@@ -14,10 +14,10 @@
 */
 
 using System;
-using System.Buffers.Binary;
 using System.Collections.Generic;
 using System.Linq;
 using System.Runtime.InteropServices;
+using MongoDB.Bson.IO;
 
 namespace MongoDB.Bson.Serialization
 {
@@ -27,6 +27,7 @@ public static BinaryVector<TItem> ReadBinaryVector<TItem>(ReadOnlyMemory<byte> v
             where TItem : struct
         {
             var (items, padding, vectorDataType) = ReadBinaryVectorAsArray<TItem>(vectorData);
+
             return CreateBinaryVector(items, padding, vectorDataType);
         }
 
@@ -41,39 +42,24 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType)
             switch (vectorDataType)
             {
                 case BinaryVectorDataType.Float32:
+
                     if ((vectorDataBytes.Span.Length & 3) != 0)
                     {
                         throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
                     }
 
-                    if (typeof(TItem) != typeof(float))
-                    {
-                        throw new NotSupportedException($"Expected float for Float32 vector type, but found {typeof(TItem)}.");
-                    }
-
-                    int count = vectorDataBytes.Length / 4; // 4 bytes per float
-                    float[] floatArray = new float[count];
-
-                    for (int i = 0; i < count; i++)
-                    {
-                        // Each float32 is 4 bytes. So to extract the i-th float, we slice 4 bytes from offset i * 4. Use little-endian or big-endian decoding based on platform.
-                        floatArray[i] = BitConverter.IsLittleEndian
-                            ? MemoryMarshal.Read<float>(vectorDataBytes.Span.Slice(i * 4, 4))   // fast, unaligned read on little endian
-                            : BinaryPrimitives.ReadSingleBigEndian(vectorDataBytes.Span.Slice(i * 4, 4));   // correctly reassemble 4 bytes as big-endian float
-                    }
-
+                    var floatArray = BitConverter.IsLittleEndian                                 // This check is not strictly needed; it is kept only to leave the existing little-endian path intact
+                        ? MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span).ToArray()
+                        : ToFloatArrayBigEndian(vectorDataBytes.Span);
                     items = (TItem[])(object)floatArray;
                     break;
-
                 case BinaryVectorDataType.Int8:
                     var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span);
-                    items = itemsSpan.ToArray();
+                    items = (TItem[])(object)itemsSpan.ToArray();
                     break;
-
                 case BinaryVectorDataType.PackedBit:
                     items = (TItem[])(object)vectorDataBytes.ToArray();
                     break;
-
                 default:
                     throw new NotSupportedException($"Binary vector data type {vectorDataType} is not supported.");
             }
@@ -157,6 +143,15 @@ private static void ValidateItemTypeForBinaryVector<TItem, TItemExpectedType, TB
                 throw new NotSupportedException($"Expected {typeof(TItemExpectedType)} for {typeof(TBinaryVectorType)}, but found {typeof(TItem)}.");
             }
         }
+        private static float[] ToFloatArrayBigEndian(ReadOnlySpan<byte> span) // Decodes the span as consecutive 4-byte floats; called from the non-little-endian branch of the Float32 read path
+        {
+            var count = span.Length / 4; // 4 bytes per float; integer division ignores any trailing partial group
+            var result = new float[count];
+            for (int i = 0; i < count; i++)
+            {
+                result[i] = BinaryPrimitivesCompat.ReadSingleLittleEndian(span.Slice(i * 4, 4)); // NOTE(review): "BigEndian" in the method name presumably refers to the host platform; each element is read as little-endian
+            }
+            return result;
+        }
     }
 }
-
diff --git a/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs b/src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs
@@ -14,8 +14,8 @@
 */
 
 using System;
-using System.Buffers.Binary;
 using System.Runtime.InteropServices;
+using MongoDB.Bson.IO;
 
 namespace MongoDB.Bson.Serialization
 {
@@ -36,44 +36,35 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector)
         public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding)
             where TItem : struct
         {
-            if (BitConverter.IsLittleEndian)
-            {
-                var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
-                byte[] result = [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
-                return result;
-            }
-
             byte[] resultBytes;
+
             switch (binaryVectorDataType)
             {
                 case BinaryVectorDataType.Float32:
                     int length = vectorData.Length * sizeof(float);
-                    resultBytes = new byte[2 + length]; 				          // Allocate output buffer:
-                    resultBytes[0] = (byte)binaryVectorDataType; 			      // - [0]: vector type
-                    resultBytes[1] = padding;						              // - [1]: padding
-                    var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);	
-                    Span<byte> floatOutput = resultBytes.AsSpan(2);			      // - [2...]: actual float data , skipping header
-                    foreach (var value in floatSpan)
+                    resultBytes = new byte[2 + length];
+                    resultBytes[0] = (byte)binaryVectorDataType;
+                    resultBytes[1] = padding;
+
+                    var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);
+                    Span<byte> floatOutput = resultBytes.AsSpan(2);
+
+                    for (int i = 0; i < floatSpan.Length; i++)
                     {
-			            // Each float is 4 bytes - write in Big Endian format
-                        BinaryPrimitives.WriteSingleBigEndian(floatOutput, value);
-                        floatOutput = floatOutput.Slice(4); // advance to next 4-byte block
+                        BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput, floatSpan[i]);
+                        floatOutput = floatOutput.Slice(4);
                     }
+
                     return resultBytes;
 
                 case BinaryVectorDataType.Int8:
                 case BinaryVectorDataType.PackedBit:
                     var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
-                    resultBytes = new byte[2 + vectorDataBytes.Length];
-                    resultBytes[0] = (byte)binaryVectorDataType;
-                    resultBytes[1] = padding;
-                    vectorDataBytes.CopyTo(resultBytes.AsSpan(2));
-                    return resultBytes;
+                    return [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
 
                 default:
-                    throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType} on Big Endian architecture yet.");
+                    throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}.");
             }
         }
     }
 }
-