Skip to content

Commit be00d29

Browse files
manekinekko authored and RobertCraigie committed
perf(embedding): default embedding creation to base64 (#1312)
* perf(embedding): request embedding creation as base64 by default. Requesting base64-encoded embeddings returns smaller response bodies, on average ~60% smaller than float32-encoded ones. In other words, a response body containing float32 embeddings is ~2.3x bigger than one containing base64-encoded embeddings. When the user does not provide an encoding_format parameter, we request the embeddings encoded as base64 and then decode them to float32; when the user does provide one, it is sent unchanged and the response is returned as-is. Closes #1310 Co-authored-by: Robert Craigie <[email protected]>
1 parent 3676d34 commit be00d29

File tree

3 files changed

+92
-2
lines changed

3 files changed

+92
-2
lines changed

src/core.ts

+21
Original file line numberDiff line numberDiff line change
@@ -1287,6 +1287,27 @@ export const toBase64 = (str: string | null | undefined): string => {
12871287
throw new OpenAIError('Cannot generate b64 string; Expected `Buffer` or `btoa` to be defined');
12881288
};
12891289

1290+
/**
 * Decodes a Base64 encoded string whose bytes are packed float32 values.
 *
 * The decoded bytes are reinterpreted in the platform's native byte order
 * (this is what `Float32Array` does when constructed over an `ArrayBuffer`).
 *
 * @param base64Str - The Base64 encoded string.
 * @returns A plain `Array<number>` holding the decoded float32 values
 *   (not a `Float32Array` instance).
 * @throws RangeError if the decoded byte length is not a multiple of 4.
 */
export const toFloat32Array = (base64Str: string): Array<number> => {
  if (typeof Buffer !== 'undefined') {
    // for Node.js environment
    const decoded = Buffer.from(base64Str, 'base64');
    // A Buffer may be a view into a larger, shared ArrayBuffer (Node pools small
    // allocations), so `decoded.buffer` alone can contain unrelated bytes.
    // Copy exactly this Buffer's window; the copy also starts at offset 0, so it
    // satisfies Float32Array's 4-byte alignment requirement.
    const bytes = decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength);
    return Array.from(new Float32Array(bytes));
  } else {
    // for legacy web platform APIs
    const binaryStr = atob(base64Str);
    const len = binaryStr.length;
    const bytes = new Uint8Array(len);
    for (let i = 0; i < len; i++) {
      bytes[i] = binaryStr.charCodeAt(i);
    }
    return Array.from(new Float32Array(bytes.buffer));
  }
};
1310+
12901311
/**
 * Type guard for plain "record-like" values: anything whose `typeof` is
 * `'object'`, excluding `null`, `undefined` and arrays.
 */
export function isObj(obj: unknown): obj is Record<string, unknown> {
  if (obj === null || obj === undefined) {
    return false;
  }
  if (Array.isArray(obj)) {
    return false;
  }
  return typeof obj === 'object';
}

src/resources/embeddings.ts

+40-2
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,47 @@ export class Embeddings extends APIResource {
99
*/
1010
create(
  body: EmbeddingCreateParams,
  options?: Core.RequestOptions<EmbeddingCreateParams>,
): Core.APIPromise<CreateEmbeddingResponse> {
  // Sends POST /embeddings. When the caller gives no `encoding_format`, base64 is
  // requested and the embeddings are decoded back to number arrays before being
  // returned. When the caller does give one, it is sent unchanged and the
  // response is returned untouched.
  const hasUserProvidedEncodingFormat = !!body.encoding_format;
  // No encoding_format specified, defaulting to base64 for performance reasons
  // See https://github.com/openai/openai-node/pull/1312
  const encoding_format: EmbeddingCreateParams['encoding_format'] =
    hasUserProvidedEncodingFormat ? body.encoding_format : 'base64';

  if (hasUserProvidedEncodingFormat) {
    Core.debug('Request', 'User defined encoding_format:', body.encoding_format);
  }

  const response: Core.APIPromise<CreateEmbeddingResponse> = this._client.post('/embeddings', {
    body: {
      ...body,
      encoding_format,
    },
    ...options,
  });

  // if the user specified an encoding_format, return the response as-is
  if (hasUserProvidedEncodingFormat) {
    return response;
  }

  // At this point the user did not specify an encoding_format and base64 was
  // requested on their behalf, so the embeddings are expected to be base64
  // strings. Decode them to number arrays, which is what callers receive when
  // they do not ask for a specific encoding.
  Core.debug('response', 'Decoding base64 embeddings to float32 array');

  return response._thenUnwrap((parsed) => {
    if (parsed && parsed.data) {
      parsed.data.forEach((item) => {
        const rawEmbedding: unknown = item.embedding;
        // Only decode actual base64 strings. If the server returned something
        // else (e.g. it ignored encoding_format and sent a float array), leave
        // it untouched rather than reinterpreting its elements as raw bytes.
        if (typeof rawEmbedding === 'string') {
          item.embedding = Core.toFloat32Array(rawEmbedding);
        }
      });
    }

    return parsed;
  });
}
1654
}
1755

tests/api-resources/embeddings.test.ts

+31
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,35 @@ describe('resource embeddings', () => {
3232
user: 'user-1234',
3333
});
3434
});
35+
36+
// Explicit `encoding_format: 'float'`: the request is sent as-is and the
// embedding must come back as an array of finite numbers.
test('create: encoding_format=float should create float32 embeddings', async () => {
  const response = await client.embeddings.create({
    input: 'The quick brown fox jumped over the lazy dog',
    model: 'text-embedding-3-small',
    encoding_format: 'float',
  });

  expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
  expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
});

// Explicit `encoding_format: 'base64'`.
// NOTE(review): `create` returns the response untouched when the caller sets
// encoding_format, so these assertions depend on the test server returning a
// number array even for base64 requests. Confirm against the mock server.
test('create: encoding_format=base64 should create float32 embeddings', async () => {
  const response = await client.embeddings.create({
    input: 'The quick brown fox jumped over the lazy dog',
    model: 'text-embedding-3-small',
    encoding_format: 'base64',
  });

  expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
  expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
});

// No `encoding_format`: the SDK requests base64 itself and decodes the result,
// so the caller must still see an array of finite numbers.
test('create: encoding_format=default should create float32 embeddings', async () => {
  const response = await client.embeddings.create({
    input: 'The quick brown fox jumped over the lazy dog',
    model: 'text-embedding-3-small',
  });

  expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
  expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
});
3566
});

0 commit comments

Comments
 (0)