Skip to content

Commit 38312d0

Browse files
madgetr and Mishig authored
🐛 Add support for v1 gguf (#569)
It seems that v1 GGUF files were never supported. This PR addresses that by reading size fields (counts, string lengths, array lengths, and tensor dimensions) as Uint32 for v1 and as Uint64 for v2 and v3. Fixes #568 --------- Co-authored-by: Mishig <[email protected]>
1 parent d64df3b commit 38312d0

File tree

2 files changed

+81
-29
lines changed

2 files changed

+81
-29
lines changed

packages/gguf/src/gguf.spec.ts

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ const URL_MISTRAL_7B =
77
const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/a0b140b/gemma-2b-it-q4_k_m.gguf";
88
const URL_BIG_ENDIAN =
99
"https://huggingface.co/ggml-org/models/resolve/1213976/bert-bge-small/ggml-model-f16-big-endian.gguf";
10+
// Fixture for the v1 test case; the URL resolves against a full commit hash rather than a branch name.
const URL_V1 =
11+
"https://huggingface.co/tmadge/testing/resolve/66c078028d1ff92d7a9264a1590bc61ba6437933/tinyllamas-stories-260k-f32.gguf";
1012

1113
describe("gguf", () => {
1214
it("should parse a llama2 7b", async () => {
@@ -177,4 +179,45 @@ describe("gguf", () => {
177179
dtype: GGMLQuantizationType.F16,
178180
});
179181
});
182+
183+
// Regression test for v1 files: parses the URL_V1 fixture and checks the header fields,
// a selection of metadata key/value pairs, and the first and last tensor descriptors.
it("should parse a v1 file", async () => {
184+
const { metadata, tensorInfos } = await gguf(URL_V1);
185+
186+
/// metadata
187+
188+
expect(metadata).toMatchObject({
189+
version: 1,
190+
// counts are expected as bigint values (note the `n` suffix)
tensor_count: 48n,
191+
kv_count: 18n,
192+
"general.architecture": "llama",
193+
"general.name": "tinyllamas-stories-260k",
194+
"llama.attention.head_count": 8,
195+
"llama.attention.head_count_kv": 4,
196+
"llama.attention.layer_norm_rms_epsilon": 0.000009999999747378752,
197+
"llama.block_count": 5,
198+
"llama.context_length": 512,
199+
"llama.embedding_length": 64,
200+
"llama.feed_forward_length": 172,
201+
"llama.rope.dimension_count": 8,
202+
"llama.tensor_data_layout": "Meta AI original pth",
203+
"tokenizer.ggml.bos_token_id": 1,
204+
"tokenizer.ggml.eos_token_id": 2,
205+
"tokenizer.ggml.model": "llama",
206+
"tokenizer.ggml.padding_token_id": 0,
207+
});
208+
209+
/// Tensor infos
210+
211+
// the number of parsed tensor descriptors must match tensor_count above
expect(tensorInfos.length).toEqual(48);
212+
// first tensor descriptor
expect(tensorInfos[0]).toMatchObject({
213+
name: "token_embd.weight",
214+
shape: [64n, 512n],
215+
dtype: GGMLQuantizationType.F32,
216+
});
217+
// last tensor descriptor
expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({
218+
name: "output.weight",
219+
shape: [64n, 512n],
220+
dtype: GGMLQuantizationType.F32,
221+
});
222+
});
180223
});

packages/gguf/src/gguf.ts

Lines changed: 38 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -78,35 +78,42 @@ class RangeView {
7878
}
7979
}
8080

81+
/**
 * Result of decoding one item from the binary view: the decoded value together
 * with the number of bytes consumed, so the caller can advance its read offset.
 */
interface Slice<T> {
82+
// the decoded value
value: T;
83+
// number of bytes read to produce `value`
length: number;
84+
}
85+
8186
/**
8287
* Note: A good article about binary data in JS: https://javascript.info/arraybuffer-binary-arrays
8388
*/
8489

85-
function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): bigint {
90+
/**
 * Reads a size field whose width depends on the file version.
 *
 * @param view - DataView to read from.
 * @param byteOffset - Position in `view` at which the size field starts.
 * @param version - File version; selects the width of the field.
 * @param littleEndian - Byte order passed through to the DataView getters.
 * @returns The size as a bigint plus the number of bytes it occupied (4 for v1, 8 for v2/v3).
 */
function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): Slice<bigint> {
8691
switch (version) {
8792
// v1: the size is a 32-bit unsigned integer, widened to bigint for a uniform return type
case 1: {
8893
const n = view.getUint32(byteOffset, littleEndian);
89-
return BigInt(n);
94+
return { value: BigInt(n), length: 4 };
9095
}
9196
// v2 and v3: the size is a 64-bit unsigned integer
case 2:
9297
case 3: {
93-
return view.getBigUint64(byteOffset, littleEndian);
98+
return { value: view.getBigUint64(byteOffset, littleEndian), length: 8 };
9499
}
95100
}
96101
}
97102

98-
function readString(view: DataView, offset: number, littleEndian: boolean): { value: string; length: number } {
99-
const length = view.getBigUint64(offset, littleEndian);
100-
const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length)));
101-
return { value, length: 8 + Number(length) };
103+
/**
 * Reads a length-prefixed string: a version-dependent size field followed by
 * that many bytes, decoded with a default TextDecoder.
 *
 * @param view - DataView to read from.
 * @param offset - Position in `view` at which the length prefix starts.
 * @param version - File version; determines the width of the length prefix.
 * @param littleEndian - Byte order used to read the length prefix.
 * @returns The decoded string and the total bytes consumed (prefix width + string byte length).
 */
function readString(view: DataView, offset: number, version: Version, littleEndian: boolean): Slice<string> {
104+
const length = readVersionedSize(view, offset, version, littleEndian);
105+
// width of the length prefix itself (depends on version)
const off = length.length;
106+
// NOTE(review): this slices `view.buffer` using `offset` directly, without adding `view.byteOffset`;
// that is only correct if the view starts at byte 0 of its buffer — confirm against how the view is built.
// The bigint length is narrowed with Number(), which presumes it fits in a safe integer.
const value = new TextDecoder().decode(view.buffer.slice(offset + off, offset + off + Number(length.value)));
107+
return { value, length: off + Number(length.value) };
102108
}
103109

104110
function readMetadataValue(
105111
view: DataView,
106112
type: GGUFValueType,
107113
offset: number,
114+
version: Version,
108115
littleEndian: boolean
109-
): { value: MetadataValue; length: number } {
116+
): Slice<MetadataValue> {
110117
switch (type) {
111118
case GGUFValueType.UINT8:
112119
return { value: view.getUint8(offset), length: 1 };
@@ -125,16 +132,16 @@ function readMetadataValue(
125132
case GGUFValueType.BOOL:
126133
return { value: view.getUint8(offset) !== 0, length: 1 };
127134
case GGUFValueType.STRING:
128-
return readString(view, offset, littleEndian);
135+
return readString(view, offset, version, littleEndian);
129136
case GGUFValueType.ARRAY: {
130137
const arrayType = view.getUint32(offset, littleEndian);
131-
const arrayLength = view.getBigUint64(offset + 4, littleEndian);
132-
let length = 12;
138+
const arrayLength = readVersionedSize(view, offset + 4, version, littleEndian);
139+
let length = 4 + arrayLength.length;
133140
const arrayValues: MetadataValue[] = [];
134-
for (let i = 0; i < arrayLength; i++) {
135-
const { value, length: _length } = readMetadataValue(view, arrayType, offset + length, littleEndian);
136-
arrayValues.push(value);
137-
length += _length;
141+
for (let i = 0; i < arrayLength.value; i++) {
142+
const metadataValue = readMetadataValue(view, arrayType, offset + length, version, littleEndian);
143+
arrayValues.push(metadataValue.value);
144+
length += metadataValue.length;
138145
}
139146
return { value: arrayValues, length };
140147
}
@@ -187,22 +194,23 @@ export async function gguf(
187194
if (!isVersion(version)) {
188195
throw new Error(`not a valid gguf file: unsupported version "${version}"`);
189196
}
190-
const tensorCount = readVersionedSize(r.view, 8, version, littleEndian);
191-
const numKv = readVersionedSize(r.view, 16, version, littleEndian);
192-
197+
// initial offset after header
198+
let offset = 8;
199+
const tensorCount = readVersionedSize(r.view, offset, version, littleEndian);
200+
offset += tensorCount.length;
201+
const numKv = readVersionedSize(r.view, offset, version, littleEndian);
202+
offset += numKv.length;
193203
const metadata: GGUFMetadata = {
194204
version,
195-
tensor_count: tensorCount,
196-
kv_count: numKv,
205+
tensor_count: tensorCount.value,
206+
kv_count: numKv.value,
197207
};
198-
// initial offset after header
199-
let offset = 24;
200208

201-
for (let i = 0; i < numKv; i++) {
209+
for (let i = 0; i < numKv.value; i++) {
202210
await r.fetchChunkIfNeeded(offset);
203211

204212
// read key
205-
const keyResult = readString(r.view, offset, littleEndian);
213+
const keyResult = readString(r.view, offset, version, littleEndian);
206214
offset += keyResult.length;
207215

208216
// read value type
@@ -217,7 +225,7 @@ export async function gguf(
217225
while (!valueResult) {
218226
try {
219227
// read value
220-
valueResult = readMetadataValue(r.view, valueType, offset, littleEndian);
228+
valueResult = readMetadataValue(r.view, valueType, offset, version, littleEndian);
221229
} catch (err) {
222230
if (err instanceof RangeError) {
223231
await r.fetchChunk();
@@ -232,20 +240,21 @@ export async function gguf(
232240

233241
const tensorInfos: GGUFTensorInfo[] = [];
234242

235-
for (let i = 0; i < tensorCount; i++) {
243+
for (let i = 0; i < tensorCount.value; i++) {
236244
await r.fetchChunkIfNeeded(offset);
237245

238246
// read tensor name
239-
const keyResult = readString(r.view, offset, littleEndian);
247+
const keyResult = readString(r.view, offset, version, littleEndian);
240248
offset += keyResult.length;
241249

242250
const nDims = r.view.getUint32(offset, littleEndian);
243251
offset += 4;
244252

245253
const shape: bigint[] = [];
246254
for (let dim = 0; dim < nDims; dim++) {
247-
shape.push(r.view.getBigUint64(offset, littleEndian));
248-
offset += 8;
255+
const shapeDim = readVersionedSize(r.view, offset, version, littleEndian);
256+
shape.push(shapeDim.value);
257+
offset += shapeDim.length;
249258
}
250259

251260
const type = r.view.getUint32(offset, littleEndian);

0 commit comments

Comments
 (0)