Skip to content

Commit

Permalink
🐛 Add support for v1 gguf (#569)
Browse files Browse the repository at this point in the history
It seems that v1 GGUF was never supported. This PR addresses that by
selectively reading sizes as Uint32 for v1 and Uint64 for v2 and v3.

Fixes #568

---------

Co-authored-by: Mishig <[email protected]>
  • Loading branch information
madgetr and Mishig authored Mar 21, 2024
1 parent d64df3b commit 38312d0
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 29 deletions.
43 changes: 43 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ const URL_MISTRAL_7B =
const URL_GEMMA_2B = "https://huggingface.co/lmstudio-ai/gemma-2b-it-GGUF/resolve/a0b140b/gemma-2b-it-q4_k_m.gguf";
const URL_BIG_ENDIAN =
"https://huggingface.co/ggml-org/models/resolve/1213976/bert-bge-small/ggml-model-f16-big-endian.gguf";
const URL_V1 =
"https://huggingface.co/tmadge/testing/resolve/66c078028d1ff92d7a9264a1590bc61ba6437933/tinyllamas-stories-260k-f32.gguf";

describe("gguf", () => {
it("should parse a llama2 7b", async () => {
Expand Down Expand Up @@ -177,4 +179,45 @@ describe("gguf", () => {
dtype: GGMLQuantizationType.F16,
});
});

// Regression test for GGUF v1 parsing: v1 headers encode sizes as
// 32-bit integers where v2/v3 use 64-bit (see readVersionedSize).
it("should parse a v1 file", async () => {
const { metadata, tensorInfos } = await gguf(URL_V1);

/// metadata

// Header counts are exposed as bigint; per-key metadata values keep
// their native decoded types (string / number).
expect(metadata).toMatchObject({
version: 1,
tensor_count: 48n,
kv_count: 18n,
"general.architecture": "llama",
"general.name": "tinyllamas-stories-260k",
"llama.attention.head_count": 8,
"llama.attention.head_count_kv": 4,
"llama.attention.layer_norm_rms_epsilon": 0.000009999999747378752,
"llama.block_count": 5,
"llama.context_length": 512,
"llama.embedding_length": 64,
"llama.feed_forward_length": 172,
"llama.rope.dimension_count": 8,
"llama.tensor_data_layout": "Meta AI original pth",
"tokenizer.ggml.bos_token_id": 1,
"tokenizer.ggml.eos_token_id": 2,
"tokenizer.ggml.model": "llama",
"tokenizer.ggml.padding_token_id": 0,
});

/// Tensor infos

// Spot-check the first and last tensors; shape dimensions are bigint.
expect(tensorInfos.length).toEqual(48);
expect(tensorInfos[0]).toMatchObject({
name: "token_embd.weight",
shape: [64n, 512n],
dtype: GGMLQuantizationType.F32,
});
expect(tensorInfos[tensorInfos.length - 1]).toMatchObject({
name: "output.weight",
shape: [64n, 512n],
dtype: GGMLQuantizationType.F32,
});
});
});
67 changes: 38 additions & 29 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,35 +78,42 @@ class RangeView {
}
}

/**
 * A decoded value paired with the number of bytes it occupied in the
 * buffer, so the caller can advance its read offset by `length`.
 */
interface Slice<T> {
value: T;
length: number;
}

/**
* Note: A good article about binary data in JS: https://javascript.info/arraybuffer-binary-arrays
*/

function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): bigint {
function readVersionedSize(view: DataView, byteOffset: number, version: Version, littleEndian: boolean): Slice<bigint> {
switch (version) {
case 1: {
const n = view.getUint32(byteOffset, littleEndian);
return BigInt(n);
return { value: BigInt(n), length: 4 };
}
case 2:
case 3: {
return view.getBigUint64(byteOffset, littleEndian);
return { value: view.getBigUint64(byteOffset, littleEndian), length: 8 };
}
}
}

function readString(view: DataView, offset: number, littleEndian: boolean): { value: string; length: number } {
const length = view.getBigUint64(offset, littleEndian);
const value = new TextDecoder().decode(view.buffer.slice(offset + 8, offset + 8 + Number(length)));
return { value, length: 8 + Number(length) };
/**
 * Reads a length-prefixed UTF-8 string at `offset`.
 *
 * The length prefix is version-dependent (32-bit in GGUF v1, 64-bit in
 * v2/v3 — see readVersionedSize), hence the `version` parameter.
 * Returns the decoded string and the total bytes consumed
 * (prefix bytes + string payload).
 */
function readString(view: DataView, offset: number, version: Version, littleEndian: boolean): Slice<string> {
const length = readVersionedSize(view, offset, version, littleEndian);
// Size of the length prefix itself: 4 bytes (v1) or 8 bytes (v2/v3).
const off = length.length;
const value = new TextDecoder().decode(view.buffer.slice(offset + off, offset + off + Number(length.value)));
return { value, length: off + Number(length.value) };
}

function readMetadataValue(
view: DataView,
type: GGUFValueType,
offset: number,
version: Version,
littleEndian: boolean
): { value: MetadataValue; length: number } {
): Slice<MetadataValue> {
switch (type) {
case GGUFValueType.UINT8:
return { value: view.getUint8(offset), length: 1 };
Expand All @@ -125,16 +132,16 @@ function readMetadataValue(
case GGUFValueType.BOOL:
return { value: view.getUint8(offset) !== 0, length: 1 };
case GGUFValueType.STRING:
return readString(view, offset, littleEndian);
return readString(view, offset, version, littleEndian);
case GGUFValueType.ARRAY: {
const arrayType = view.getUint32(offset, littleEndian);
const arrayLength = view.getBigUint64(offset + 4, littleEndian);
let length = 12;
const arrayLength = readVersionedSize(view, offset + 4, version, littleEndian);
let length = 4 + arrayLength.length;
const arrayValues: MetadataValue[] = [];
for (let i = 0; i < arrayLength; i++) {
const { value, length: _length } = readMetadataValue(view, arrayType, offset + length, littleEndian);
arrayValues.push(value);
length += _length;
for (let i = 0; i < arrayLength.value; i++) {
const metadataValue = readMetadataValue(view, arrayType, offset + length, version, littleEndian);
arrayValues.push(metadataValue.value);
length += metadataValue.length;
}
return { value: arrayValues, length };
}
Expand Down Expand Up @@ -187,22 +194,23 @@ export async function gguf(
if (!isVersion(version)) {
throw new Error(`not a valid gguf file: unsupported version "${version}"`);
}
const tensorCount = readVersionedSize(r.view, 8, version, littleEndian);
const numKv = readVersionedSize(r.view, 16, version, littleEndian);

// initial offset after header
let offset = 8;
const tensorCount = readVersionedSize(r.view, offset, version, littleEndian);
offset += tensorCount.length;
const numKv = readVersionedSize(r.view, offset, version, littleEndian);
offset += numKv.length;
const metadata: GGUFMetadata = {
version,
tensor_count: tensorCount,
kv_count: numKv,
tensor_count: tensorCount.value,
kv_count: numKv.value,
};
// initial offset after header
let offset = 24;

for (let i = 0; i < numKv; i++) {
for (let i = 0; i < numKv.value; i++) {
await r.fetchChunkIfNeeded(offset);

// read key
const keyResult = readString(r.view, offset, littleEndian);
const keyResult = readString(r.view, offset, version, littleEndian);
offset += keyResult.length;

// read value type
Expand All @@ -217,7 +225,7 @@ export async function gguf(
while (!valueResult) {
try {
// read value
valueResult = readMetadataValue(r.view, valueType, offset, littleEndian);
valueResult = readMetadataValue(r.view, valueType, offset, version, littleEndian);
} catch (err) {
if (err instanceof RangeError) {
await r.fetchChunk();
Expand All @@ -232,20 +240,21 @@ export async function gguf(

const tensorInfos: GGUFTensorInfo[] = [];

for (let i = 0; i < tensorCount; i++) {
for (let i = 0; i < tensorCount.value; i++) {
await r.fetchChunkIfNeeded(offset);

// read tensor name
const keyResult = readString(r.view, offset, littleEndian);
const keyResult = readString(r.view, offset, version, littleEndian);
offset += keyResult.length;

const nDims = r.view.getUint32(offset, littleEndian);
offset += 4;

const shape: bigint[] = [];
for (let dim = 0; dim < nDims; dim++) {
shape.push(r.view.getBigUint64(offset, littleEndian));
offset += 8;
const shapeDim = readVersionedSize(r.view, offset, version, littleEndian);
shape.push(shapeDim.value);
offset += shapeDim.length;
}

const type = r.view.getUint32(offset, littleEndian);
Expand Down

0 comments on commit 38312d0

Please sign in to comment.