Skip to content

Commit

Permalink
[GGUF] Parse Quant Label (#967)
Browse files Browse the repository at this point in the history
Moving this function from moon to hf.js/gguf as I need it for
#687

```ts
const quantLabel = parseGGUFQuantLabel("abc-Q4_K_M.gguf")
console.log(quantLabel)
// Q4_K_M
```

### Order of operations
- [ ] merge #967 (this PR) & deploy `@hf.js/gguf`
- [ ] merge #687 & deploy `@hf.js/tasks`
- [ ] merge moon

---------

Co-authored-by: Xuan Son Nguyen <[email protected]>
Co-authored-by: Julien Chaumond <[email protected]>
  • Loading branch information
3 people authored Oct 16, 2024
1 parent 683cbd0 commit 0d96120
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 2 deletions.
19 changes: 18 additions & 1 deletion packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import { beforeAll, describe, expect, it } from "vitest";
import type { GGUFParseOutput } from "./gguf";
import { GGMLFileQuantizationType, GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
import {
GGMLFileQuantizationType,
GGMLQuantizationType,
gguf,
ggufAllShards,
parseGgufShardFilename,
parseGGUFQuantLabel,
} from "./gguf";
import fs from "node:fs";

// URL of a Q2_K-quantized Llama-2-7B-Chat GGUF file, pinned to revision 191239b.
// NOTE(review): presumably a fixture for the tests in this file — usage is not visible in this hunk.
const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
Expand Down Expand Up @@ -266,4 +273,14 @@ describe("gguf", () => {
const { parameterCount } = await ggufAllShards(URL_SHARDED_GROK);
expect(parameterCount).toEqual(316_490_127_360); // 316B
});

it("parse quant label", async () => {
	// [file name, expected label] pairs; evaluated in order, first mismatch fails the test.
	const expectations: [string, string | undefined][] = [
		["Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", "Q4_K_M"],
		// a leading directory component must not affect the result
		["subdir/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", "Q4_K_M"],
		["Codestral-22B-v0.1-Q2_K.gguf", "Q2_K"],
		// no quant label in the name
		["Codestral-22B-v0.1.gguf", undefined],
		// gguf name with two quant labels [F32, Q2_K]: the last one is returned
		["Codestral-22B-v0.1-F32-Q2_K.gguf", "Q2_K"],
		// TODO: investigate IQ3_XS
		["Codestral-22B-v0.1-IQ3_XS.gguf", undefined],
		// TODO: investigate Q4_0_4_4
		["Codestral-22B-v0.1-Q4_0_4_4.gguf", "Q4_0"],
	];

	for (const [fileName, expectedLabel] of expectations) {
		expect(parseGGUFQuantLabel(fileName)).toEqual(expectedLabel);
	}
});
});
11 changes: 10 additions & 1 deletion packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";
import { GGMLQuantizationType, GGUFValueType } from "./types";
import { isBackend } from "./utils/isBackend";
import { promisesQueue } from "./utils/promisesQueue";

Expand Down Expand Up @@ -29,6 +29,15 @@ export function parseGgufShardFilename(filename: string): GgufShardFileInfo | nu
return null;
}

// String-typed values of GGMLQuantizationType; they become the alternatives of the regex below.
// NOTE(review): presumably these are the enum member names (a numeric TS enum also exposes
// its numeric values through Object.values, which this filter drops) — confirm against ./types.
const ggufQuants = Object.values(GGMLQuantizationType).filter((v): v is string => typeof v === "string");
/**
 * Matches one entry of `ggufQuants` (named group `quant`), optionally followed by `_` and a
 * run of uppercase letters (named group `sizeVariation`).
 * The pattern is unanchored and has no flags, so it is case-sensitive and finds the first occurrence only.
 */
export const GGUF_QUANT_RE = new RegExp(`(?<quant>${ggufQuants.join("|")})` + "(_(?<sizeVariation>[A-Z]+))?");
/**
 * Same pattern as `GGUF_QUANT_RE` with the global flag, so `String.prototype.match` returns every occurrence.
 * Being global, the object carries `lastIndex` state when used with `exec`/`test`.
 */
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE, "g");

/**
 * Extracts the quantization label from a GGUF file name or path.
 *
 * The input is upper-cased before matching, so the returned label is always upper case.
 *
 * @param fname File name or path to inspect.
 * @returns The last label matched by `GGUF_QUANT_RE_GLOBAL`, or `undefined` when there is no match.
 */
export function parseGGUFQuantLabel(fname: string): string | undefined {
	const labels = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL);
	if (labels === null) {
		return undefined;
	}
	// if there are multiple quant substrings in a name, we prefer the last one
	return labels[labels.length - 1];
}

/** Type guard: narrows a number to `Version` when it is exactly 1, 2 or 3. */
const isVersion = (version: number): version is Version => [1, 2, 3].includes(version);

/**
Expand Down

0 comments on commit 0d96120

Please sign in to comment.