Skip to content

Commit

Permalink
gguf: Add ability to load local file (#656)
Browse files Browse the repository at this point in the history
Being able to load a local gguf file can be useful when we want to debug
a gguf file.

**Without this PR**, this ability could be done by using
[file-fetch](https://www.npmjs.com/package/file-fetch). However, that
won't work with big models, since the whole file is loaded into RAM.

This PR adds a new `RangeViewLocalFile` internal class that extends
`RangeView`. It redirects calls to `fetchChunk()` to
`fs.createReadStream` with the appropriate byte range. This allows the
library to read specific chunks from a local file.

For security reasons, this ability is gated behind the `allowLocalFile: boolean`
param. By default, it is disabled (i.e. when this library is run on the hub
backend, local file access stays disabled unless the param is explicitly set)

- [x] Add test case to `gguf.spec.ts`
- [x] Being able to build with target=browser (only build, but will
throw error on browser if being used)
  • Loading branch information
ngxson authored May 10, 2024
1 parent 34c8ec1 commit ab84639
Show file tree
Hide file tree
Showing 6 changed files with 205 additions and 9 deletions.
6 changes: 5 additions & 1 deletion packages/gguf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
}
},
"browser": {
"./src/utils/FileBlob.ts": false,
"./dist/index.js": "./dist/browser/index.js",
"./dist/index.mjs": "./dist/browser/index.mjs"
},
Expand Down Expand Up @@ -47,5 +48,8 @@
"gguf"
],
"author": "Hugging Face",
"license": "MIT"
"license": "MIT",
"devDependencies": {
"@types/node": "^20.12.8"
}
}
17 changes: 17 additions & 0 deletions packages/gguf/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions packages/gguf/src/gguf.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { describe, expect, it } from "vitest";
import { GGMLQuantizationType, gguf, ggufAllShards, parseGgufShardFilename } from "./gguf";
import fs from "node:fs";

const URL_LLAMA = "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/resolve/191239b/llama-2-7b-chat.Q2_K.gguf";
const URL_MISTRAL_7B =
Expand Down Expand Up @@ -226,6 +227,19 @@ describe("gguf", () => {
});
});

it("should parse a local file", async () => {
	// Download the file and save it to the .cache folder.
	// `recursive: true` makes this a no-op when the folder already exists,
	// so no separate existence check is needed.
	fs.mkdirSync(".cache", { recursive: true });
	const res = await fetch(URL_V1);
	// Fail with a clear message instead of writing an HTTP error page to disk
	// and letting the GGUF parser choke on it later.
	if (!res.ok) {
		throw new Error(`Failed to download ${URL_V1}: ${res.status} ${res.statusText}`);
	}
	const arrayBuf = await res.arrayBuffer();
	fs.writeFileSync(".cache/model.gguf", Buffer.from(arrayBuf));

	// A non-http(s) path is only accepted when `allowLocalFile` is set.
	const { metadata } = await gguf(".cache/model.gguf", { allowLocalFile: true });
	expect(metadata["general.name"]).toEqual("tinyllamas-stories-260k");
});

it("should detect sharded gguf filename", async () => {
const ggufPath = "grok-1/grok-1-q4_0-00003-of-00009.gguf"; // https://huggingface.co/ggml-org/models/blob/fcf344adb9686474c70e74dd5e55465e9e6176ef/grok-1/grok-1-q4_0-00003-of-00009.gguf
const ggufShardFileInfo = parseGgufShardFilename(ggufPath);
Expand Down
53 changes: 45 additions & 8 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGUFValueType } from "./types";
import { isBackend } from "./utils/isBackend";
import { promisesQueue } from "./utils/promisesQueue";

export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
Expand Down Expand Up @@ -49,7 +50,7 @@ const HTTP_TOTAL_MAX_SIZE = 50 * 10 ** 6; /// 50MB
* Internal stateful instance to fetch ranges of HTTP data when needed
*/
class RangeView {
private chunk: number;
protected chunk: number;
private buffer: ArrayBuffer;
private dataView: DataView;

Expand All @@ -58,7 +59,7 @@ class RangeView {
}

constructor(
public url: string,
public uri: string,
private params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
Expand All @@ -81,7 +82,7 @@ class RangeView {
const range = [this.chunk * HTTP_CHUNK_SIZE, (this.chunk + 1) * HTTP_CHUNK_SIZE - 1];
const buf = new Uint8Array(
await (
await (this.params?.fetch ?? fetch)(this.url, {
await (this.params?.fetch ?? fetch)(this.uri, {
headers: {
...(this.params?.additionalFetchHeaders ?? {}),
Range: `bytes=${range[0]}-${range[1]}`,
Expand Down Expand Up @@ -128,6 +129,23 @@ class RangeView {
}
}

/**
 * Internal stateful instance to read ranges of a local file when needed.
 * Only usable with the Node.js FS API.
 */
class RangeViewLocalFile extends RangeView {
	/**
	 * Read the current chunk from the local file system and pass it to `appendBuffer`.
	 *
	 * `FileBlob` is imported lazily so the Node-only module is only loaded when a
	 * local file is actually read.
	 */
	override async fetchChunk(): Promise<void> {
		const { FileBlob } = await import("./utils/FileBlob");
		const blob = await FileBlob.create(this.uri);
		// `FileBlob.slice` takes an EXCLUSIVE end offset (unlike the inclusive HTTP
		// `Range` header), so the end must be `start + HTTP_CHUNK_SIZE`; using
		// `... - 1` here would drop the last byte of every chunk.
		const start = this.chunk * HTTP_CHUNK_SIZE;
		const end = start + HTTP_CHUNK_SIZE;
		const buffer = await blob.slice(start, end).arrayBuffer();
		this.appendBuffer(new Uint8Array(buffer));
	}
}

interface Slice<T> {
value: T;
length: number;
Expand Down Expand Up @@ -205,38 +223,57 @@ function readMetadataValue(
}

export async function gguf(
url: string,
uri: string,
params: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
computeParametersCount: true;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount: number }>;
export async function gguf(
url: string,
uri: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput>;
export async function gguf(
url: string,
uri: string,
params?: {
/**
* Custom fetch function to use instead of the default one, for example to use a proxy or edit headers.
*/
fetch?: typeof fetch;
additionalFetchHeaders?: Record<string, string>;
computeParametersCount?: boolean;
allowLocalFile?: boolean;
}
): Promise<GGUFParseOutput & { parameterCount?: number }> {
const r = new RangeView(url, params);
let r: RangeView;
if (isBackend) {
/// On backend, we switch between remote/local file based on protocol
if (uri.match(/^https?:\/\//)) {
r = new RangeView(uri, params);
} else if (params?.allowLocalFile) {
r = new RangeViewLocalFile(uri, params);
} else {
throw new Error("Access to local file is not enabled, please set allowLocalFile to true");
}
} else {
/// On frontend, we only allow using remote file
if (params?.allowLocalFile) {
throw new Error("allowLocalFile cannot be used on browser");
}
r = new RangeView(uri, params);
}
await r.fetchChunk();

const checkBuffer = (buffer: Uint8Array, header: Uint8Array) => {
Expand Down Expand Up @@ -377,7 +414,7 @@ export async function ggufAllShards(

const PARALLEL_DOWNLOADS = 20;
const shards = await promisesQueue(
urls.map((shardUrl) => () => gguf(shardUrl, { computeParametersCount: true })),
urls.map((shardUrl) => () => gguf(shardUrl, { ...params, computeParametersCount: true })),
PARALLEL_DOWNLOADS
);
return {
Expand Down
118 changes: 118 additions & 0 deletions packages/gguf/src/utils/FileBlob.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
import { createReadStream } from "node:fs";
import { open, stat } from "node:fs/promises";
import { Readable } from "node:stream";
import type { FileHandle } from "node:fs/promises";
import { fileURLToPath } from "node:url";

/**
 * @internal
 *
 * A FileBlob is a replacement for the Blob class that lazily reads files
 * in order to preserve memory.
 *
 * It is a drop-in replacement for the Blob class, so you can use it as a Blob.
 *
 * The main difference is the instantiation, which is done asynchronously using the `FileBlob.create` method.
 *
 * @example
 * const fileBlob = await FileBlob.create("path/to/package.json");
 *
 * await fetch("https://aschen.tech", { method: "POST", body: fileBlob });
 */
export class FileBlob extends Blob {
	/**
	 * Creates a new FileBlob covering the whole of the provided file.
	 *
	 * Only the file size is read here (via `stat`); the content is read lazily.
	 *
	 * @param path Path (or `file:` URL) of the file to be lazily read
	 * @returns a FileBlob spanning bytes `[0, size)` of the file
	 */
	static async create(path: string | URL): Promise<FileBlob> {
		const filePath = path instanceof URL ? fileURLToPath(path) : path;
		const { size } = await stat(filePath);

		return new FileBlob(filePath, 0, size);
	}

	private path: string;
	/** Absolute offset in the file of the first byte of this blob (inclusive). */
	private start: number;
	/** Absolute offset in the file of the end of this blob (exclusive). */
	private end: number;

	private constructor(path: string, start: number, end: number) {
		super();

		this.path = path;
		this.start = start;
		this.end = end;
	}

	/**
	 * Returns the size of the blob, in bytes.
	 */
	override get size(): number {
		return this.end - this.start;
	}

	/**
	 * Returns a new instance of FileBlob that is a slice of the current one.
	 *
	 * The slice is inclusive of the start and exclusive of the end.
	 * Both offsets are relative to this blob and are clamped to its bounds, so a
	 * start at or past the end yields an empty blob (as `Blob.slice` does).
	 *
	 * The slice method does not support negative start/end.
	 *
	 * @param start beginning of the slice
	 * @param end end of the slice
	 * @throws TypeError when `start` or `end` is negative
	 */
	override slice(start = 0, end = this.size): FileBlob {
		if (start < 0 || end < 0) {
			throw new TypeError("Unsupported negative start/end on FileBlob.slice");
		}

		const sliceEnd = Math.min(this.start + end, this.end);
		// Never let the start pass the end: a negative size would break later reads.
		const sliceStart = Math.min(this.start + start, sliceEnd);

		return new FileBlob(this.path, sliceStart, sliceEnd);
	}

	/**
	 * Read the part of the file delimited by the FileBlob and returns it as an ArrayBuffer.
	 *
	 * The result is a genuine `ArrayBuffer` of exactly `size` bytes (not a Node
	 * `Buffer`), so it can be handed to `DataView` or typed-array constructors.
	 * If the file yields fewer bytes than requested, the remainder stays zero-filled.
	 */
	override async arrayBuffer(): Promise<ArrayBuffer> {
		const length = this.size;
		const result = new ArrayBuffer(length);

		if (length > 0) {
			// Read straight into a view over the result to avoid an extra copy.
			const view = new Uint8Array(result);
			await this.execute((file) => file.read(view, 0, length, this.start));
		}

		return result;
	}

	/**
	 * Read the part of the file delimited by the FileBlob and returns it as a string (decoded as UTF-8).
	 */
	override async text(): Promise<string> {
		// `Buffer.from(arrayBuffer)` wraps the memory without copying it.
		return Buffer.from(await this.arrayBuffer()).toString("utf8");
	}

	/**
	 * Returns a stream around the part of the file delimited by the FileBlob.
	 */
	override stream(): ReturnType<Blob["stream"]> {
		if (this.size === 0) {
			// `createReadStream` rejects `end < start`, which is what an empty range
			// would produce below, so hand back an empty stream instead.
			return new Blob([]).stream();
		}

		// `createReadStream` takes an INCLUSIVE end offset, hence the `- 1`.
		return Readable.toWeb(createReadStream(this.path, { start: this.start, end: this.end - 1 })) as ReturnType<
			Blob["stream"]
		>;
	}

	/**
	 * We are opening and closing the file for each action to prevent file descriptor leaks.
	 *
	 * It is an intended choice of developer experience over performances.
	 *
	 * @param action callback receiving the opened, read-only file handle
	 * @returns whatever `action` resolves to
	 */
	private async execute<T>(action: (file: FileHandle) => Promise<T>) {
		const file = await open(this.path, "r");

		try {
			return await action(file);
		} finally {
			await file.close();
		}
	}
}
6 changes: 6 additions & 0 deletions packages/gguf/src/utils/isBackend.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// A browser main thread exposes both a global `window` and `window.document`.
const isBrowser = !(typeof window === "undefined" || typeof window.document === "undefined");

// A dedicated web worker exposes a global `self` whose constructor is named
// "DedicatedWorkerGlobalScope".
const isWebWorker = typeof self === "object" && self.constructor?.name === "DedicatedWorkerGlobalScope";

/** True when the code runs neither in a browser main thread nor in a dedicated web worker. */
export const isBackend = !(isBrowser || isWebWorker);

0 comments on commit ab84639

Please sign in to comment.