[gguf] Add descriptions to quantization types (#615)
I have not found a single place where all the different data/quant types of GGUF are documented. Therefore, this commit creates a description object that should be useful to the community for understanding the different data/quant types. Afterwards, I plan to make the descriptions available at:

1. [hf.co/docs/hub/gguf](https://huggingface.co/docs/hub/gguf)
2. [GGUF tensor inspector](https://twitter.com/mishig25/status/1769731425949884594/video/1)
3. More importantly, as a source of information the community can use in their own projects.

---------

Co-authored-by: Younes Belkada <[email protected]>
Co-authored-by: FL33TW00D <[email protected]>
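As a usage sketch (hypothetical consumer code: the module path `./quant-descriptions` and the surrounding file layout are assumptions, not part of this commit's visible diff):

import { GGMLQuantizationType } from "./types";
import { QUANT_DESCRIPTIONS } from "./quant-descriptions"; // hypothetical module path

// Look up the human-readable description (and optional source URL) for a quant type.
const desc = QUANT_DESCRIPTIONS[GGMLQuantizationType.Q4_K];
console.log(desc.txt); // "4-bit quantization (q). Super-blocks with 8 blocks, ..."
if (desc.src_url) {
  console.log(`More info: ${desc.src_url}`);
}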
1 parent c87b481 · commit 0ed8d60 — 2 changed files with 99 additions and 0 deletions.
@@ -0,0 +1,98 @@
import { GGMLQuantizationType } from "./types";

export const QUANT_DESCRIPTIONS: Record<GGMLQuantizationType, { txt: string; src_url?: string }> = {
  [GGMLQuantizationType.F32]: {
    txt: "32-bit standard IEEE 754 single-precision floating-point number.",
    src_url: "https://en.wikipedia.org/wiki/Single-precision_floating-point_format",
  },
  [GGMLQuantizationType.F16]: {
    txt: "16-bit standard IEEE 754 half-precision floating-point number.",
    src_url: "https://en.wikipedia.org/wiki/Half-precision_floating-point_format",
  },
  [GGMLQuantizationType.Q4_0]: {
    txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not used widely as of today).",
    src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
  },
  [GGMLQuantizationType.Q4_1]: {
    txt: "4-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not used widely as of today).",
    src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
  },
  [GGMLQuantizationType.Q5_0]: {
    txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not used widely as of today).",
    src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
  },
  [GGMLQuantizationType.Q5_1]: {
    txt: "5-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not used widely as of today).",
    src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
  },
  [GGMLQuantizationType.Q8_0]: {
    txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale. Legacy quantization method (not used widely as of today).",
    src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557654249",
  },
  [GGMLQuantizationType.Q8_1]: {
    txt: "8-bit round-to-nearest quantization (q). Each block has 32 weights. Weight formula: w = q * block_scale + block_minimum. Legacy quantization method (not used widely as of today).",
    src_url: "https://github.com/huggingface/huggingface.js/pull/615#discussion_r1557682290",
  },
  [GGMLQuantizationType.Q2_K]: {
    txt: `2-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(4-bit) + block_min(4-bit), resulting in 2.5625 bits-per-weight.`,
    src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
  },
  [GGMLQuantizationType.Q3_K]: {
    txt: `3-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(6-bit), resulting in 3.4375 bits-per-weight.`,
    src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
  },
  [GGMLQuantizationType.Q4_K]: {
    txt: `4-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 4.5 bits-per-weight.`,
    src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
  },
  [GGMLQuantizationType.Q5_K]: {
    txt: `5-bit quantization (q). Super-blocks with 8 blocks, each block has 32 weights. Weight formula: w = q * block_scale(6-bit) + block_min(6-bit), resulting in 5.5 bits-per-weight.`,
    src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
  },
  [GGMLQuantizationType.Q6_K]: {
    txt: `6-bit quantization (q). Super-blocks with 16 blocks, each block has 16 weights. Weight formula: w = q * block_scale(8-bit), resulting in 6.5625 bits-per-weight.`,
    src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
  },
  [GGMLQuantizationType.Q8_K]: {
    txt: `8-bit quantization (q). Each block has 256 weights. Only used for quantizing intermediate results. All 2-6 bit dot products are implemented for this quantization type. Weight formula: w = q * block_scale.`,
    src_url: "https://github.com/ggerganov/llama.cpp/pull/1684#issue-1739619305",
  },
  [GGMLQuantizationType.IQ2_XXS]: {
    txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.06 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
  [GGMLQuantizationType.IQ2_XS]: {
    txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.31 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
  [GGMLQuantizationType.IQ3_XXS]: {
    txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.06 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
  [GGMLQuantizationType.IQ1_S]: {
    txt: "1-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 1.56 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
  [GGMLQuantizationType.IQ4_NL]: {
    txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix.",
  },
  [GGMLQuantizationType.IQ3_S]: {
    txt: "3-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 3.44 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
  [GGMLQuantizationType.IQ2_S]: {
    txt: "2-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 2.5 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
  [GGMLQuantizationType.IQ4_XS]: {
    txt: "4-bit quantization (q). Super-blocks with 256 weights. Weight w is obtained using super_block_scale & importance matrix, resulting in 4.25 bits-per-weight.",
    src_url:
      "https://huggingface.co/CISCai/OpenCodeInterpreter-DS-6.7B-SOTA-GGUF/blob/main/README.md?code=true#L59-L70",
  },
};
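The repeated weight formulas above are easiest to see with a small worked example. Below is a minimal dequantization sketch in TypeScript; it is illustrative only, assuming a Q4_1-style block whose 32 4-bit quants are already unpacked into plain numbers, and it does not reproduce llama.cpp's actual bit-packed memory layout. The names `QuantBlock` and `dequantizeBlock` are hypothetical.

// Hypothetical types for illustration; real GGUF blocks are bit-packed byte buffers.
interface QuantBlock {
  quants: number[];   // 32 unpacked 4-bit values, each in 0..15
  blockScale: number; // per-block scale (stored as f16 in the real format)
  blockMin: number;   // per-block minimum (Q4_1 only; Q4_0 has no minimum term)
}

// Applies the description's weight formula: w = q * block_scale + block_minimum.
function dequantizeBlock(block: QuantBlock): number[] {
  return block.quants.map((q) => q * block.blockScale + block.blockMin);
}

// Bits-per-weight arithmetic implied by the descriptions (assuming f16 scales/mins):
// Q4_1: (32 quants * 4 bits + 16-bit scale + 16-bit min) / 32 weights = 5 bits-per-weight.
// Q4_K: (256 * 4 + 8 * (6 + 6) + 2 * 16) / 256 = 1152 / 256 = 4.5 bits-per-weight,
//       matching the figure in the Q4_K entry above.

The arithmetic also shows why the K-quants beat the legacy formats at similar bit widths: per-block scales shrink to 6 bits and share one f16 super-block scale, so the metadata overhead per weight drops.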