Skip to content

Commit 147ba0f

Browse files
author
Dmitrii Kuvaiskii
committed
Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance and ease of use. This commit adds the Quantized LLaMA example. Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
1 parent ceba8e9 commit 147ba0f

File tree

4 files changed

+130
-0
lines changed

4 files changed

+130
-0
lines changed

candle/.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
/candle_quantized
/src

# model
/*.bin
/*.json

candle/Makefile

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

# Build the Candle "quantized" LLaMA example and wrap it in a Gramine manifest.
# Usage: `make` (gramine-direct only) or `make SGX=1` (also produce SGX
# manifest + signature). `make DEBUG=1` enables verbose Gramine logging.

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

# Candle release to build; README documents v0.6.0. Pinning the tag keeps the
# build reproducible instead of tracking upstream HEAD.
CANDLE_VERSION ?= 0.6.0

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

SRCDIR = src

.PHONY: all
all: candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_quantized.manifest.sgx candle_quantized.sig
endif

# ~4GB quantized LLaMA-2 model weights; checksum-verified by the download helper.
llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

# Clone a pinned Candle release and build the `quantized` example with Cargo.
# The model/tokenizer prerequisites ensure everything needed at run time is
# fetched as part of the default build.
$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone --depth 1 --branch $(CANDLE_VERSION) \
		https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# gramine-sgx-sign generates both the .sgx manifest and the .sig in one
# invocation; funnel both targets through a single intermediate sentinel so
# the tool runs exactly once (safe under `make -j`).
candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json

candle/README.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
# Candle

[Candle](https://github.com/huggingface/candle) is a minimalist ML framework for
Rust with a focus on performance (including GPU support) and ease of use.

This directory contains the Makefile and the template manifest for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

The `candle_quantized` app will download ~4GB of data (model + tokenizer). This
happens automatically in the Makefile.

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifest
make SGX=1

# run Quantized LLaMA (quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
    --model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

# Gramine manifest template for the Candle "quantized" LLaMA example.
# Jinja placeholders ({{ ... }}) are filled in by `gramine-manifest`
# (log_level, arch_libdir) and by Gramine's built-in variables.

loader.entrypoint = "file:{{ gramine.libos }}"
libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
# Let the host's RAYON_NUM_THREADS control Rayon parallelism inside the enclave.
loader.env.RAYON_NUM_THREADS = { passthrough = true }

# Command-line args are baked into the manifest; `gramine-sgx ./candle_quantized`
# needs no extra arguments (see README).
loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  # model weights and tokenizer, downloaded by the Makefile
  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
# With EDMM, threads can be added dynamically, so a single pre-allocated thread
# suffices; without EDMM all threads must be pre-allocated up front.
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
# Large enclave: the ~4GB model is mapped into enclave memory at run time.
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.libos }}",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]

0 commit comments

Comments
 (0)