Skip to content

Commit c11d244

Browse files
author
Dmitrii Kuvaiskii
committed
Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance and ease of use. This commit adds two examples with Candle: simple matrix multiplication (to quickly test functionality) and Quantized LLaMA (to test performance). Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
1 parent ceba8e9 commit c11d244

File tree

6 files changed

+202
-0
lines changed

6 files changed

+202
-0
lines changed

candle/.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model files downloaded by the Makefile
/*.bin
/*.json

candle/Makefile

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

# All Rust sources are generated/cloned under this directory.
SRCDIR = src

# Candle version documented in README.md; pins both builds so they are
# reproducible and match the upstream examples referenced there.
CANDLE_VERSION ?= 0.6.0

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

# Generate a fresh Cargo project, add the candle-core dependency (pinned to
# $(CANDLE_VERSION)) and build it with the prepared main.rs.
$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git \
		--tag $(CANDLE_VERSION) candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# gramine-sgx-sign generates both candle_matmul.manifest.sgx and
# candle_matmul.sig; route both outputs through one intermediate target so the
# recipe runs exactly once (safe under `make -j`).
candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

# ~4GB quantized LLaMA-2 7B model; the download helper verifies the SHA256.
llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

# Clone Candle (pinned to $(CANDLE_VERSION)) and build the upstream `quantized`
# example. The model/tokenizer are prerequisites so that everything needed at
# runtime is fetched as part of the build.
$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone --depth 1 --branch $(CANDLE_VERSION) \
		https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# Same two-outputs-from-one-recipe pattern as for candle_matmul above.
candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json

candle/README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the template manifests for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

The `candle_quantized` app will download ~4GB of data (model + tokenizer). This
download happens automatically in the Makefile.

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifests
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
    --model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

# Gramine manifest template for the Candle matrix-multiplication example.
# The {{ ... }} variables are substituted by `gramine-manifest` (see Makefile).

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
  { path = "/candle_matmul", uri = "file:candle_matmul" },
  # Gramine runtime (glibc etc.) mounted as /lib inside the enclave.
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  # Only libgcc_s is needed from the host libdir, so mount just that file.
  { path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

# NOTE(review): with EDMM=1 only one enclave thread is statically declared —
# presumably because EDMM allows adding threads dynamically; confirm against
# the Gramine manifest documentation.
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
  "file:candle_matmul",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

# Gramine manifest template for the Candle "Quantized LLaMA" example.
# The {{ ... }} variables are substituted by `gramine-manifest` (see Makefile).

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
# Rayon thread-pool size; forwarded from the host environment (see README).
loader.env.RAYON_NUM_THREADS = { passthrough = true }

# Cmdline args are fixed in the manifest, so e.g. `gramine-sgx
# ./candle_quantized` needs no extra arguments (see README).
loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  # Gramine runtime (glibc etc.) mounted as /lib inside the enclave.
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  # Model and tokenizer downloaded by the Makefile.
  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

# NOTE(review): with EDMM=1 only one enclave thread is statically declared —
# presumably because EDMM allows adding threads dynamically; confirm against
# the Gramine manifest documentation.
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
# Sized to fit the ~4GB model plus runtime allocations.
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]

candle/prepared_matmul_src/main.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started

use candle_core::{Device, Tensor};

/// Multiplies two small random matrices with Candle and prints the result.
/// Serves as a quick smoke test that Candle runs (e.g. under Gramine).
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Plain CPU backend — no accelerator is used in this example.
    let device = Device::Cpu;

    // Random 2x3 and 3x4 matrices drawn from N(mean = 0, std = 1).
    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;

    // 2x4 matrix product; `?` propagates any Candle error to the caller.
    let c = a.matmul(&b)?;
    println!("{c}");
    Ok(())
}

0 commit comments

Comments
 (0)