Skip to content

Commit c11d244

Browse files
author
Dmitrii Kuvaiskii
committed
Add Candle ML framework example
Candle is a minimalist ML framework for Rust with a focus on performance and ease of use. This commit adds two examples with Candle: simple matrix multiplication (to quickly test functionality) and Quantized LLaMA (to test performance). Signed-off-by: Dmitrii Kuvaiskii <[email protected]>
1 parent ceba8e9 commit c11d244

File tree

6 files changed

+202
-0
lines changed

6 files changed

+202
-0
lines changed

candle/.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
/candle_matmul
/candle_quantized
/src

# model files downloaded by the Makefile
/*.bin
/*.json

candle/Makefile

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

ARCH_LIBDIR ?= /lib/$(shell $(CC) -dumpmachine)

ifeq ($(DEBUG),1)
GRAMINE_LOG_LEVEL = debug
else
GRAMINE_LOG_LEVEL = error
endif

# All Rust sources are generated/cloned under this directory.
SRCDIR = src

# Candle version documented in README.md; pins both builds so they are
# reproducible and match the upstream examples referenced there.
CANDLE_VERSION ?= 0.6.0

.PHONY: all
all: candle_matmul candle_matmul.manifest candle_quantized candle_quantized.manifest
ifeq ($(SGX),1)
all: candle_matmul.manifest.sgx candle_matmul.sig candle_quantized.manifest.sgx candle_quantized.sig
endif

######################### Simple Matrix Multiplication #########################

# Generate a fresh Cargo project, add the candle-core dependency (pinned to
# $(CANDLE_VERSION)) and build it with the prepared main.rs.
$(SRCDIR)/candle_matmul/target/debug/candle_matmul:
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	cargo new candle_matmul && cd candle_matmul && \
	cargo add --git https://github.com/huggingface/candle.git \
		--tag $(CANDLE_VERSION) candle-core && \
	cp ../../prepared_matmul_src/main.rs ./src/main.rs && \
	cargo build

candle_matmul: $(SRCDIR)/candle_matmul/target/debug/candle_matmul
	cp $< $@

candle_matmul.manifest: candle_matmul.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# gramine-sgx-sign generates both candle_matmul.manifest.sgx and
# candle_matmul.sig; route both outputs through one intermediate target so the
# recipe runs exactly once (safe under `make -j`).
candle_matmul.manifest.sgx candle_matmul.sig: candle_matmul_sgx_sign
	@:

.INTERMEDIATE: candle_matmul_sgx_sign
candle_matmul_sgx_sign: candle_matmul.manifest candle_matmul
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

############################## Quantized LLaMA #################################

# ~4GB quantized LLaMA-2 7B model; the download helper verifies the SHA256.
llama-2-7b.ggmlv3.q4_0.bin:
	../common_tools/download --output $@ \
		--sha256 bfa26d855e44629c4cf919985e90bd7fa03b77eea1676791519e39a4d45fd4d5 \
		--url https://huggingface.co/TheBloke/Llama-2-7B-GGML/resolve/main/$@

tokenizer.json:
	../common_tools/download --output $@ \
		--sha256 8eea70c4866c4f1320ba096fc986ac82038a8374dbe135212ba7628835b4a6f1 \
		--url https://huggingface.co/hf-internal-testing/llama-tokenizer/raw/main/$@

# Clone Candle (pinned to $(CANDLE_VERSION)) and build the upstream `quantized`
# example. The model/tokenizer are prerequisites so that everything needed at
# runtime is fetched as part of the build.
$(SRCDIR)/candle_quantized/target/release/examples/quantized: llama-2-7b.ggmlv3.q4_0.bin tokenizer.json
	mkdir -p $(SRCDIR) && cd $(SRCDIR) && \
	git clone --depth 1 --branch $(CANDLE_VERSION) \
		https://github.com/huggingface/candle.git candle_quantized && \
	cd candle_quantized && \
	cargo build --example quantized --release

candle_quantized: $(SRCDIR)/candle_quantized/target/release/examples/quantized
	cp $< $@

candle_quantized.manifest: candle_quantized.manifest.template
	gramine-manifest \
		-Dlog_level=$(GRAMINE_LOG_LEVEL) \
		-Darch_libdir=$(ARCH_LIBDIR) \
		$< > $@

# Same two-outputs-from-one-recipe pattern as for candle_matmul above.
candle_quantized.manifest.sgx candle_quantized.sig: candle_quantized_sgx_sign
	@:

.INTERMEDIATE: candle_quantized_sgx_sign
candle_quantized_sgx_sign: candle_quantized.manifest candle_quantized
	gramine-sgx-sign \
		--manifest $< \
		--output $<.sgx

.PHONY: clean
clean:
	$(RM) *.token *.sig *.manifest.sgx *.manifest candle_matmul candle_quantized

.PHONY: distclean
distclean: clean
	$(RM) -r $(SRCDIR) *.tar.gz *.bin *.json

candle/README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
# Candle

Candle is a minimalist ML framework for Rust with a focus on performance
(including GPU support) and ease of use: https://github.com/huggingface/candle

This directory contains the Makefile and the template manifests for the most
recent version of Candle as of this writing (v0.6.0).

# Warning

The `candle_quantized` app will download ~4GB of data (model + tokenizer). This
download happens automatically in the Makefile.

# Quick Start

```sh
# build Candle (uses Rust Cargo) and the final manifests
make SGX=1

# run simple matrix multiplication
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started
./candle_matmul
gramine-direct ./candle_matmul
gramine-sgx ./candle_matmul

# run Quantized LLaMA (quantized version of the LLaMA model)
# note that for Gramine, the cmdline args are already defined in the manifest file
# example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#check-out-our-examples
RAYON_NUM_THREADS=36 ./candle_quantized \
    --model llama-2-7b.ggmlv3.q4_0.bin --tokenizer tokenizer.json --sample-len 200
RAYON_NUM_THREADS=36 gramine-direct ./candle_quantized
RAYON_NUM_THREADS=36 gramine-sgx ./candle_quantized
```
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

# Gramine manifest template for the Candle matrix-multiplication example.
# The {{ ... }} variables are substituted by `gramine-manifest` (see Makefile).

libos.entrypoint = "/candle_matmul"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"

fs.mounts = [
  { path = "/candle_matmul", uri = "file:candle_matmul" },
  # Gramine runtime (glibc etc.) mounted as /lib inside the enclave.
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  # Only libgcc_s is needed from the host libdir, so mount just that file.
  { path = "{{ arch_libdir }}/libgcc_s.so.1", uri = "file:{{ arch_libdir }}/libgcc_s.so.1" },
]

# NOTE(review): with EDMM=1 only one enclave thread is statically declared —
# presumably because EDMM allows adding threads dynamically; confirm against
# the Gramine manifest documentation.
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '16' }}
sgx.enclave_size = "1G"

sgx.trusted_files = [
  "file:candle_matmul",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
]
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
# Copyright (C) 2024 Gramine contributors
# SPDX-License-Identifier: BSD-3-Clause

# Gramine manifest template for the Candle "Quantized LLaMA" example.
# The {{ ... }} variables are substituted by `gramine-manifest` (see Makefile).

libos.entrypoint = "/candle_quantized"

loader.log_level = "{{ log_level }}"

loader.env.LD_LIBRARY_PATH = "/lib:{{ arch_libdir }}"
# Rayon thread-pool size; forwarded from the host environment (see README).
loader.env.RAYON_NUM_THREADS = { passthrough = true }

# Cmdline args are fixed in the manifest, so e.g. `gramine-sgx
# ./candle_quantized` needs no extra arguments (see README).
loader.argv = [ "candle_quantized", "--model", "llama-2-7b.ggmlv3.q4_0.bin",
                "--tokenizer", "tokenizer.json", "--sample-len", "200" ]

fs.mounts = [
  { path = "/candle_quantized", uri = "file:candle_quantized" },
  # Gramine runtime (glibc etc.) mounted as /lib inside the enclave.
  { path = "/lib", uri = "file:{{ gramine.runtimedir() }}" },
  { path = "{{ arch_libdir }}", uri = "file:{{ arch_libdir }}" },

  # Model and tokenizer downloaded by the Makefile.
  { path = "/llama-2-7b.ggmlv3.q4_0.bin", uri = "file:llama-2-7b.ggmlv3.q4_0.bin" },
  { path = "/tokenizer.json", uri = "file:tokenizer.json" },
]

# NOTE(review): with EDMM=1 only one enclave thread is statically declared —
# presumably because EDMM allows adding threads dynamically; confirm against
# the Gramine manifest documentation.
sgx.edmm_enable = {{ 'true' if env.get('EDMM', '0') == '1' else 'false' }}
sgx.max_threads = {{ '1' if env.get('EDMM', '0') == '1' else '256' }}
# Sized to fit the ~4GB model plus runtime allocations.
sgx.enclave_size = "32G"

sgx.trusted_files = [
  "file:candle_quantized",
  "file:{{ gramine.runtimedir() }}/",
  "file:{{ arch_libdir }}/libcrypto.so.3",
  "file:{{ arch_libdir }}/libgcc_s.so.1",
  "file:{{ arch_libdir }}/libssl.so.3",

  "file:llama-2-7b.ggmlv3.q4_0.bin",
  "file:tokenizer.json",
]

candle/prepared_matmul_src/main.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
// example taken from https://github.com/huggingface/candle/tree/0.6.0?tab=readme-ov-file#get-started

use candle_core::{Device, Tensor};

/// Multiplies two small random matrices with Candle and prints the result.
/// Serves as a quick smoke test that Candle runs (e.g. under Gramine).
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Plain CPU backend — no accelerator is used in this example.
    let device = Device::Cpu;

    // Random 2x3 and 3x4 matrices drawn from N(mean = 0, std = 1).
    let a = Tensor::randn(0f32, 1., (2, 3), &device)?;
    let b = Tensor::randn(0f32, 1., (3, 4), &device)?;

    // 2x4 matrix product; `?` propagates any Candle error to the caller.
    let c = a.matmul(&b)?;
    println!("{c}");
    Ok(())
}

0 commit comments

Comments
 (0)