utilityai
diff --git a/‎Cargo.lock
Lines changed: 3 additions & 2 deletions b/‎Cargo.lock
Lines changed: 3 additions & 2 deletions
diff --git a/‎README.md
Lines changed: 1 addition & 1 deletion b/‎README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎embeddings/src/main.rs
Lines changed: 5 additions & 5 deletions b/‎embeddings/src/main.rs
Lines changed: 5 additions & 5 deletions
diff --git a/‎llama-cpp-2/Cargo.toml
Lines changed: 2 additions & 2 deletions b/‎llama-cpp-2/Cargo.toml
Lines changed: 2 additions & 2 deletions
diff --git a/‎llama-cpp-2/src/lib.rs
Lines changed: 1 addition & 1 deletion b/‎llama-cpp-2/src/lib.rs
Lines changed: 1 addition & 1 deletion
diff --git a/‎llama-cpp-sys-2/Cargo.toml
Lines changed: 2 additions & 1 deletion b/‎llama-cpp-sys-2/Cargo.toml
Lines changed: 2 additions & 1 deletion
diff --git a/‎llama-cpp-sys-2/README.md
Lines changed: 2 additions & 2 deletions b/‎llama-cpp-sys-2/README.md
Lines changed: 2 additions & 2 deletions
@@ -35,7 +35,7 @@ git clone --recursive https://github.com/utilityai/llama-cpp-rs
 cd llama-cpp-rs
 ```
 
-Run the simple example (add `--featues cublas` if you have a cuda gpu)
+Run the simple example (add `--features cuda` if you have a cuda gpu)
 
 ```bash
 cargo run --release --bin simple "The way to kill a linux process is" hf-model TheBloke/Llama-2-7B-GGUF llama-2-7b.Q4_K_M.gguf
 
@@ -20,8 +20,8 @@ use llama_cpp_2::ggml_time_us;
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::LlamaModelParams;
-use llama_cpp_2::model::{AddBos, Special};
 use llama_cpp_2::model::LlamaModel;
+use llama_cpp_2::model::{AddBos, Special};
 
 #[derive(clap::Parser, Debug, Clone)]
 struct Args {
@@ -35,7 +35,7 @@ struct Args {
     #[clap(short)]
     normalise: bool,
     /// Disable offloading layers to the gpu
-    #[cfg(feature = "cublas")]
+    #[cfg(feature = "cuda")]
     #[clap(long)]
     disable_gpu: bool,
 }
@@ -78,7 +78,7 @@ fn main() -> Result<()> {
         model,
         prompt,
         normalise,
-        #[cfg(feature = "cublas")]
+        #[cfg(feature = "cuda")]
         disable_gpu,
     } = Args::parse();
 
@@ -87,13 +87,13 @@ fn main() -> Result<()> {
 
     // offload all layers to the gpu
     let model_params = {
-        #[cfg(feature = "cublas")]
+        #[cfg(feature = "cuda")]
         if !disable_gpu {
             LlamaModelParams::default().with_n_gpu_layers(1000)
         } else {
             LlamaModelParams::default()
         }
-        #[cfg(not(feature = "cublas"))]
+        #[cfg(not(feature = "cuda"))]
         LlamaModelParams::default()
     };
 
 
@@ -14,7 +14,7 @@ thiserror = { workspace = true }
 tracing = { workspace = true }
 
 [features]
-cublas = ["llama-cpp-sys-2/cublas"]
+cuda = ["llama-cpp-sys-2/cuda"]
 metal = ["llama-cpp-sys-2/metal"]
 sampler = []
 
@@ -25,4 +25,4 @@ llama-cpp-sys-2 = { path = "../llama-cpp-sys-2", features=["metal"], version = "
 workspace = true
 
 [package.metadata.docs.rs]
-features = ["sampler"]
+features = ["sampler"]
@@ -11,7 +11,7 @@
 //!
 //! # Feature Flags
 //!
-//! - `cublas` enables CUDA gpu support.
+//! - `cuda` enables CUDA gpu support.
 //! - `sampler` adds the [`context::sample::sampler`] struct for a more rusty way of sampling.
 use std::ffi::NulError;
 use std::fmt::Debug;
 
@@ -48,8 +48,9 @@ include = [
 [build-dependencies]
 bindgen = { workspace = true }
 cc = { workspace = true, features = ["parallel"] }
+once_cell = "1.19.0"
 
 [features]
-cublas = []
+cuda = []
 metal = []
 
@@ -1,5 +1,5 @@
 # llama-cpp-sys
 
-Raw bindings to llama.cpp with cublas support.
+Raw bindings to llama.cpp with cuda support.
 
-See [llama-cpp-2](https://crates.io/crates/llama-cpp-2) for a safe API.
+See [llama-cpp-2](https://crates.io/crates/llama-cpp-2) for a safe API.