
Commit 466485e

Merge pull request #248 from transformerlab/fix/llama-cpp-install

Llama CPP Upgrade

2 parents: 4b26ed0 + 84e910f

File tree

4 files changed: +32 -3 lines

run.sh (+7)

@@ -68,6 +68,13 @@ else
     # echo "✅ Uvicorn is installed."
 fi
 
+# Check if NVIDIA GPU is available and add necessary paths
+if command -v nvidia-smi &> /dev/null; then
+    echo "✅ NVIDIA GPU detected, adding CUDA libraries to path"
+    # Add common NVIDIA library paths
+    export LD_LIBRARY_PATH=${ENV_DIR}/lib:$LD_LIBRARY_PATH
+fi
+
 echo "▶️ Starting the API server:"
 if [ "$RELOAD" = true ]; then
     echo "🔁 Reload the server on file changes"

transformerlab/plugins/llama_cpp_server/index.json (+1, -1)

@@ -4,7 +4,7 @@
   "description": "Runs llama-cpp-python server that can run GGUF models that work well on CPU only machines.",
   "plugin-format": "python",
   "type": "loader",
-  "version": "0.1.6",
+  "version": "0.1.8",
   "model_architectures": ["GGUF"],
   "supported_hardware_architectures": [
     "cpu",

transformerlab/plugins/llama_cpp_server/main.py (+1, -1)

@@ -11,6 +11,7 @@
 
 Right now only generate_stream works -- need to do work to make generate work
 """
+import torch
 
 import argparse
 import asyncio
@@ -20,7 +21,6 @@
 from typing import List
 
 import llama_cpp
-import torch
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, Request
 from fastapi.concurrency import run_in_threadpool
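Reordering the imports so torch is loaded before llama_cpp is presumably intended to make torch's bundled CUDA libraries available in the process before the llama.cpp backend initializes; the commit itself does not state the rationale.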

transformerlab/plugins/llama_cpp_server/setup.sh (+23, -1)

@@ -4,4 +4,26 @@
 # So we will install llama-cpp-python only and implement our
 # own server using FastAPI
 
-uv pip install llama-cpp-python==0.2.79 --upgrade --force-reinstall --no-cache-dir
+echo "Setting up llama-cpp-python..."
+
+# Detect OS
+if [[ "$(uname)" == "Darwin" ]]; then
+    # macOS - check for Metal support
+    if [[ "$(uname -m)" == "arm64" || "$(sysctl -n machdep.cpu.brand_string)" == *"Apple"* ]]; then
+        echo "Detected Mac with Apple Silicon - installing with Metal support"
+        CMAKE_ARGS="-DGGML_METAL=on" uv pip install llama-cpp-python --upgrade --force-reinstall --no-cache-dir
+    else
+        echo "Detected Mac with Intel CPU - installing with OpenBLAS support"
+        CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" uv pip install llama-cpp-python --upgrade --force-reinstall --no-cache-dir
+    fi
+elif command -v nvidia-smi &> /dev/null; then
+    # Linux/Other with CUDA detected
+    echo "CUDA GPU detected. Installing based on CUDA setup using GGML CUDA"
+    CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 uv pip install llama-cpp-python --force-reinstall --no-cache-dir
+else
+    # Linux/Other without CUDA - try using OpenBLAS
+    echo "No GPU detected - installing with OpenBLAS support"
+    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" uv pip install llama-cpp-python --upgrade --no-cache-dir
+fi
+
+echo "llama-cpp-python installation complete."
