File tree: transformerlab/plugins/llama_cpp_server
4 files changed, +32 -3 lines

File 1 of 4: shell script that launches the API server

@@ -68,6 +68,13 @@
 # echo "✅ Uvicorn is installed."
 fi
 
+# Check if NVIDIA GPU is available and add necessary paths
+if command -v nvidia-smi &> /dev/null; then
+    echo "✅ NVIDIA GPU detected, adding CUDA libraries to path"
+    # Add common NVIDIA library paths
+    export LD_LIBRARY_PATH=${ENV_DIR}/lib:$LD_LIBRARY_PATH
+fi
+
 echo "▶️ Starting the API server:"
 if [ "$RELOAD" = true ]; then
     echo "🔁 Reload the server on file changes"
File 2 of 4: plugin manifest (JSON), version bump

@@ -4,7 +4,7 @@
   "description": "Runs llama-cpp-python server that can run GGUF models that work well on CPU only machines.",
   "plugin-format": "python",
   "type": "loader",
-  "version": "0.1.6",
+  "version": "0.1.8",
   "model_architectures": ["GGUF"],
   "supported_hardware_architectures": [
     "cpu",
File 3 of 4: Python server module, import reordering

@@ -11,6 +11,7 @@
 
 Right now only generate_stream works -- need to do work to make generate work
 """
+import torch
 
 import argparse
 import asyncio
@@ -20,7 +21,6 @@
 from typing import List
 
 import llama_cpp
-import torch
 import uvicorn
 from fastapi import BackgroundTasks, FastAPI, Request
 from fastapi.concurrency import run_in_threadpool
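This change moves import torch ahead of import llama_cpp. A plausible reason, stated here as an assumption since the PR does not explain it, is that importing torch first loads the CUDA libraries bundled with the torch wheel into the process, so a CUDA-built llama_cpp can resolve them even without a system-wide CUDA toolkit. A quick probe of that behaviour in the plugin's environment, assuming CUDA builds of both packages, might look like:

# Hypothetical check (not in the PR): mirror the new import order and confirm
# llama_cpp imports cleanly after torch has loaded its bundled CUDA libraries.
python -c "
import torch        # loads torch's bundled CUDA runtime first
import llama_cpp    # should import without missing-library errors
print('torch CUDA available:', torch.cuda.is_available())
print('llama_cpp imported from:', llama_cpp.__file__)
"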
File 4 of 4: installation script, backend selection by platform

@@ -4,4 +4,26 @@
 # So we will install llama-cpp-python only and implement our
 # own server using FastAPI
 
-uv pip install llama-cpp-python==0.2.79 --upgrade --force-reinstall --no-cache-dir
+echo "Setting up llama-cpp-python..."
+
+# Detect OS
+if [[ "$(uname)" == "Darwin" ]]; then
+    # macOS - check for Metal support
+    if [[ "$(uname -m)" == "arm64" || "$(sysctl -n machdep.cpu.brand_string)" == *"Apple"* ]]; then
+        echo "Detected Mac with Apple Silicon - installing with Metal support"
+        CMAKE_ARGS="-DGGML_METAL=on" uv pip install llama-cpp-python --upgrade --force-reinstall --no-cache-dir
+    else
+        echo "Detected Mac with Intel CPU - installing with OpenBLAS support"
+        CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" uv pip install llama-cpp-python --upgrade --force-reinstall --no-cache-dir
+    fi
+elif command -v nvidia-smi &> /dev/null; then
+    # Linux/Other with CUDA detected
+    echo "CUDA GPU detected. Installing based on CUDA setup using GGML CUDA"
+    CMAKE_ARGS="-DGGML_CUDA=on" FORCE_CMAKE=1 uv pip install llama-cpp-python --force-reinstall --no-cache-dir
+else
+    # Linux/Other without CUDA - try using OpenBLAS
+    echo "No GPU detected - installing with OpenBLAS support"
+    CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" uv pip install llama-cpp-python --upgrade --no-cache-dir
+fi
+
+echo "llama-cpp-python installation complete."