3 changes: 3 additions & 0 deletions .gitignore
@@ -148,3 +148,6 @@ poetry.toml
/run-vim.sh
/run-chat.sh
.ccache/

+# emscripten
+a.out.*
Collaborator: Why not just a.out* ?
6 changes: 5 additions & 1 deletion CMakeLists.txt
@@ -36,7 +36,11 @@ option(LLAMA_USE_SYSTEM_GGML "Use system libggml" OFF)
if (EMSCRIPTEN)
set(BUILD_SHARED_LIBS_DEFAULT OFF)

-    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON)
+    option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" OFF)
+    option(LLAMA_BUILD_HTML "llama: build HTML file" ON)
+    if (LLAMA_BUILD_HTML)
+        set(CMAKE_EXECUTABLE_SUFFIX ".html")
+    endif()
else()
if (MINGW)
set(BUILD_SHARED_LIBS_DEFAULT OFF)
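Note: with LLAMA_BUILD_HTML=ON, CMAKE_EXECUTABLE_SUFFIX makes Emscripten emit a browser-loadable .html harness next to the .js/.wasm artifacts. A typical configure/build invocation for this setup is the one recorded in the comment at the top of scripts/serve-static.js below.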
2 changes: 2 additions & 0 deletions common/common.cpp
@@ -871,6 +871,8 @@ std::string fs_get_cache_directory() {
cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
cache_directory = std::getenv("LOCALAPPDATA");
+#elif defined(__EMSCRIPTEN__)
+    GGML_ABORT("not implemented on this platform");
#else
# error Unknown architecture
#endif
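Aborting in fs_get_cache_directory looks like a deliberate stopgap: a browser-sandboxed WASM build has no conventional per-user cache directory, and Emscripten's default in-memory filesystem does not persist across page loads, so there is no sensible value to return here.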
1 change: 1 addition & 0 deletions ggml/CMakeLists.txt
@@ -189,6 +189,7 @@ option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation"
option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF)
option(GGML_WEBGPU "ggml: use WebGPU" OFF)
option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF)
+option(GGML_WEBGPU_JSPI "ggml: use JSPI for WebGPU" ON)
option(GGML_ZDNN "ggml: use zDNN" OFF)
option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF)
24 changes: 22 additions & 2 deletions ggml/src/ggml-webgpu/CMakeLists.txt
@@ -39,15 +39,35 @@ add_dependencies(ggml-webgpu generate_shaders)
if(EMSCRIPTEN)
set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg")

-    target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
-    target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    if(NOT EMDAWNWEBGPU_DIR)
+        # default built-in port
+        target_compile_options(ggml-webgpu PRIVATE "--use-port=emdawnwebgpu")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=emdawnwebgpu")
+    else()
+        # custom port
+        target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+        target_link_options(ggml-webgpu INTERFACE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py")
+    endif()

+    if (GGML_WEBGPU_JSPI)
+        target_compile_options(ggml-webgpu PRIVATE "-fwasm-exceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sJSPI" "-fwasm-exceptions")
+    else()
+        target_compile_options(ggml-webgpu PRIVATE "-fexceptions")
+        target_link_options(ggml-webgpu INTERFACE "-sASYNCIFY" "-fexceptions")
+    endif()

set(DawnWebGPU_TARGET webgpu_cpp)
else()
find_package(Dawn REQUIRED)
set(DawnWebGPU_TARGET dawn::webgpu_dawn)
endif()

if (GGML_WEBGPU_DEBUG)
target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1)
+    if(EMSCRIPTEN)
+        target_link_options(ggml-webgpu INTERFACE "-sASSERTIONS=2")
+    endif()
endif()

target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR})
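For background: Asyncify and JSPI are Emscripten's two mechanisms for letting synchronous-looking WASM code suspend while an asynchronous browser API such as WebGPU completes. Asyncify rewrites the generated binary (at some size and speed cost) and is paired here with JS-based exceptions (-fexceptions), while JSPI uses the engine's stack-switching support and composes with native WASM exceptions (-fwasm-exceptions). A minimal sketch of the kind of call that needs one of the two, using only the standard emscripten_sleep API:

#include <emscripten/emscripten.h>

int main() {
    // Returns control to the browser event loop and resumes the WASM stack
    // ~100 ms later; linking fails unless -sASYNCIFY is set (or -sJSPI,
    // which provides the same suspension via stack switching).
    emscripten_sleep(100);
    return 0;
}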
19 changes: 17 additions & 2 deletions ggml/src/ggml-webgpu/ggml-webgpu.cpp
@@ -9,6 +9,10 @@
#include "ggml-impl.h"
#include "ggml-wgsl-shaders.hpp"

+#ifdef __EMSCRIPTEN__
+#include <emscripten/emscripten.h>
+#endif

#include <webgpu/webgpu_cpp.h>

#include <condition_variable>
@@ -1173,8 +1177,12 @@ static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t
ctx->adapter.GetInfo(&info);

// Initialize device
-    std::vector<wgpu::FeatureName> required_features = { wgpu::FeatureName::ShaderF16,
-                                                         wgpu::FeatureName::ImplicitDeviceSynchronization };
+    std::vector<wgpu::FeatureName> required_features = {
+        wgpu::FeatureName::ShaderF16,
+#ifndef __EMSCRIPTEN__
+        wgpu::FeatureName::ImplicitDeviceSynchronization,
+#endif
+    };
wgpu::DeviceDescriptor dev_desc;
dev_desc.requiredLimits = &ctx->limits;
dev_desc.requiredFeatures = required_features.data();
@@ -1287,6 +1295,13 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
instance_descriptor.requiredFeatures = instance_features.data();
instance_descriptor.requiredFeatureCount = instance_features.size();
webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor);

+#ifdef __EMSCRIPTEN__
+    if (webgpu_ctx->instance == nullptr) {
+        GGML_LOG_ERROR("ggml_webgpu: Failed to create WebGPU instance. Make sure either -sASYNCIFY or -sJSPI is set\n");
+        return nullptr;
+    }
+#endif
GGML_ASSERT(webgpu_ctx->instance != nullptr);

static ggml_backend_reg reg = {
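To see what the new null check guards, here is a minimal sketch of instance creation under Emscripten. Assumptions: the InstanceFeatureName::TimedWaitAny enumerator from recent webgpu.h headers stands in for the real instance_features list, which is built outside this hunk and may differ:

#include <webgpu/webgpu_cpp.h>
#include <cstdio>

int main() {
    // Assumed stand-in for the features ggml-webgpu actually requests.
    wgpu::InstanceFeatureName features[] = { wgpu::InstanceFeatureName::TimedWaitAny };
    wgpu::InstanceDescriptor desc = {};
    desc.requiredFeatures     = features;
    desc.requiredFeatureCount = 1;

    // Timed waits need a way to suspend the WASM stack; without -sASYNCIFY
    // or -sJSPI at link time the emdawnwebgpu port cannot provide one, and
    // CreateInstance returns null, which is the case the new error reports.
    wgpu::Instance instance = wgpu::CreateInstance(&desc);
    if (instance == nullptr) {
        fprintf(stderr, "CreateInstance failed: link with -sASYNCIFY or -sJSPI\n");
        return 1;
    }
    return 0;
}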
110 changes: 110 additions & 0 deletions scripts/serve-static.js
@@ -0,0 +1,110 @@
const http = require('http');
const fs = require('fs').promises;
const path = require('path');

// This file is used for testing wasm build from emscripten
// Example build command:
// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_CURL=OFF
// cmake --build build-wasm --target test-backend-ops -j

const PORT = 8080;
const STATIC_DIR = path.join(__dirname, '../build-wasm/bin');
console.log(`Serving static files from: ${STATIC_DIR}`);

const mimeTypes = {
    '.html': 'text/html',
    '.js': 'text/javascript',
    '.css': 'text/css',
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.gif': 'image/gif',
    '.svg': 'image/svg+xml',
    '.json': 'application/json',
    '.woff': 'font/woff',
    '.woff2': 'font/woff2',
};

async function generateDirListing(dirPath, reqUrl) {
    const files = await fs.readdir(dirPath);
    let html = `
        <!DOCTYPE html>
        <html>
        <head>
            <title>Directory Listing</title>
            <style>
                body { font-family: Arial, sans-serif; padding: 20px; }
                ul { list-style: none; padding: 0; }
                li { margin: 5px 0; }
                a { text-decoration: none; color: #0066cc; }
                a:hover { text-decoration: underline; }
            </style>
        </head>
        <body>
            <h1>Directory: ${reqUrl}</h1>
            <ul>
    `;

    if (reqUrl !== '/') {
        html += `<li><a href="../">../ (Parent Directory)</a></li>`;
    }

    for (const file of files) {
        const filePath = path.join(dirPath, file);
        const stats = await fs.stat(filePath);
        const link = encodeURIComponent(file) + (stats.isDirectory() ? '/' : '');
        html += `<li><a href="${link}">${file}${stats.isDirectory() ? '/' : ''}</a></li>`;
    }

    html += `
            </ul>
        </body>
        </html>
    `;
    return html;
}

const server = http.createServer(async (req, res) => {
    try {
        // Set COOP and COEP headers (required for SharedArrayBuffer, which
        // Emscripten's pthreads support relies on)
        res.setHeader('Cross-Origin-Opener-Policy', 'same-origin');
        res.setHeader('Cross-Origin-Embedder-Policy', 'require-corp');
        res.setHeader('Cache-Control', 'no-store, no-cache, must-revalidate, proxy-revalidate');
        res.setHeader('Pragma', 'no-cache');
        res.setHeader('Expires', '0');

        const filePath = path.join(STATIC_DIR, decodeURIComponent(req.url));
        const stats = await fs.stat(filePath);

        if (stats.isDirectory()) {
            const indexPath = path.join(filePath, 'index.html');
            try {
                const indexData = await fs.readFile(indexPath);
                res.writeHead(200, { 'Content-Type': 'text/html' });
                res.end(indexData);
            } catch {
                // No index.html, generate directory listing
                const dirListing = await generateDirListing(filePath, req.url);
                res.writeHead(200, { 'Content-Type': 'text/html' });
                res.end(dirListing);
            }
        } else {
            const ext = path.extname(filePath).toLowerCase();
            const contentType = mimeTypes[ext] || 'application/octet-stream';
            const data = await fs.readFile(filePath);
            res.writeHead(200, { 'Content-Type': contentType });
            res.end(data);
        }
    } catch (err) {
        if (err.code === 'ENOENT') {
            res.writeHead(404, { 'Content-Type': 'text/plain' });
            res.end('404 Not Found');
        } else {
            res.writeHead(500, { 'Content-Type': 'text/plain' });
            res.end('500 Internal Server Error');
        }
    }
});

server.listen(PORT, () => {
    console.log(`Server running at http://localhost:${PORT}/`);
});
38 changes: 26 additions & 12 deletions tests/test-backend-ops.cpp
@@ -19,6 +19,7 @@
#include <ggml-alloc.h>
#include <ggml-backend.h>
#include <ggml-cpp.h>
+#include <ggml-cpu.h>

#include <algorithm>
#include <array>
@@ -40,12 +41,18 @@
#include <thread>
#include <vector>

+#ifdef __EMSCRIPTEN__
+# define N_THREADS 1
+#else
+# define N_THREADS std::thread::hardware_concurrency()
+#endif
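Pinning N_THREADS to 1 under Emscripten is presumably because this WASM build does not enable pthreads; without -pthread at compile and link time (plus the COOP/COEP headers served above), constructing a std::thread under Emscripten fails at runtime.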

static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float max = 1.0f) {
size_t nels = ggml_nelements(tensor);
std::vector<float> data(nels);
{
// parallel initialization
-        static const size_t n_threads = std::thread::hardware_concurrency();
+        static const size_t n_threads = N_THREADS;
// static RNG initialization (revisit if n_threads stops being constant)
static std::vector<std::default_random_engine> generators = []() {
std::random_device rd;
@@ -64,15 +71,19 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
}
};

-        std::vector<std::future<void>> tasks;
-        tasks.reserve(n_threads);
-        for (size_t i = 0; i < n_threads; i++) {
-            size_t start = i*nels/n_threads;
-            size_t end = (i+1)*nels/n_threads;
-            tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
-        }
-        for (auto & t : tasks) {
-            t.get();
+        if (n_threads == 1) {
+            init_thread(0, 0, nels);
+        } else {
+            std::vector<std::future<void>> tasks;
+            tasks.reserve(n_threads);
+            for (size_t i = 0; i < n_threads; i++) {
+                size_t start = i*nels/n_threads;
+                size_t end = (i+1)*nels/n_threads;
+                tasks.push_back(std::async(std::launch::async, init_thread, i, start, end));
+            }
+            for (auto & t : tasks) {
+                t.get();
+            }
}
}

@@ -104,7 +115,7 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
};

const size_t min_blocks_per_thread = 1;
-    const size_t n_threads = std::min<size_t>(std::thread::hardware_concurrency()/2,
+    const size_t n_threads = std::min<size_t>(N_THREADS/2,
std::max<size_t>(1, n_blocks / min_blocks_per_thread));
std::vector<std::future<void>> tasks;
tasks.reserve(n_threads);
@@ -6694,6 +6705,9 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
return false;
}

+    // TODO: find a better way to set the number of threads for the CPU backend
+    ggml_backend_cpu_set_n_threads(backend_cpu, N_THREADS);

size_t n_ok = 0;
for (auto & test : test_cases) {
if (test->eval(backend, backend_cpu, op_names_filter, output_printer)) {
@@ -6934,7 +6948,7 @@ int main(int argc, char ** argv) {
auto ggml_backend_set_n_threads_fn = (ggml_backend_set_n_threads_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_set_n_threads");
if (ggml_backend_set_n_threads_fn) {
// TODO: better value for n_threads
-        ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency());
+        ggml_backend_set_n_threads_fn(backend, N_THREADS);
}

size_t free, total; // NOLINT