Skip to content

Commit

Permalink
Support NCNN CPU inference (#1867)
Browse files Browse the repository at this point in the history
This is a useful option on systems with strong CPU and weak GPU, where
PyTorch is nontrivial to install (e.g. ppc64le systems).

Currently, whether CPU inference is used is determined by which NCNN
package is installed (the CPU-only `ncnn` package versus the
`ncnn_vulkan` package); a future PR could allow CPU inference even when
the Vulkan NCNN package is installed.

Co-authored-by: Jeremy Rand <[email protected]>
  • Loading branch information
JeremyRand and Jeremy Rand authored Jun 24, 2023
1 parent c975719 commit 6fd553e
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 33 deletions.
34 changes: 23 additions & 11 deletions backend/src/nodes/impl/ncnn/auto_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@
import gc

import numpy as np
from ncnn_vulkan import ncnn

try:
from ncnn_vulkan import ncnn

use_gpu = True
except ImportError:
from ncnn import ncnn

use_gpu = False
from sanic.log import logger

from ...utils.utils import get_h_w_c
Expand All @@ -22,9 +30,10 @@ def ncnn_auto_split(
) -> np.ndarray:
def upscale(img: np.ndarray, _):
ex = net.create_extractor()
ex.set_blob_vkallocator(blob_vkallocator)
ex.set_workspace_vkallocator(blob_vkallocator)
ex.set_staging_vkallocator(staging_vkallocator)
if use_gpu:
ex.set_blob_vkallocator(blob_vkallocator)
ex.set_workspace_vkallocator(blob_vkallocator)
ex.set_staging_vkallocator(staging_vkallocator)
# ex.set_light_mode(True)
try:
lr_c = get_h_w_c(img)[2]
Expand All @@ -49,17 +58,19 @@ def upscale(img: np.ndarray, _):
result = np.array(mat_out).transpose(1, 2, 0).astype(np.float32)
del ex, mat_in, mat_out
gc.collect()
# Clear VRAM
blob_vkallocator.clear()
staging_vkallocator.clear()
if use_gpu:
# Clear VRAM
blob_vkallocator.clear()
staging_vkallocator.clear()
return result
except Exception as e:
if "vkQueueSubmit" in str(e):
ex = None
del ex
gc.collect()
blob_vkallocator.clear()
staging_vkallocator.clear()
if use_gpu:
blob_vkallocator.clear()
staging_vkallocator.clear()
# TODO: Have someone running into this issue enable this and see if it fixes anything
# ncnn.destroy_gpu_instance()
raise RuntimeError(
Expand All @@ -72,8 +83,9 @@ def upscale(img: np.ndarray, _):
ex = None
del ex
gc.collect()
blob_vkallocator.clear()
staging_vkallocator.clear()
if use_gpu:
blob_vkallocator.clear()
staging_vkallocator.clear()
return Split()
else:
# Re-raise the exception if not an OOM error
Expand Down
32 changes: 26 additions & 6 deletions backend/src/nodes/impl/ncnn/session.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from __future__ import annotations

import tempfile
from weakref import WeakKeyDictionary

from ncnn_vulkan import ncnn
try:
from ncnn_vulkan import ncnn

use_gpu = True
except ImportError:
from ncnn import ncnn

use_gpu = False

from ...utils.exec_options import ExecutionOptions
from .model import NcnnModelWrapper
Expand All @@ -22,13 +30,25 @@ def create_ncnn_net(
net.opt.use_fp16_storage = False
net.opt.use_fp16_arithmetic = False

# Use vulkan compute
net.opt.use_vulkan_compute = True
net.set_vulkan_device(exec_options.ncnn_gpu_index)
if use_gpu:
# Use vulkan compute
net.opt.use_vulkan_compute = True
net.set_vulkan_device(exec_options.ncnn_gpu_index)

# Load model param and bin
net.load_param_mem(model.model.write_param())
net.load_model_mem(model.model.bin)
if use_gpu:
net.load_param_mem(model.model.write_param())
net.load_model_mem(model.model.bin)
else:
with tempfile.TemporaryDirectory() as tmp_model_dir:
param_filename = tmp_model_dir + "/ncnn-model.param"
bin_filename = tmp_model_dir + "/ncnn-model.bin"

model.model.write_param(param_filename)
model.model.write_bin(bin_filename)

net.load_param(param_filename)
net.load_model(bin_filename)

return net

Expand Down
54 changes: 40 additions & 14 deletions backend/src/packages/chaiNNer_ncnn/ncnn/processing/upscale_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@

import cv2
import numpy as np
from ncnn_vulkan import ncnn

try:
from ncnn_vulkan import ncnn

use_gpu = True
except ImportError:
from ncnn import ncnn

use_gpu = False
from sanic.log import logger

from nodes.impl.ncnn.auto_split import ncnn_auto_split
Expand Down Expand Up @@ -61,26 +69,44 @@ def upscale_impl(
net = get_ncnn_net(model, exec_options)
# Try/except block to catch errors
try:
vkdev = ncnn.get_gpu_device(exec_options.ncnn_gpu_index)
if use_gpu:
vkdev = ncnn.get_gpu_device(exec_options.ncnn_gpu_index)

def estimate():
heap_budget = vkdev.get_heap_budget() * 1024 * 1024 * 0.8
return MaxTileSize(
estimate_tile_size(heap_budget, model.model.bin_length, img, 4)
)
def estimate_gpu():
heap_budget = vkdev.get_heap_budget() * 1024 * 1024 * 0.8
return MaxTileSize(
estimate_tile_size(heap_budget, model.model.bin_length, img, 4)
)

with ncnn_allocators(vkdev) as (
blob_vkallocator,
staging_vkallocator,
):
return ncnn_auto_split(
img,
net,
input_name=input_name,
output_name=output_name,
blob_vkallocator=blob_vkallocator,
staging_vkallocator=staging_vkallocator,
tiler=parse_tile_size_input(tile_size, estimate_gpu),
)
else:

def estimate_cpu():
# TODO: Improve tile size estimation in CPU mode.
raise ValueError(
"Tile size estimation not supported with NCNN CPU inference"
)

with ncnn_allocators(vkdev) as (
blob_vkallocator,
staging_vkallocator,
):
return ncnn_auto_split(
img,
net,
input_name=input_name,
output_name=output_name,
blob_vkallocator=blob_vkallocator,
staging_vkallocator=staging_vkallocator,
tiler=parse_tile_size_input(tile_size, estimate),
blob_vkallocator=None,
staging_vkallocator=None,
tiler=parse_tile_size_input(tile_size, estimate_cpu),
)
except (RuntimeError, ValueError):
raise
Expand Down
11 changes: 9 additions & 2 deletions backend/src/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,8 +361,15 @@ async def list_ncnn_gpus(_request: Request):
result.append(ncnn.get_gpu_info(i).device_name())
return json(result)
except Exception as exception:
logger.error(exception, exc_info=True)
return json([])
try:
from ncnn import ncnn

result = ["cpu"]
return json(result)
except Exception as exception2:
logger.error(exception, exc_info=True)
logger.error(exception2, exc_info=True)
return json([])


@app.route("/listgpus/nvidia", methods=["GET"])
Expand Down

0 comments on commit 6fd553e

Please sign in to comment.