llvm: Use Numpy ndpointer as default for pointer/array arguments (#3028)
Remove support for compiled multi-context execution.
Do not store an explicit reference to ctype binary structures after converting to Numpy.
Consolidate shared code between CPU and GPU execution.
Use Context instead of execution id when constructing Composition execution.
jvesely authored Aug 6, 2024
2 parents e93b787 + 45d8ccd commit 6b899b4
Showing 13 changed files with 203 additions and 674 deletions.
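
The mechanism behind the title change, in brief: declaring a ctypes argtype as np.ctypeslib.ndpointer lets the compiled function accept a NumPy array directly, with dtype and shape validated at call time, instead of requiring callers to allocate ctype structures and cast. A minimal, self-contained sketch of that mechanism (illustrative only, not code from this repository):

    import numpy as np

    # An ndpointer argtype accepts a NumPy array directly and validates
    # its dtype/shape, replacing manual ctypes allocation and casting.
    arg_t = np.ctypeslib.ndpointer(dtype=np.float64, shape=(4,))

    buf = np.full(4, np.nan)     # caller-owned buffer, NaN-poisoned up front
    arg_t.from_param(buf)        # accepted: dtype and shape match

    try:
        arg_t.from_param(np.zeros(3, dtype=np.float32))
    except TypeError as err:
        print("rejected:", err)  # mismatched dtype/shape fails early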
@@ -831,7 +831,7 @@ def _is_static(it:SampleIterator):
         num_evals = np.prod([d._num for d in self.search_space])

         # Map allocations to values
-        comp_exec = pnlvm.execution.CompExecution(ocm.agent_rep, [context.execution_id])
+        comp_exec = pnlvm.execution.CompExecution(ocm.agent_rep, context)
         execution_mode = ocm.parameters.comp_execution_mode._get(context)
         if execution_mode == "PTX":
             outcomes = comp_exec.cuda_evaluate(inputs, num_inputs_sets, num_evals, get_results)
@@ -2103,7 +2103,7 @@ def _function(self,
         # select_min params are:
         # params, state, min_sample_ptr, sample_ptr, min_value_ptr, value_ptr, opt_count_ptr, count
         min_tags = frozenset({"select_min", "evaluate_type_objective"})
-        bin_func = pnlvm.LLVMBinaryFunction.from_obj(self, tags=min_tags, numpy_args=(2, 4, 6))
+        bin_func = pnlvm.LLVMBinaryFunction.from_obj(self, tags=min_tags, ctype_ptr_args=(0, 1, 3, 5))

         ct_param = bin_func.byref_arg_types[0](*self._get_param_initializer(context))
         ct_state = bin_func.byref_arg_types[1](*self._get_state_initializer(context))
psyneulink/core/llvm/__init__.py: 39 changes (13 additions & 26 deletions)
@@ -123,7 +123,7 @@ def _llvm_build(target_generation=_binary_generation + 1):


 class LLVMBinaryFunction:
-    def __init__(self, name: str, *, numpy_args=()):
+    def __init__(self, name: str, *, ctype_ptr_args=()):
         self.name = name

         self.__c_func = None
@@ -143,16 +143,18 @@ def __init__(self, name: str, *, numpy_args=()):
         # Create ctype function instance
         start = time.perf_counter()
         return_type = _convert_llvm_ir_to_ctype(f.return_value.type)
+
+        self.np_arg_dtypes = [_convert_llvm_ir_to_dtype(getattr(a.type, "pointee", a.type)) for a in f.args]
+
         args = [_convert_llvm_ir_to_ctype(a.type) for a in f.args]

         # '_type_' special attribute stores pointee type for pointers
         # https://docs.python.org/3/library/ctypes.html#ctypes._Pointer._type_
         self.byref_arg_types = [a._type_ if hasattr(a, "contents") else None for a in args]
-        self.np_params = [_convert_llvm_ir_to_dtype(getattr(a.type, "pointee", a.type)) for a in f.args]

-        for a in numpy_args:
-            assert self.byref_arg_types[a] is not None
-            args[a] = np.ctypeslib.ndpointer(dtype=self.np_params[a].base, shape=self.np_params[a].shape)
+        for i, arg in enumerate(self.np_arg_dtypes):
+            if i not in ctype_ptr_args and self.byref_arg_types[i] is not None:
+                args[i] = np.ctypeslib.ndpointer(dtype=arg.base, shape=arg.shape)

         middle = time.perf_counter()
         self.__c_func_type = ctypes.CFUNCTYPE(return_type, *args)
@@ -176,11 +178,6 @@ def c_func(self):
     def __call__(self, *args, **kwargs):
         return self.c_func(*args, **kwargs)

-    def wrap_call(self, *pargs):
-        cpargs = (ctypes.byref(p) if p is not None else None for p in pargs)
-        args = zip(cpargs, self.c_func.argtypes)
-        self(*(ctypes.cast(p, t) for p, t in args))
-
     @property
     def _cuda_kernel(self):
         if self.__cuda_kernel is None:
@@ -228,32 +225,22 @@ def cuda_wrap_call(self, *args, **kwargs):

     def np_buffer_for_arg(self, arg_num, *, extra_dimensions=(), fill_value=np.nan):

-        out_base = self.np_params[arg_num].base
-        out_shape = extra_dimensions + self.np_params[arg_num].shape
+        out_base = self.np_arg_dtypes[arg_num].base
+        out_shape = extra_dimensions + self.np_arg_dtypes[arg_num].shape

         # fill the buffer with NaN poison
         return np.full(out_shape, fill_value, dtype=out_base)

     @staticmethod
     @functools.lru_cache(maxsize=32)
-    def from_obj(obj, *, tags:frozenset=frozenset(), numpy_args:tuple=()):
+    def from_obj(obj, *, tags:frozenset=frozenset(), ctype_ptr_args:tuple=()):
         name = LLVMBuilderContext.get_current().gen_llvm_function(obj, tags=tags).name
-        return LLVMBinaryFunction.get(name, numpy_args=numpy_args)
+        return LLVMBinaryFunction.get(name, ctype_ptr_args=ctype_ptr_args)

     @staticmethod
     @functools.lru_cache(maxsize=32)
-    def get(name: str, *, numpy_args:tuple=()):
-        return LLVMBinaryFunction(name, numpy_args=numpy_args)
-
-    def get_multi_run(self, *, numpy_args=()):
-        try:
-            multirun_llvm = _find_llvm_function(self.name + "_multirun")
-        except ValueError:
-            function = _find_llvm_function(self.name)
-            with LLVMBuilderContext.get_current() as ctx:
-                multirun_llvm = codegen.gen_multirun_wrapper(ctx, function)
-
-        return LLVMBinaryFunction.get(multirun_llvm.name, numpy_args=numpy_args)
+    def get(name: str, *, ctype_ptr_args:tuple=()):
+        return LLVMBinaryFunction(name, ctype_ptr_args=ctype_ptr_args)


 _cpu_engine = None
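
Note the inversion in the keyword's meaning: numpy_args listed the pointer arguments to expose as NumPy ndpointers, whereas ctype_ptr_args lists the ones that keep their raw ctypes pointer types, and every other pointer argument now defaults to NumPy (hence numpy_args=(2, 4, 6) becoming ctype_ptr_args=(0, 1, 3, 5) at the select_min call site above). A standalone sketch of the selection loop added to __init__, using toy argument types (build_argtypes is an illustrative name, not repository code):

    import ctypes
    import numpy as np

    def build_argtypes(ctype_args, np_arg_dtypes, ctype_ptr_args=()):
        # Pointer argtypes have a pointee type; scalars map to None.
        byref_arg_types = [a._type_ if hasattr(a, "contents") else None for a in ctype_args]
        args = list(ctype_args)
        for i, dt in enumerate(np_arg_dtypes):
            # Default every pointer argument to an ndpointer unless opted out.
            if i not in ctype_ptr_args and byref_arg_types[i] is not None:
                args[i] = np.ctypeslib.ndpointer(dtype=dt.base, shape=dt.shape)
        return args

    # Toy signature: (double buf[3], int n)
    ctype_args = [ctypes.POINTER(ctypes.c_double * 3), ctypes.c_int]
    np_dtypes = [np.dtype((np.float64, (3,))), np.dtype(np.int32)]

    print(build_argtypes(ctype_args, np_dtypes))        # buf becomes an ndpointer
    print(build_argtypes(ctype_args, np_dtypes, (0,)))  # buf stays a ctypes pointer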
psyneulink/core/llvm/codegen.py: 50 changes (0 additions & 50 deletions)
@@ -1119,56 +1119,6 @@ def gen_composition_run(ctx, composition, *, tags:frozenset):
     return llvm_func


-def gen_multirun_wrapper(ctx, function: ir.Function) -> ir.Function:
-    if function.module is not ctx.module:
-        function = ir.Function(ctx.module, function.type.pointee, function.name)
-        assert function.is_declaration
-
-    args = [a.type for a in function.args]
-    args.append(ctx.int32_ty.as_pointer())
-    multirun_ty = ir.FunctionType(function.type.pointee.return_type, args)
-    multirun_f = ir.Function(ctx.module, multirun_ty, function.name + "_multirun")
-    block = multirun_f.append_basic_block(name="entry")
-    builder = ir.IRBuilder(block)
-
-    multi_runs = builder.load(multirun_f.args[-1])
-    # Runs need special handling. data_in and data_out are one dimensional,
-    # but hold entries for all parallel invocations.
-    is_comp_run = len(function.args) == 7
-    if is_comp_run:
-        trials_count = builder.load(multirun_f.args[5])
-        input_count = builder.load(multirun_f.args[6])
-
-    with helpers.for_loop_zero_inc(builder, multi_runs, "multi_run_loop") as (b, index):
-        # Index all pointer arguments
-        indexed_args = []
-        for i, arg in enumerate(multirun_f.args[:-1]):
-            # Don't adjust #inputs and #trials
-            if isinstance(arg.type, ir.PointerType):
-                offset = index
-                # #runs and #trials needs to be the same for every invocation
-                if is_comp_run and i >= 5:
-                    offset = ctx.int32_ty(0)
-                    # Reset trial count for every invocation.
-                    # Previous runs might have finished earlier
-                    if i == 5:
-                        builder.store(trials_count, arg)
-                # data arrays need special handling
-                elif is_comp_run and i == 4:  # data_out
-                    offset = b.mul(index, trials_count)
-                elif is_comp_run and i == 3:  # data_in
-                    offset = b.mul(index, input_count)
-
-                arg = b.gep(arg, [offset])
-
-            indexed_args.append(arg)
-
-        b.call(function, indexed_args)
-
-    builder.ret_void()
-    return multirun_f
-
-
 def gen_autodiffcomp_exec(ctx, composition, *, tags:frozenset):
     """Creates llvm bin execute for autodiffcomp"""
     assert composition.controller is None
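
For context on the removed "compiled multi-context execution": the deleted _multirun wrapper called a compiled function once per parallel invocation, offsetting each pointer argument to that invocation's slot, with special cases for a composition run (argument 3 is data_in, 4 is data_out, 5 and 6 are shared trial/input counts). A rough pure-Python analogue, stated as an assumption for illustration:

    def multirun(fn, args, runs, is_comp_run=False):
        # args: list of sliceable buffers; for a composition run, args[5] and
        # args[6] are one-element "pointers" holding trial and input counts.
        trials_count = args[5][0] if is_comp_run else None
        input_count = args[6][0] if is_comp_run else None
        for index in range(runs):
            indexed_args = []
            for i, arg in enumerate(args):
                offset = index
                if is_comp_run and i >= 5:
                    offset = 0                     # counts are shared by all runs
                    if i == 5:
                        arg[0] = trials_count      # reset: a prior run may have ended early
                elif is_comp_run and i == 4:
                    offset = index * trials_count  # data_out: one block per run
                elif is_comp_run and i == 3:
                    offset = index * input_count   # data_in: one block per run
                indexed_args.append(arg[offset:])
            fn(*indexed_args)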