llvm: Use Numpy ndpointer as default for pointer/array arguments (#3028)
Remove support for compiled multi-context execution.
Do not store an explicit reference to ctype binary structures after converting to Numpy.
Consolidate shared code between CPU and GPU execution.
Use Context instead of execution id when constructing Composition execution.
jvesely authored Aug 6, 2024
2 parents e93b787 + 45d8ccd commit 6b899b4
Showing 13 changed files with 203 additions and 674 deletions.
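
The mechanism behind the title change, in brief: declaring a ctypes argtype as np.ctypeslib.ndpointer lets the compiled function accept a NumPy array directly, with dtype and shape validated at call time, instead of requiring callers to allocate ctype structures and cast. A minimal, self-contained sketch of that mechanism (illustrative only, not code from this repository):

    import numpy as np

    # An ndpointer argtype accepts a NumPy array directly and validates
    # its dtype/shape, replacing manual ctypes allocation and casting.
    arg_t = np.ctypeslib.ndpointer(dtype=np.float64, shape=(4,))

    buf = np.full(4, np.nan)     # caller-owned buffer, NaN-poisoned up front
    arg_t.from_param(buf)        # accepted: dtype and shape match

    try:
        arg_t.from_param(np.zeros(3, dtype=np.float32))
    except TypeError as err:
        print("rejected:", err)  # mismatched dtype/shape fails early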
@@ -831,7 +831,7 @@ def _is_static(it:SampleIterator):
         num_evals = np.prod([d._num for d in self.search_space])

         # Map allocations to values
-        comp_exec = pnlvm.execution.CompExecution(ocm.agent_rep, [context.execution_id])
+        comp_exec = pnlvm.execution.CompExecution(ocm.agent_rep, context)
         execution_mode = ocm.parameters.comp_execution_mode._get(context)
         if execution_mode == "PTX":
             outcomes = comp_exec.cuda_evaluate(inputs, num_inputs_sets, num_evals, get_results)
@@ -2103,7 +2103,7 @@ def _function(self,
         # select_min params are:
         # params, state, min_sample_ptr, sample_ptr, min_value_ptr, value_ptr, opt_count_ptr, count
         min_tags = frozenset({"select_min", "evaluate_type_objective"})
-        bin_func = pnlvm.LLVMBinaryFunction.from_obj(self, tags=min_tags, numpy_args=(2, 4, 6))
+        bin_func = pnlvm.LLVMBinaryFunction.from_obj(self, tags=min_tags, ctype_ptr_args=(0, 1, 3, 5))

         ct_param = bin_func.byref_arg_types[0](*self._get_param_initializer(context))
         ct_state = bin_func.byref_arg_types[1](*self._get_state_initializer(context))
psyneulink/core/llvm/__init__.py: 39 changes (13 additions & 26 deletions)
@@ -123,7 +123,7 @@ def _llvm_build(target_generation=_binary_generation + 1):


 class LLVMBinaryFunction:
-    def __init__(self, name: str, *, numpy_args=()):
+    def __init__(self, name: str, *, ctype_ptr_args=()):
         self.name = name

         self.__c_func = None
@@ -143,16 +143,18 @@ def __init__(self, name: str, *, numpy_args=()):
         # Create ctype function instance
         start = time.perf_counter()
         return_type = _convert_llvm_ir_to_ctype(f.return_value.type)
+
+        self.np_arg_dtypes = [_convert_llvm_ir_to_dtype(getattr(a.type, "pointee", a.type)) for a in f.args]
+
         args = [_convert_llvm_ir_to_ctype(a.type) for a in f.args]

         # '_type_' special attribute stores pointee type for pointers
         # https://docs.python.org/3/library/ctypes.html#ctypes._Pointer._type_
         self.byref_arg_types = [a._type_ if hasattr(a, "contents") else None for a in args]
-        self.np_params = [_convert_llvm_ir_to_dtype(getattr(a.type, "pointee", a.type)) for a in f.args]

-        for a in numpy_args:
-            assert self.byref_arg_types[a] is not None
-            args[a] = np.ctypeslib.ndpointer(dtype=self.np_params[a].base, shape=self.np_params[a].shape)
+        for i, arg in enumerate(self.np_arg_dtypes):
+            if i not in ctype_ptr_args and self.byref_arg_types[i] is not None:
+                args[i] = np.ctypeslib.ndpointer(dtype=arg.base, shape=arg.shape)

         middle = time.perf_counter()
         self.__c_func_type = ctypes.CFUNCTYPE(return_type, *args)
@@ -176,11 +178,6 @@ def c_func(self):
     def __call__(self, *args, **kwargs):
         return self.c_func(*args, **kwargs)

-    def wrap_call(self, *pargs):
-        cpargs = (ctypes.byref(p) if p is not None else None for p in pargs)
-        args = zip(cpargs, self.c_func.argtypes)
-        self(*(ctypes.cast(p, t) for p, t in args))
-
     @property
     def _cuda_kernel(self):
         if self.__cuda_kernel is None:
@@ -228,32 +225,22 @@ def cuda_wrap_call(self, *args, **kwargs):

     def np_buffer_for_arg(self, arg_num, *, extra_dimensions=(), fill_value=np.nan):

-        out_base = self.np_params[arg_num].base
-        out_shape = extra_dimensions + self.np_params[arg_num].shape
+        out_base = self.np_arg_dtypes[arg_num].base
+        out_shape = extra_dimensions + self.np_arg_dtypes[arg_num].shape

         # fill the buffer with NaN poison
         return np.full(out_shape, fill_value, dtype=out_base)

     @staticmethod
     @functools.lru_cache(maxsize=32)
-    def from_obj(obj, *, tags:frozenset=frozenset(), numpy_args:tuple=()):
+    def from_obj(obj, *, tags:frozenset=frozenset(), ctype_ptr_args:tuple=()):
         name = LLVMBuilderContext.get_current().gen_llvm_function(obj, tags=tags).name
-        return LLVMBinaryFunction.get(name, numpy_args=numpy_args)
+        return LLVMBinaryFunction.get(name, ctype_ptr_args=ctype_ptr_args)

     @staticmethod
     @functools.lru_cache(maxsize=32)
-    def get(name: str, *, numpy_args:tuple=()):
-        return LLVMBinaryFunction(name, numpy_args=numpy_args)
-
-    def get_multi_run(self, *, numpy_args=()):
-        try:
-            multirun_llvm = _find_llvm_function(self.name + "_multirun")
-        except ValueError:
-            function = _find_llvm_function(self.name)
-            with LLVMBuilderContext.get_current() as ctx:
-                multirun_llvm = codegen.gen_multirun_wrapper(ctx, function)
-
-        return LLVMBinaryFunction.get(multirun_llvm.name, numpy_args=numpy_args)
+    def get(name: str, *, ctype_ptr_args:tuple=()):
+        return LLVMBinaryFunction(name, ctype_ptr_args=ctype_ptr_args)


 _cpu_engine = None
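
Note the inversion in the keyword's meaning: numpy_args listed the pointer arguments to expose as NumPy ndpointers, whereas ctype_ptr_args lists the ones that keep their raw ctypes pointer types, and every other pointer argument now defaults to NumPy (hence numpy_args=(2, 4, 6) becoming ctype_ptr_args=(0, 1, 3, 5) at the select_min call site above). A standalone sketch of the selection loop added to __init__, using toy argument types (build_argtypes is an illustrative name, not repository code):

    import ctypes
    import numpy as np

    def build_argtypes(ctype_args, np_arg_dtypes, ctype_ptr_args=()):
        # Pointer argtypes have a pointee type; scalars map to None.
        byref_arg_types = [a._type_ if hasattr(a, "contents") else None for a in ctype_args]
        args = list(ctype_args)
        for i, dt in enumerate(np_arg_dtypes):
            # Default every pointer argument to an ndpointer unless opted out.
            if i not in ctype_ptr_args and byref_arg_types[i] is not None:
                args[i] = np.ctypeslib.ndpointer(dtype=dt.base, shape=dt.shape)
        return args

    # Toy signature: (double buf[3], int n)
    ctype_args = [ctypes.POINTER(ctypes.c_double * 3), ctypes.c_int]
    np_dtypes = [np.dtype((np.float64, (3,))), np.dtype(np.int32)]

    print(build_argtypes(ctype_args, np_dtypes))        # buf becomes an ndpointer
    print(build_argtypes(ctype_args, np_dtypes, (0,)))  # buf stays a ctypes pointer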
psyneulink/core/llvm/codegen.py: 50 changes (0 additions & 50 deletions)
@@ -1119,56 +1119,6 @@ def gen_composition_run(ctx, composition, *, tags:frozenset):
     return llvm_func


-def gen_multirun_wrapper(ctx, function: ir.Function) -> ir.Function:
-    if function.module is not ctx.module:
-        function = ir.Function(ctx.module, function.type.pointee, function.name)
-        assert function.is_declaration
-
-    args = [a.type for a in function.args]
-    args.append(ctx.int32_ty.as_pointer())
-    multirun_ty = ir.FunctionType(function.type.pointee.return_type, args)
-    multirun_f = ir.Function(ctx.module, multirun_ty, function.name + "_multirun")
-    block = multirun_f.append_basic_block(name="entry")
-    builder = ir.IRBuilder(block)
-
-    multi_runs = builder.load(multirun_f.args[-1])
-    # Runs need special handling. data_in and data_out are one dimensional,
-    # but hold entries for all parallel invocations.
-    is_comp_run = len(function.args) == 7
-    if is_comp_run:
-        trials_count = builder.load(multirun_f.args[5])
-        input_count = builder.load(multirun_f.args[6])
-
-    with helpers.for_loop_zero_inc(builder, multi_runs, "multi_run_loop") as (b, index):
-        # Index all pointer arguments
-        indexed_args = []
-        for i, arg in enumerate(multirun_f.args[:-1]):
-            # Don't adjust #inputs and #trials
-            if isinstance(arg.type, ir.PointerType):
-                offset = index
-                # #runs and #trials needs to be the same for every invocation
-                if is_comp_run and i >= 5:
-                    offset = ctx.int32_ty(0)
-                    # Reset trial count for every invocation.
-                    # Previous runs might have finished earlier
-                    if i == 5:
-                        builder.store(trials_count, arg)
-                # data arrays need special handling
-                elif is_comp_run and i == 4:  # data_out
-                    offset = b.mul(index, trials_count)
-                elif is_comp_run and i == 3:  # data_in
-                    offset = b.mul(index, input_count)
-
-                arg = b.gep(arg, [offset])
-
-            indexed_args.append(arg)
-
-        b.call(function, indexed_args)
-
-    builder.ret_void()
-    return multirun_f
-
-
 def gen_autodiffcomp_exec(ctx, composition, *, tags:frozenset):
     """Creates llvm bin execute for autodiffcomp"""
     assert composition.controller is None
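
For context on the removed "compiled multi-context execution": the deleted _multirun wrapper called a compiled function once per parallel invocation, offsetting each pointer argument to that invocation's slot, with special cases for a composition run (argument 3 is data_in, 4 is data_out, 5 and 6 are shared trial/input counts). A rough pure-Python analogue, stated as an assumption for illustration:

    def multirun(fn, args, runs, is_comp_run=False):
        # args: list of sliceable buffers; for a composition run, args[5] and
        # args[6] are one-element "pointers" holding trial and input counts.
        trials_count = args[5][0] if is_comp_run else None
        input_count = args[6][0] if is_comp_run else None
        for index in range(runs):
            indexed_args = []
            for i, arg in enumerate(args):
                offset = index
                if is_comp_run and i >= 5:
                    offset = 0                     # counts are shared by all runs
                    if i == 5:
                        arg[0] = trials_count      # reset: a prior run may have ended early
                elif is_comp_run and i == 4:
                    offset = index * trials_count  # data_out: one block per run
                elif is_comp_run and i == 3:
                    offset = index * input_count   # data_in: one block per run
                indexed_args.append(arg[offset:])
            fn(*indexed_args)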