Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions econml/tree/_criterion.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,11 @@ cdef class Criterion:
# Methods
cdef int init(self, const DOUBLE_t[:, ::1] y,
DOUBLE_t* sample_weight, double weighted_n_samples,
SIZE_t* samples) nogil except -1
cdef int node_reset(self, SIZE_t start, SIZE_t end) nogil except -1
cdef int reset(self) nogil except -1
cdef int reverse_reset(self) nogil except -1
cdef int update(self, SIZE_t new_pos) nogil except -1
SIZE_t* samples) except -1 nogil
cdef int node_reset(self, SIZE_t start, SIZE_t end) except -1 nogil
cdef int reset(self) except -1 nogil
cdef int reverse_reset(self) except -1 nogil
cdef int update(self, SIZE_t new_pos) except -1 nogil
cdef double node_impurity(self) nogil
cdef double proxy_node_impurity(self) nogil
cdef void children_impurity(self, double* impurity_left,
Expand Down
9 changes: 4 additions & 5 deletions econml/tree/_splitter.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -103,20 +103,20 @@ cdef class Splitter:
cdef int init_sample_inds(self, SIZE_t* samples,
const SIZE_t[::1] np_samples,
DOUBLE_t* sample_weight,
SIZE_t* n_samples, double* weighted_n_samples) nogil except -1
SIZE_t* n_samples, double* weighted_n_samples) except -1 nogil

cdef int init(self, const DTYPE_t[:, :] X, const DOUBLE_t[:, ::1] y,
DOUBLE_t* sample_weight,
const SIZE_t[::1] np_samples_train,
const SIZE_t[::1] np_samples_val) nogil except -1
const SIZE_t[::1] np_samples_val) except -1 nogil

cdef int node_reset(self, SIZE_t start, SIZE_t end, double* weighted_n_node_samples,
SIZE_t start_val, SIZE_t end_val, double* weighted_n_node_samples_val) nogil except -1
SIZE_t start_val, SIZE_t end_val, double* weighted_n_node_samples_val) except -1 nogil

cdef int node_split(self,
double impurity, # Impurity of the node
SplitRecord* split,
SIZE_t* n_constant_features) nogil except -1
SIZE_t* n_constant_features) except -1 nogil

cdef void node_value_val(self, double* dest) nogil
cdef void node_jacobian_val(self, double* dest) nogil
Expand All @@ -126,4 +126,3 @@ cdef class Splitter:
cdef double proxy_node_impurity(self) nogil
cdef double proxy_node_impurity_val(self) nogil
cdef bint is_children_impurity_proxy(self) nogil

6 changes: 3 additions & 3 deletions econml/tree/_tree.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ cdef class Tree:
double impurity_train, SIZE_t n_node_samples_train,
double weighted_n_samples_train,
double impurity_val, SIZE_t n_node_samples_val,
double weighted_n_samples_val) nogil except -1
cdef int _resize(self, SIZE_t capacity) nogil except -1
cdef int _resize_c(self, SIZE_t capacity=*) nogil except -1
double weighted_n_samples_val) except -1 nogil
cdef int _resize(self, SIZE_t capacity) except -1 nogil
cdef int _resize_c(self, SIZE_t capacity=*) except -1 nogil

cdef np.ndarray _get_value_ndarray(self)
cdef np.ndarray _get_jac_ndarray(self)
Expand Down
62 changes: 33 additions & 29 deletions econml/tree/_tree.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ cdef extern from "numpy/arrayobject.h":
np.npy_intp* strides,
void* data, int flags, object obj)

cdef extern from "numpy/arrayobject.h":
int PyArray_SetBaseObject(np.ndarray arr, PyObject *obj) except -1

# =============================================================================
# Types and constants
# =============================================================================
Expand Down Expand Up @@ -121,7 +124,7 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
SIZE_t min_samples_leaf, double min_weight_leaf,
SIZE_t max_depth, double min_impurity_decrease):
""" Initialize parameters.

Parameters
----------
splitter : cython extension class of type Splitter
Expand Down Expand Up @@ -179,10 +182,10 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):
sample_weight_ptr = <DOUBLE_t*> sample_weight.data

# Initial capacity
cdef int init_capacity
cdef SIZE_t init_capacity

if tree.max_depth <= 10:
init_capacity = (2 ** (tree.max_depth + 1)) - 1
init_capacity = <SIZE_t>((2 ** (tree.max_depth + 1)) - 1)
else:
init_capacity = 2047

Expand Down Expand Up @@ -311,12 +314,12 @@ cdef class DepthFirstTreeBuilder(TreeBuilder):

# Add the node that was just split to the tree, with all the auxiliary information and
# get the `node_id` assigned to it.
node_id = tree._add_node(parent, is_left, is_leaf,
node_id = tree._add_node(parent, is_left, is_leaf,
split.feature, split.threshold,
impurity, n_node_samples, weighted_n_node_samples,
impurity_val, n_node_samples_val, weighted_n_node_samples_val)
# Memory error
if node_id == SIZE_MAX:
if node_id == <SIZE_t>SIZE_MAX:
rc = -1
break

Expand Down Expand Up @@ -404,7 +407,7 @@ cdef class Tree:
property weighted_n_node_samples:
def __get__(self):
return self._get_node_ndarray()['weighted_n_node_samples'][:self.node_count]

property impurity_train:
def __get__(self):
return self._get_node_ndarray()['impurity_train'][:self.node_count]
Expand Down Expand Up @@ -433,7 +436,7 @@ cdef class Tree:
if not self.store_jac:
raise AttributeError("Jacobian computation was not enabled. Set store_jac=True")
return self._get_jac_ndarray()[:self.node_count]

# The precond(x) of the node, for the case of linear moment trees with moment: J(x) * theta(x) - precond(x) = 0
property precond:
def __get__(self):
Expand Down Expand Up @@ -536,7 +539,7 @@ cdef class Tree:
self.capacity * sizeof(Node))
value = memcpy(self.value, (<np.ndarray> value_ndarray).data,
self.capacity * self.value_stride * sizeof(double))

if self.store_jac:
jac_ndarray = d['jac']
jac_shape = (node_ndarray.shape[0], self.n_outputs * self.n_outputs)
Expand All @@ -555,7 +558,7 @@ cdef class Tree:
precond = memcpy(self.precond, (<np.ndarray> precond_ndarray).data,
self.capacity * self.precond_stride * sizeof(double))

cdef int _resize(self, SIZE_t capacity) nogil except -1:
cdef int _resize(self, SIZE_t capacity) except -1 nogil:
"""Resize all inner arrays to `capacity`, if `capacity` == -1, then
double the size of the inner arrays.
Returns -1 in case of failure to allocate memory (and raise MemoryError)
Expand All @@ -566,15 +569,16 @@ cdef class Tree:
with gil:
raise MemoryError()

cdef int _resize_c(self, SIZE_t capacity=SIZE_MAX) nogil except -1:
cdef int _resize_c(self, SIZE_t capacity=SIZE_MAX) except -1 nogil:
"""Guts of _resize
Returns -1 in case of failure to allocate memory (and raise MemoryError)
or 0 otherwise.
"""
if capacity == self.capacity and self.nodes != NULL:

if capacity == <SIZE_t>self.capacity and self.nodes != NULL:
return 0

if capacity == SIZE_MAX:
if capacity == <SIZE_t>SIZE_MAX:
if self.capacity == 0:
capacity = 3 # default initial value
else:
Expand All @@ -588,7 +592,7 @@ cdef class Tree:
memset(<void*>(self.value + self.capacity * self.value_stride), 0,
(capacity - self.capacity) * self.value_stride *
sizeof(double))

if self.store_jac:
safe_realloc(&self.jac, capacity * self.jac_stride)
safe_realloc(&self.precond, capacity * self.precond_stride)
Expand All @@ -606,11 +610,11 @@ cdef class Tree:
return 0

cdef SIZE_t _add_node(self, SIZE_t parent, bint is_left, bint is_leaf,
SIZE_t feature, double threshold,
SIZE_t feature, double threshold,
double impurity_train, SIZE_t n_node_samples_train,
double weighted_n_node_samples_train,
double impurity_val, SIZE_t n_node_samples_val,
double weighted_n_node_samples_val) nogil except -1:
double weighted_n_node_samples_val) except -1 nogil:
"""Add a node to the tree.
The new node registers itself as the child of its parent.
Returns (size_t)(-1) on error.
Expand Down Expand Up @@ -657,29 +661,29 @@ cdef class Tree:
out = self._get_value_ndarray().take(self.apply(X), axis=0,
mode='clip')[:, :self.n_relevant_outputs, 0]
return out

cpdef np.ndarray predict_full(self, object X):
    """Return the full stored value of the node each sample in X maps to.

    The rows of the 3-d value array are picked with the indices produced by
    ``self.apply(X)`` (``mode='clip'`` clamps out-of-range indices instead
    of raising). Unlike ``predict``, the second axis is kept whole rather
    than truncated to ``n_relevant_outputs``; only index 0 of the last axis
    is returned.
    """
    node_values = self._get_value_ndarray()
    node_ids = self.apply(X)
    return node_values.take(node_ids, axis=0, mode='clip')[:, :, 0]

cpdef np.ndarray predict_jac(self, object X):
    """Return the stored Jacobian row of the node each sample in X maps to.

    Rows of the 2-d Jacobian array are selected with the indices produced
    by ``self.apply(X)``; ``mode='clip'`` clamps out-of-range indices
    instead of raising.

    Raises
    ------
    AttributeError
        If ``self.store_jac`` is false, i.e. no Jacobian was stored.
    """
    if not self.store_jac:
        # Same wording as the `jac` property so both entry points report
        # the disabled feature identically (the old text misspelled "enabled").
        raise AttributeError("Jacobian computation was not enabled. Set store_jac=True")
    out = self._get_jac_ndarray().take(self.apply(X), axis=0,
                                       mode='clip')
    return out

cpdef np.ndarray predict_precond(self, object X):
    """Return the stored precond row of the node each sample in X maps to.

    Rows of the 2-d precond array are selected with the indices produced
    by ``self.apply(X)``; ``mode='clip'`` clamps out-of-range indices
    instead of raising.

    Raises
    ------
    AttributeError
        If ``self.store_jac`` is false; the precond array is only
        available when Jacobian storage is on.
    """
    if not self.store_jac:
        # Spelling fixed ("enalbed" -> "enabled"); exception type unchanged.
        raise AttributeError("Preconditioned quantity computation was not enabled. Set store_jac=True")
    out = self._get_precond_ndarray().take(self.apply(X), axis=0,
                                           mode='clip')
    return out

cpdef predict_precond_and_jac(self, object X):
if not self.store_jac:
raise AttributeError("Preconditioned quantity computation was not enalbed. Set store_jac=True")
Expand Down Expand Up @@ -728,7 +732,7 @@ cdef class Tree:
out_ptr[i] = <SIZE_t>(node - self.nodes) # node offset

return out

cpdef object decision_path(self, object X):
    """Find the decision path (=node) for each sample in X.

    Public entry point only: the computation is delegated unchanged to
    ``self._decision_path`` and its result is returned as-is.
    """
    path_indicator = self._decision_path(X)
    return path_indicator
Expand Down Expand Up @@ -791,7 +795,7 @@ cdef class Tree:

cpdef compute_feature_importances(self, normalize=True, max_depth=None, depth_decay=.0):
"""Computes the importance of each feature (aka variable) based on impurity decrease.

Parameters
----------
normalize : bool, default True
Expand Down Expand Up @@ -846,12 +850,12 @@ cdef class Tree:
importances /= normalizer

return importances

cpdef compute_feature_heterogeneity_importances(self, normalize=True, max_depth=None, depth_decay=.0):
"""Computes the importance of each feature (aka variable) based on amount of
parameter heterogeneity it creates. Each split adds:
parent_weight * (left_weight * right_weight) * mean((value_left[k] - value_right[k])**2) / parent_weight**2

Parameters
----------
normalize : bool, default True
Expand Down Expand Up @@ -923,9 +927,9 @@ cdef class Tree:
cdef np.ndarray arr
arr = np.PyArray_SimpleNewFromData(3, shape, np.NPY_DOUBLE, self.value)
Py_INCREF(self)
arr.base = <PyObject*> self
PyArray_SetBaseObject(arr, <PyObject*> self)
return arr

cdef np.ndarray _get_jac_ndarray(self):
"""Wraps jacobian as a 2-d NumPy array.
The array keeps a reference to this Tree, which manages the underlying
Expand All @@ -937,7 +941,7 @@ cdef class Tree:
cdef np.ndarray arr
arr = np.PyArray_SimpleNewFromData(2, shape, np.NPY_DOUBLE, self.jac)
Py_INCREF(self)
arr.base = <PyObject*> self
PyArray_SetBaseObject(arr, <PyObject*> self)
return arr

cdef np.ndarray _get_precond_ndarray(self):
Expand All @@ -951,7 +955,7 @@ cdef class Tree:
cdef np.ndarray arr
arr = np.PyArray_SimpleNewFromData(2, shape, np.NPY_DOUBLE, self.precond)
Py_INCREF(self)
arr.base = <PyObject*> self
PyArray_SetBaseObject(arr, <PyObject*> self)
return arr

cdef np.ndarray _get_node_ndarray(self):
Expand All @@ -969,7 +973,7 @@ cdef class Tree:
arr = PyArray_NewFromDescr(<PyTypeObject *> np.ndarray,
<np.dtype> NODE_DTYPE, 1, shape,
strides, <void*> self.nodes,
np.NPY_DEFAULT, None)
np.NPY_ARRAY_DEFAULT, None)
Py_INCREF(self)
arr.base = <PyObject*> self
PyArray_SetBaseObject(arr, <PyObject*> self)
return arr
4 changes: 2 additions & 2 deletions econml/tree/_utils.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ ctypedef fused realloc_ptr:
(Node*)
(StackRecord*)

cdef realloc_ptr safe_realloc(realloc_ptr* p, SIZE_t nelems) nogil except *
cdef realloc_ptr safe_realloc(realloc_ptr* p, SIZE_t nelems) except * nogil

cdef np.ndarray sizet_ptr_to_ndarray(SIZE_t* data, SIZE_t size)

Expand Down Expand Up @@ -81,5 +81,5 @@ cdef class Stack:
cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t start_val, SIZE_t end_val,
SIZE_t depth, SIZE_t parent,
bint is_left, double impurity, double impurity_val,
SIZE_t n_constant_features) nogil except -1
SIZE_t n_constant_features) except -1 nogil
cdef int pop(self, StackRecord* res) nogil
13 changes: 8 additions & 5 deletions econml/tree/_utils.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,17 @@ cdef inline UINT32_t our_rand_r(UINT32_t* seed) nogil:
# good_cast = <UINT32_t>(RAND_R_MAX + 1)
# or:
# cdef np.uint32_t another_good_cast = <UINT32_t>RAND_R_MAX + 1
return seed[0] % <UINT32_t>(RAND_R_MAX + 1)
cdef np.uint32_t upper = <UINT32_t>RAND_R_MAX + 1
return seed[0] % upper


cdef realloc_ptr safe_realloc(realloc_ptr* p, SIZE_t nelems) nogil except *:
cdef realloc_ptr safe_realloc(realloc_ptr* p, SIZE_t nelems) except * nogil:
# sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython
# 0.20.1 to crash.
cdef SIZE_t nbytes = nelems * sizeof(p[0][0])
if nbytes / sizeof(p[0][0]) != nelems:
# Force to same type to avoid warning
cdef SIZE_t total = nbytes / sizeof(p[0][0])
if total != nelems:
# Overflow in the multiplication
with gil:
raise MemoryError("could not allocate (%d * %d) bytes"
Expand Down Expand Up @@ -126,7 +129,7 @@ cdef class Stack:
cdef int push(self, SIZE_t start, SIZE_t end, SIZE_t start_val, SIZE_t end_val,
SIZE_t depth, SIZE_t parent,
bint is_left, double impurity, double impurity_val,
SIZE_t n_constant_features) nogil except -1:
SIZE_t n_constant_features) except -1 nogil:
"""Push a new element onto the stack.
Return -1 in case of failure to allocate memory (and raise MemoryError)
or 0 otherwise.
Expand Down Expand Up @@ -170,4 +173,4 @@ cdef class Stack:
res[0] = stack[top - 1]
self.top = top - 1

return 0
return 0
Loading