Skip to content

Commit

Permalink
PERF: avoid potentially-copying ravel (pandas-dev#46733)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel authored Apr 11, 2022
1 parent cd4caa6 commit a2029ce
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
30 changes: 22 additions & 8 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import cython
import numpy as np

cimport numpy as cnp
from cpython.object cimport PyObject
from numpy cimport (
int32_t,
int64_t,
Expand Down Expand Up @@ -273,7 +274,8 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool = True):

@cython.boundscheck(False)
@cython.wraparound(False)
def datetime_to_datetime64(ndarray[object] values):
def datetime_to_datetime64(ndarray values):
# ndarray[object], but can't declare object without ndim
"""
Convert ndarray of datetime-like objects to int64 array representing
nanosecond timestamps.
Expand All @@ -288,20 +290,27 @@ def datetime_to_datetime64(ndarray[object] values):
inferred_tz : tzinfo or None
"""
cdef:
Py_ssize_t i, n = len(values)
Py_ssize_t i, n = values.size
object val
int64_t[:] iresult
int64_t ival
ndarray iresult # int64_t, but can't declare that without specifying ndim
npy_datetimestruct dts
_TSObject _ts
bint found_naive = False
tzinfo inferred_tz = None

result = np.empty(n, dtype='M8[ns]')
cnp.broadcast mi

result = np.empty((<object>values).shape, dtype='M8[ns]')
iresult = result.view('i8')

mi = cnp.PyArray_MultiIterNew2(iresult, values)
for i in range(n):
val = values[i]
# Analogous to: val = values[i]
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

if checknull_with_nat(val):
iresult[i] = NPY_NAT
ival = NPY_NAT
elif PyDateTime_Check(val):
if val.tzinfo is not None:
if found_naive:
Expand All @@ -314,18 +323,23 @@ def datetime_to_datetime64(ndarray[object] values):
inferred_tz = val.tzinfo

_ts = convert_datetime_to_tsobject(val, None)
iresult[i] = _ts.value
ival = _ts.value
check_dts_bounds(&_ts.dts)
else:
found_naive = True
if inferred_tz is not None:
raise ValueError('Cannot mix tz-aware with '
'tz-naive values')
iresult[i] = pydatetime_to_dt64(val, &dts)
ival = pydatetime_to_dt64(val, &dts)
check_dts_bounds(&dts)
else:
raise TypeError(f'Unrecognized value type: {type(val)}')

# Analogous to: iresult[i] = ival
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival

cnp.PyArray_MultiIter_NEXT(mi)

return result, inferred_tz


Expand Down
3 changes: 1 addition & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2247,10 +2247,9 @@ def objects_to_datetime64ns(
result = result.reshape(data.shape, order=order)
except ValueError as err:
try:
values, tz_parsed = conversion.datetime_to_datetime64(data.ravel("K"))
values, tz_parsed = conversion.datetime_to_datetime64(data)
# If tzaware, these values represent unix timestamps, so we
# return them as i8 to distinguish from wall times
values = values.reshape(data.shape, order=order)
return values.view("i8"), tz_parsed
except (ValueError, TypeError):
raise err
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,14 +429,15 @@ def _formatter(self, boxed: bool = False):

return get_format_timedelta64(self, box=True)

@dtl.ravel_compat
def _format_native_types(
self, *, na_rep="NaT", date_format=None, **kwargs
) -> npt.NDArray[np.object_]:
from pandas.io.formats.format import get_format_timedelta64

formatter = get_format_timedelta64(self._ndarray, na_rep)
return np.array([formatter(x) for x in self._ndarray])
# equiv: np.array([formatter(x) for x in self._ndarray])
# but independent of dimension
return np.frompyfunc(formatter, 1, 1)(self._ndarray)

# ----------------------------------------------------------------
# Arithmetic Methods
Expand Down

0 comments on commit a2029ce

Please sign in to comment.