""" Cython implementations for internal ExtensionArrays. """ cimport cython import numpy as np cimport numpy as cnp from cpython cimport PyErr_Clear from numpy cimport ndarray cnp.import_array() @cython.freelist(16) cdef class NDArrayBacked: """ Implementing these methods in cython improves performance quite a bit. import pandas as pd from pandas._libs.arrays import NDArrayBacked as cls dti = pd.date_range("2016-01-01", periods=3) dta = dti._data arr = dta._ndarray obj = cls._simple_new(arr, arr.dtype) # for foo in [arr, dta, obj]: ... %timeit foo.copy() 299 ns ± 30 ns per loop # <-- arr underlying ndarray (for reference) 530 ns ± 9.24 ns per loop # <-- dta with cython NDArrayBacked 1.66 µs ± 46.3 ns per loop # <-- dta without cython NDArrayBacked 328 ns ± 5.29 ns per loop # <-- obj with NDArrayBacked.__cinit__ 371 ns ± 6.97 ns per loop # <-- obj with NDArrayBacked._simple_new %timeit foo.T 125 ns ± 6.27 ns per loop # <-- arr underlying ndarray (for reference) 226 ns ± 7.66 ns per loop # <-- dta with cython NDArrayBacked 911 ns ± 16.6 ns per loop # <-- dta without cython NDArrayBacked 215 ns ± 4.54 ns per loop # <-- obj with NDArrayBacked._simple_new """ # TODO: implement take in terms of cnp.PyArray_TakeFrom # TODO: implement concat_same_type in terms of cnp.PyArray_Concatenate # cdef: # readonly ndarray _ndarray # readonly object _dtype def __init__(self, ndarray values, object dtype): self._ndarray = values self._dtype = dtype @classmethod def _simple_new(cls, ndarray values, object dtype): cdef: NDArrayBacked obj obj = NDArrayBacked.__new__(cls) obj._ndarray = values obj._dtype = dtype return obj cpdef NDArrayBacked _from_backing_data(self, ndarray values): """ Construct a new ExtensionArray `new_array` with `arr` as its _ndarray. This should round-trip: self == self._from_backing_data(self._ndarray) """ # TODO: re-reuse simple_new if/when it can be cpdef cdef: NDArrayBacked obj obj = NDArrayBacked.__new__(type(self)) obj._ndarray = values obj._dtype = self._dtype return obj cpdef __setstate__(self, state): if isinstance(state, dict): if "_data" in state: data = state.pop("_data") elif "_ndarray" in state: data = state.pop("_ndarray") else: raise ValueError # pragma: no cover self._ndarray = data self._dtype = state.pop("_dtype") for key, val in state.items(): setattr(self, key, val) elif isinstance(state, tuple): if len(state) != 3: if len(state) == 1 and isinstance(state[0], dict): self.__setstate__(state[0]) return raise NotImplementedError(state) # pragma: no cover data, dtype = state[:2] if isinstance(dtype, np.ndarray): dtype, data = data, dtype self._ndarray = data self._dtype = dtype if isinstance(state[2], dict): for key, val in state[2].items(): setattr(self, key, val) else: raise NotImplementedError(state) # pragma: no cover else: raise NotImplementedError(state) # pragma: no cover def __len__(self) -> int: return len(self._ndarray) @property def shape(self): # object cast bc _ndarray.shape is npy_intp* return ((self._ndarray)).shape @property def ndim(self) -> int: return self._ndarray.ndim @property def size(self) -> int: return self._ndarray.size @property def nbytes(self) -> int: return self._ndarray.nbytes def copy(self, order="C"): cdef: cnp.NPY_ORDER order_code int success success = cnp.PyArray_OrderConverter(order, &order_code) if not success: # clear exception so that we don't get a SystemError PyErr_Clear() # same message used by numpy msg = f"order must be one of 'C', 'F', 'A', or 'K' (got '{order}')" raise ValueError(msg) res_values = cnp.PyArray_NewCopy(self._ndarray, order_code) return self._from_backing_data(res_values) def delete(self, loc, axis=0): res_values = np.delete(self._ndarray, loc, axis=axis) return self._from_backing_data(res_values) def swapaxes(self, axis1, axis2): res_values = cnp.PyArray_SwapAxes(self._ndarray, axis1, axis2) return self._from_backing_data(res_values) # TODO: pass NPY_MAXDIMS equiv to axis=None? def repeat(self, repeats, axis: int | np.integer = 0): if axis is None: axis = 0 res_values = cnp.PyArray_Repeat(self._ndarray, repeats, axis) return self._from_backing_data(res_values) def reshape(self, *args, **kwargs): res_values = self._ndarray.reshape(*args, **kwargs) return self._from_backing_data(res_values) def ravel(self, order="C"): # cnp.PyArray_OrderConverter(PyObject* obj, NPY_ORDER* order) # res_values = cnp.PyArray_Ravel(self._ndarray, order) res_values = self._ndarray.ravel(order) return self._from_backing_data(res_values) @property def T(self): res_values = self._ndarray.T return self._from_backing_data(res_values) def transpose(self, *axes): res_values = self._ndarray.transpose(*axes) return self._from_backing_data(res_values)