cimport cython
from cython cimport (
    Py_ssize_t,
    floating,
)
from libc.stdlib cimport (
    free,
    malloc,
)

import numpy as np

cimport numpy as cnp
from numpy cimport (
    complex64_t,
    complex128_t,
    float32_t,
    float64_t,
    int8_t,
    int64_t,
    intp_t,
    ndarray,
    uint8_t,
    uint64_t,
)
from numpy.math cimport NAN

cnp.import_array()

from pandas._libs cimport util
from pandas._libs.algos cimport (
    get_rank_nan_fill_val,
    kth_smallest_c,
)

from pandas._libs.algos import (
    groupsort_indexer,
    rank_1d,
    take_2d_axis1_bool_bool,
    take_2d_axis1_float64_float64,
)

from pandas._libs.dtypes cimport (
    numeric_object_t,
    numeric_t,
)
from pandas._libs.missing cimport checknull


cdef int64_t NPY_NAT = util.get_nat()
_int64_max = np.iinfo(np.int64).max

# ``np.nan`` is the canonical spelling; the ``np.NaN`` alias no longer exists
# in NumPy 2.0, so using it would fail at module import time there.
cdef float64_t NaN = np.nan

cdef enum InterpolationEnumType:
    INTERPOLATION_LINEAR,
    INTERPOLATION_LOWER,
    INTERPOLATION_HIGHER,
    INTERPOLATION_NEAREST,
    INTERPOLATION_MIDPOINT


cdef float64_t median_linear_mask(float64_t* a, int n, uint8_t* mask) nogil:
    """
    Median of the entries of ``a`` whose ``mask`` entry is zero.

    Parameters
    ----------
    a : float64_t*
        Buffer holding ``n`` values.
    n : int
        Number of entries in ``a`` and ``mask``.
    mask : uint8_t*
        Per-entry flags; a non-zero flag excludes the entry.

    Returns
    -------
    float64_t
        The median of the unmasked entries, or NaN when ``n == 0`` or every
        entry is masked.

    Notes
    -----
    When nothing is masked, ``a`` itself is handed to ``kth_smallest_c``.
    NOTE(review): that helper presumably reorders the buffer in place --
    confirm against ``pandas._libs.algos`` before relying on ``a`` afterwards.
    """
    cdef:
        int i, j, na_count = 0
        float64_t* tmp
        float64_t result

    if n == 0:
        return NaN

    # count NAs
    for i in range(n):
        if mask[i]:
            na_count += 1

    if na_count:
        if na_count == n:
            return NaN

        # Compact the unmasked values into a scratch buffer.  The explicit
        # cast is required: Cython does not implicitly convert ``void *``.
        tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t))
        if tmp is NULL:
            # Never write through a failed allocation.
            raise MemoryError()

        j = 0
        for i in range(n):
            if not mask[i]:
                tmp[j] = a[i]
                j += 1

        a = tmp
        n -= na_count

    result = calc_median_linear(a, n, na_count)

    if na_count:
        # ``a`` only points at the scratch buffer when something was masked.
        free(a)

    return result


cdef float64_t median_linear(float64_t* a, int n) nogil:
    """
    Median of the non-NaN entries of ``a``.

    Parameters
    ----------
    a : float64_t*
        Buffer holding ``n`` values.
    n : int
        Number of entries in ``a``.

    Returns
    -------
    float64_t
        The median of the entries that compare equal to themselves (i.e. are
        not NaN), or NaN when ``n == 0`` or every entry is NaN.

    Notes
    -----
    When no NaN is present, ``a`` itself is handed to ``kth_smallest_c``.
    NOTE(review): that helper presumably reorders the buffer in place --
    confirm against ``pandas._libs.algos`` before relying on ``a`` afterwards.
    """
    cdef:
        int i, j, na_count = 0
        float64_t* tmp
        float64_t result

    if n == 0:
        return NaN

    # count NAs
    for i in range(n):
        if a[i] != a[i]:
            na_count += 1

    if na_count:
        if na_count == n:
            return NaN

        # Compact the non-NaN values into a scratch buffer.  The explicit
        # cast is required: Cython does not implicitly convert ``void *``.
        tmp = <float64_t*>malloc((n - na_count) * sizeof(float64_t))
        if tmp is NULL:
            # Never write through a failed allocation.
            raise MemoryError()

        j = 0
        for i in range(n):
            if a[i] == a[i]:
                tmp[j] = a[i]
                j += 1

        a = tmp
        n -= na_count

    result = calc_median_linear(a, n, na_count)

    if na_count:
        # ``a`` only points at the scratch buffer when NaNs were dropped.
        free(a)

    return result


cdef float64_t calc_median_linear(float64_t* a, int n, int na_count) nogil:
    """
    Median of the ``n`` values in ``a`` via ``kth_smallest_c``.

    For odd ``n`` this is the element of rank ``n // 2``; for even ``n`` it is
    the mean of the elements of rank ``n // 2`` and ``n // 2 - 1``.

    ``na_count`` is accepted for interface compatibility and is not used here.
    Callers in this file only invoke this with ``n >= 1``.
    """
    cdef:
        float64_t result

    if n % 2:
        result = kth_smallest_c(a, n // 2, n)
    else:
        result = (kth_smallest_c(a, n // 2, n) +
                  kth_smallest_c(a, n // 2 - 1, n)) / 2

    return result


ctypedef fused int64float_t:
    int64_t
    uint64_t
    float32_t
    float64_t


@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(
    ndarray[float64_t, ndim=2] out,
    ndarray[int64_t] counts,
    ndarray[float64_t, ndim=2] values,
    ndarray[intp_t] labels,
    Py_ssize_t min_count=-1,
    const uint8_t[:, :] mask=None,
    uint8_t[:, ::1] result_mask=None,
) -> None:
    """
    Only aggregates on axis=0
    """
    cdef:
        Py_ssize_t i, j, N, K, ngroups, size
        ndarray[intp_t] _counts
        ndarray[float64_t, ndim=2] data
        ndarray[uint8_t, ndim=2] data_mask
        ndarray[intp_t] indexer
        float64_t* ptr
        uint8_t* ptr_mask
        float64_t result
        bint uses_mask = mask is not None

    assert min_count == -1, "'min_count' only used in sum and prod"

    ngroups = len(counts)
    N, K = (