# cython: boundscheck=False, wraparound=False, cdivision=True

from libc.math cimport (
    round,
    signbit,
    sqrt,
)
from libcpp.deque cimport deque

from pandas._libs.algos cimport TiebreakEnumType

import numpy as np

cimport numpy as cnp
from numpy cimport (
    float32_t,
    float64_t,
    int64_t,
    ndarray,
)

cnp.import_array()

import cython

from pandas._libs.algos import is_monotonic


cdef extern from "../src/skiplist.h":
    ctypedef struct node_t:
        node_t **next
        int *width
        double value
        int is_nil
        int levels
        int ref_count

    ctypedef struct skiplist_t:
        node_t *head
        node_t **tmp_chain
        int *tmp_steps
        int size
        int maxlevels

    skiplist_t* skiplist_init(int) nogil
    void skiplist_destroy(skiplist_t*) nogil
    double skiplist_get(skiplist_t*, int, int*) nogil
    int skiplist_insert(skiplist_t*, double) nogil
    int skiplist_remove(skiplist_t*, double) nogil
    int skiplist_rank(skiplist_t*, double) nogil
    int skiplist_min_rank(skiplist_t*, double) nogil

cdef:
    float32_t MINfloat32 = np.NINF
    float64_t MINfloat64 = np.NINF

    float32_t MAXfloat32 = np.inf
    float64_t MAXfloat64 = np.inf

    float64_t NaN = <float64_t>np.NaN

cdef bint is_monotonic_increasing_start_end_bounds(
    ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
):
    return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]

# ----------------------------------------------------------------------
# Rolling sum


cdef float64_t calc_sum(int64_t minp, int64_t nobs, float64_t sum_x,
                        int64_t num_consecutive_same_value, float64_t prev_value
                        ) nogil:
    cdef:
        float64_t result

    if nobs == 0 == minp:
        result = 0
    elif nobs >= minp:
        if num_consecutive_same_value >= nobs:
            result = prev_value * nobs
        else:
            result = sum_x
    else:
        result = NaN

    return result


cdef void add_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
                  float64_t *compensation, int64_t *num_consecutive_same_value,
                  float64_t *prev_value) nogil:
    """ add a value from the sum calc using Kahan summation """

    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] + 1
        y = val - compensation[0]
        t = sum_x[0] + y
        compensation[0] = t - sum_x[0] - y
        sum_x[0] = t

        # GH#42064, record num of same values to remove floating point artifacts
        if val == prev_value[0]:
            num_consecutive_same_value[0] += 1
        else:
            # reset to 1 (include current value itself)
            num_consecutive_same_value[0] = 1
        prev_value[0] = val


cdef void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x,
                     float64_t *compensation) nogil:
    """ remove a value from the sum calc using Kahan summation """

    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] - 1
        y = - val - compensation[0]
        t = sum_x[0] + y
        compensation[0] = t - sum_x[0] - y
        sum_x[0] = t


def roll_sum(const float64_t[:] values, ndarray[int64_t] start,
             ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    cdef:
        Py_ssize_t i, j
        float64_t sum_x, compensation_add, compensation_remove, prev_value
        int64_t s, e, num_consecutive_same_value
        int64_t nobs = 0, N = len(start)
        ndarray[float64_t] output
        bint is_monotonic_increasing_bounds

    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    output = np.empty(N, dtype=np.float64)

    with nogil:

        for i in range(0, N):
            s = start[i]
            e = end[i]

            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:

                # setup
                prev_value = values[s]
                num_consecutive_same_value = 0
                sum_x = compensation_add = compensation_remove = 0
                nobs = 0
                for j in range(s, e):
                    add_sum(values[j], &nobs, &sum_x, &compensation_add,
                            &num_consecutive_same_value, &prev_value)

            else:

                # calculate deletes
                for j in range(start[i - 1], s):
                    remove_sum(values[j], &nobs, &sum_x, &compensation_remove)

                # calculate adds
                for j in range(end[i - 1], e):
                    add_sum(values[j], &nobs, &sum_x, &compensation_add,
                            &num_consecutive_same_value, &prev_value)

            output[i] = calc_sum(
                minp, nobs, sum_x, num_consecutive_same_value, prev_value
            )

            if not is_monotonic_increasing_bounds:
                nobs = 0
                sum_x = 0.0
                compensation_remove = 0.0

    return output


# ----------------------------------------------------------------------
# Rolling mean


cdef float64_t calc_mean(int64_t minp, Py_ssize_t nobs, Py_ssize_t neg_ct,
                         float64_t sum_x, int64_t num_consecutive_same_value,
                         float64_t prev_value) nogil:
    cdef:
        float64_t result

    if nobs >= minp and nobs > 0:
        result = sum_x / <float64_t>nobs
        if num_consecutive_same_value >= nobs:
            result = prev_value
        elif neg_ct == 0 and result < 0:
            # all positive
            result = 0
        elif neg_ct == nobs and result > 0:
            # all negative
            result = 0
        else:
            pass
    else:
        result = NaN
    return result


cdef void add_mean(
    float64_t val,
    Py_ssize_t *nobs,
    float64_t *sum_x,
    Py_ssize_t *neg_ct,
    float64_t *compensation,
    int64_t *num_consecutive_same_value,
    float64_t *prev_value
) nogil:
    """ add a value from the mean calc using Kahan summation """
    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] + 1
        y = val - compensation[0]
        t = sum_x[0] + y
        compensation[0] = t - sum_x[0] - y
        sum_x[0] = t
        if signbit(val):
            neg_ct[0] = neg_ct[0] + 1

        # GH#42064, record num of same values to remove floating point artifacts
        if val == prev_value[0]:
            num_consecutive_same_value[0] += 1
        else:
            # reset to 1 (include current value itself)
            num_consecutive_same_value[0] = 1
        prev_value[0] = val


cdef void remove_mean(float64_t val, Py_ssize_t *nobs, float64_t *sum_x,
                      Py_ssize_t *neg_ct, float64_t *compensation) nogil:
    """ remove a value from the mean calc using Kahan summation """
    cdef:
        float64_t y, t

    if val == val:
        nobs[0] = nobs[0] - 1
        y = - val - compensation[0]
        t = sum_x[0] + y
        compensation[0] = t - sum_x[0] - y
        sum_x[0] = t
        if signbit(val):
            neg_ct[0] = neg_ct[0] - 1


def roll_mean(const float64_t[:] values, ndarray[int64_t] start,
              ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    cdef:
        float64_t val, compensation_add, compensation_remove, sum_x, prev_value
        int64_t s, e, num_consecutive_same_value
        Py_ssize_t nobs, i, j, neg_ct, N = len(start)
        ndarray[float64_t] output
        bint is_monotonic_increasing_bounds

    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    output = np.empty(N, dtype=np.float64)

    with nogil:

        for i in range(0, N):
            s = start[i]
            e = end[i]

            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:

                # setup
                compensation_add = compensation_remove = sum_x = 0
                nobs = neg_ct = 0
                prev_value = values[s]
                num_consecutive_same_value = 0
                for j in range(s, e):
                    val = values[j]
                    add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add,
                             &num_consecutive_same_value, &prev_value)

            else:

                # calculate deletes
                for j in range(start[i - 1], s):
                    val = values[j]
                    remove_mean(val, &nobs, &sum_x, &neg_ct, &compensation_remove)

                # calculate adds
                for j in range(end[i - 1], e):
                    val = values[j]
                    add_mean(val, &nobs, &sum_x, &neg_ct, &compensation_add,
                             &num_consecutive_same_value, &prev_value)

            output[i] = calc_mean(
                minp, nobs, neg_ct, sum_x, num_consecutive_same_value, prev_value
            )

            if not is_monotonic_increasing_bounds:
                nobs = 0
                neg_ct = 0
                sum_x = 0.0
                compensation_remove = 0.0
    return output

# ----------------------------------------------------------------------
# Rolling variance


cdef float64_t calc_var(
    int64_t minp,
    int ddof,
    float64_t nobs,
    float64_t ssqdm_x,
    int64_t num_consecutive_same_value
) nogil:
    cdef:
        float64_t result

    # Variance is unchanged if no observation is added or removed
    if (nobs >= minp) and (nobs > ddof):

        # pathological case & repeatedly same values case
        if nobs == 1 or num_consecutive_same_value >= nobs:
            result = 0
        else:
            result = ssqdm_x / (nobs - <float64_t>ddof)
    else:
        result = NaN

    return result


cdef void add_var(
    float64_t val,
    float64_t *nobs,
    float64_t *mean_x,
    float64_t *ssqdm_x,
    float64_t *compensation,
    int64_t *num_consecutive_same_value,
    float64_t *prev_value,
) nogil:
    """ add a value from the var calc """
    cdef:
        float64_t delta, prev_mean, y, t

    # GH#21813, if msvc 2017 bug is resolved, we should be OK with != instead of `isnan`
    if val != val:
        return

    nobs[0] = nobs[0] + 1

    # GH#42064, record num of same values to remove floating point artifacts
    if val == prev_value[0]:
        num_consecutive_same_value[0] += 1
    else:
        # reset to 1 (include current value itself)
        num_consecutive_same_value[0] = 1
    prev_value[0] = val

    # Welford's method for the online variance-calculation
    # using Kahan summation
    # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    prev_mean = mean_x[0] - compensation[0]
    y = val - compensation[0]
    t = y - mean_x[0]
    compensation[0] = t + mean_x[0] - y
    delta = t
    if nobs[0]:
        mean_x[0] = mean_x[0] + delta / nobs[0]
    else:
        mean_x[0] = 0
    ssqdm_x[0] = ssqdm_x[0] + (val - prev_mean) * (val - mean_x[0])


cdef void remove_var(
    float64_t val,
    float64_t *nobs,
    float64_t *mean_x,
    float64_t *ssqdm_x,
    float64_t *compensation
) nogil:
    """ remove a value from the var calc """
    cdef:
        float64_t delta, prev_mean, y, t
    if val == val:
        nobs[0] = nobs[0] - 1
        if nobs[0]:
            # Welford's method for the online variance-calculation
            # using Kahan summation
            # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
            prev_mean = mean_x[0] - compensation[0]
            y = val - compensation[0]
            t = y - mean_x[0]
            compensation[0] = t + mean_x[0] - y
            delta = t
            mean_x[0] = mean_x[0] - delta / nobs[0]
            ssqdm_x[0] = ssqdm_x[0] - (val - prev_mean) * (val - mean_x[0])
        else:
            mean_x[0] = 0
            ssqdm_x[0] = 0


def roll_var(const float64_t[:] values, ndarray[int64_t] start,
             ndarray[int64_t] end, int64_t minp, int ddof=1) -> np.ndarray:
    """
    Numerically stable implementation using Welford's method.
    """
    cdef:
        float64_t mean_x, ssqdm_x, nobs, compensation_add,
        float64_t compensation_remove, prev_value
        int64_t s, e, num_consecutive_same_value
        Py_ssize_t i, j, N = len(start)
        ndarray[float64_t] output
        bint is_monotonic_increasing_bounds

    minp = max(minp, 1)
    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    output = np.empty(N, dtype=np.float64)

    with nogil:

        for i in range(0, N):

            s = start[i]
            e = end[i]

            # Over the first window, observations can only be added
            # never removed
            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:

                prev_value = values[s]
                num_consecutive_same_value = 0

                mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
                for j in range(s, e):
                    add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
                            &num_consecutive_same_value, &prev_value)

            else:

                # After the first window, observations can both be added
                # and removed

                # calculate deletes
                for j in range(start[i - 1], s):
                    remove_var(values[j], &nobs, &mean_x, &ssqdm_x,
                               &compensation_remove)

                # calculate adds
                for j in range(end[i - 1], e):
                    add_var(values[j], &nobs, &mean_x, &ssqdm_x, &compensation_add,
                            &num_consecutive_same_value, &prev_value)

            output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value)

            if not is_monotonic_increasing_bounds:
                nobs = 0.0
                mean_x = 0.0
                ssqdm_x = 0.0
                compensation_remove = 0.0

    return output

# ----------------------------------------------------------------------
# Rolling skewness


cdef float64_t calc_skew(int64_t minp, int64_t nobs,
                         float64_t x, float64_t xx, float64_t xxx,
                         int64_t num_consecutive_same_value
                         ) nogil:
    cdef:
        float64_t result, dnobs
        float64_t A, B, C, R

    if nobs >= minp:
        dnobs = <float64_t>nobs
        A = x / dnobs
        B = xx / dnobs - A * A
        C = xxx / dnobs - A * A * A - 3 * A * B

        if nobs < 3:
            result = NaN
        # GH 42064 46431
        # uniform case, force result to be 0
        elif num_consecutive_same_value >= nobs:
            result = 0.0
        # #18044: with uniform distribution, floating issue will
        #         cause B != 0. and cause the result is a very
        #         large number.
        #
        #         in core/nanops.py nanskew/nankurt call the function
        #         _zero_out_fperr(m2) to fix floating error.
        #         if the variance is less than 1e-14, it could be
        #         treat as zero, here we follow the original
        #         skew/kurt behaviour to check B <= 1e-14
        elif B <= 1e-14:
            result = NaN
        else:
            R = sqrt(B)
            result = ((sqrt(dnobs * (dnobs - 1.)) * C) /
                      ((dnobs - 2) * R * R * R))
    else:
        result = NaN

    return result


cdef void add_skew(float64_t val, int64_t *nobs,
                   float64_t *x, float64_t *xx,
                   float64_t *xxx,
                   float64_t *compensation_x,
                   float64_t *compensation_xx,
                   float64_t *compensation_xxx,
                   int64_t *num_consecutive_same_value,
                   float64_t *prev_value,
                   ) nogil:
    """ add a value from the skew calc """
    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] + 1

        y = val - compensation_x[0]
        t = x[0] + y
        compensation_x[0] = t - x[0] - y
        x[0] = t
        y = val * val - compensation_xx[0]
        t = xx[0] + y
        compensation_xx[0] = t - xx[0] - y
        xx[0] = t
        y = val * val * val - compensation_xxx[0]
        t = xxx[0] + y
        compensation_xxx[0] = t - xxx[0] - y
        xxx[0] = t

        # GH#42064, record num of same values to remove floating point artifacts
        if val == prev_value[0]:
            num_consecutive_same_value[0] += 1
        else:
            # reset to 1 (include current value itself)
            num_consecutive_same_value[0] = 1
        prev_value[0] = val


cdef void remove_skew(float64_t val, int64_t *nobs,
                      float64_t *x, float64_t *xx,
                      float64_t *xxx,
                      float64_t *compensation_x,
                      float64_t *compensation_xx,
                      float64_t *compensation_xxx) nogil:
    """ remove a value from the skew calc """
    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] - 1

        y = - val - compensation_x[0]
        t = x[0] + y
        compensation_x[0] = t - x[0] - y
        x[0] = t
        y = - val * val - compensation_xx[0]
        t = xx[0] + y
        compensation_xx[0] = t - xx[0] - y
        xx[0] = t
        y = - val * val * val - compensation_xxx[0]
        t = xxx[0] + y
        compensation_xxx[0] = t - xxx[0] - y
        xxx[0] = t


def roll_skew(ndarray[float64_t] values, ndarray[int64_t] start,
              ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    cdef:
        Py_ssize_t i, j
        float64_t val, min_val, mean_val, sum_val = 0
        float64_t compensation_xxx_add, compensation_xxx_remove
        float64_t compensation_xx_add, compensation_xx_remove
        float64_t compensation_x_add, compensation_x_remove
        float64_t x, xx, xxx
        float64_t prev_value
        int64_t nobs = 0, N = len(start), V = len(values), nobs_mean = 0
        int64_t s, e, num_consecutive_same_value
        ndarray[float64_t] output, values_copy
        bint is_monotonic_increasing_bounds

    minp = max(minp, 3)
    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    output = np.empty(N, dtype=np.float64)
    min_val = np.nanmin(values)
    values_copy = np.copy(values)

    with nogil:
        for i in range(0, V):
            val = values_copy[i]
            if val == val:
                nobs_mean += 1
                sum_val += val
        mean_val = sum_val / nobs_mean
        # Other cases would lead to imprecision for smallest values
        if min_val - mean_val > -1e5:
            mean_val = round(mean_val)
            for i in range(0, V):
                values_copy[i] = values_copy[i] - mean_val

        for i in range(0, N):

            s = start[i]
            e = end[i]

            # Over the first window, observations can only be added
            # never removed
            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:

                prev_value = values[s]
                num_consecutive_same_value = 0

                compensation_xxx_add = compensation_xxx_remove = 0
                compensation_xx_add = compensation_xx_remove = 0
                compensation_x_add = compensation_x_remove = 0
                x = xx = xxx = 0
                nobs = 0
                for j in range(s, e):
                    val = values_copy[j]
                    add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
                             &compensation_xx_add, &compensation_xxx_add,
                             &num_consecutive_same_value, &prev_value)

            else:

                # After the first window, observations can both be added
                # and removed
                # calculate deletes
                for j in range(start[i - 1], s):
                    val = values_copy[j]
                    remove_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_remove,
                                &compensation_xx_remove, &compensation_xxx_remove)

                # calculate adds
                for j in range(end[i - 1], e):
                    val = values_copy[j]
                    add_skew(val, &nobs, &x, &xx, &xxx, &compensation_x_add,
                             &compensation_xx_add, &compensation_xxx_add,
                             &num_consecutive_same_value, &prev_value)

            output[i] = calc_skew(minp, nobs, x, xx, xxx, num_consecutive_same_value)

            if not is_monotonic_increasing_bounds:
                nobs = 0
                x = 0.0
                xx = 0.0
                xxx = 0.0

    return output

# ----------------------------------------------------------------------
# Rolling kurtosis


cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
                         float64_t x, float64_t xx,
                         float64_t xxx, float64_t xxxx,
                         int64_t num_consecutive_same_value,
                         ) nogil:
    cdef:
        float64_t result, dnobs
        float64_t A, B, C, D, R, K

    if nobs >= minp:
        if nobs < 4:
            result = NaN
        # GH 42064 46431
        # uniform case, force result to be -3.
        elif num_consecutive_same_value >= nobs:
            result = -3.
        else:
            dnobs = <float64_t>nobs
            A = x / dnobs
            R = A * A
            B = xx / dnobs - R
            R = R * A
            C = xxx / dnobs - R - 3 * A * B
            R = R * A
            D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A

            # #18044: with uniform distribution, floating issue will
            #         cause B != 0. and cause the result is a very
            #         large number.
            #
            #         in core/nanops.py nanskew/nankurt call the function
            #         _zero_out_fperr(m2) to fix floating error.
            #         if the variance is less than 1e-14, it could be
            #         treat as zero, here we follow the original
            #         skew/kurt behaviour to check B <= 1e-14
            if B <= 1e-14:
                result = NaN
            else:
                K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
                result = K / ((dnobs - 2.) * (dnobs - 3.))
    else:
        result = NaN

    return result


cdef void add_kurt(float64_t val, int64_t *nobs,
                   float64_t *x, float64_t *xx,
                   float64_t *xxx, float64_t *xxxx,
                   float64_t *compensation_x,
                   float64_t *compensation_xx,
                   float64_t *compensation_xxx,
                   float64_t *compensation_xxxx,
                   int64_t *num_consecutive_same_value,
                   float64_t *prev_value
                   ) nogil:
    """ add a value from the kurotic calc """
    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] + 1

        y = val - compensation_x[0]
        t = x[0] + y
        compensation_x[0] = t - x[0] - y
        x[0] = t
        y = val * val - compensation_xx[0]
        t = xx[0] + y
        compensation_xx[0] = t - xx[0] - y
        xx[0] = t
        y = val * val * val - compensation_xxx[0]
        t = xxx[0] + y
        compensation_xxx[0] = t - xxx[0] - y
        xxx[0] = t
        y = val * val * val * val - compensation_xxxx[0]
        t = xxxx[0] + y
        compensation_xxxx[0] = t - xxxx[0] - y
        xxxx[0] = t

        # GH#42064, record num of same values to remove floating point artifacts
        if val == prev_value[0]:
            num_consecutive_same_value[0] += 1
        else:
            # reset to 1 (include current value itself)
            num_consecutive_same_value[0] = 1
        prev_value[0] = val


cdef void remove_kurt(float64_t val, int64_t *nobs,
                      float64_t *x, float64_t *xx,
                      float64_t *xxx, float64_t *xxxx,
                      float64_t *compensation_x,
                      float64_t *compensation_xx,
                      float64_t *compensation_xxx,
                      float64_t *compensation_xxxx) nogil:
    """ remove a value from the kurotic calc """
    cdef:
        float64_t y, t

    # Not NaN
    if val == val:
        nobs[0] = nobs[0] - 1

        y = - val - compensation_x[0]
        t = x[0] + y
        compensation_x[0] = t - x[0] - y
        x[0] = t
        y = - val * val - compensation_xx[0]
        t = xx[0] + y
        compensation_xx[0] = t - xx[0] - y
        xx[0] = t
        y = - val * val * val - compensation_xxx[0]
        t = xxx[0] + y
        compensation_xxx[0] = t - xxx[0] - y
        xxx[0] = t
        y = - val * val * val * val - compensation_xxxx[0]
        t = xxxx[0] + y
        compensation_xxxx[0] = t - xxxx[0] - y
        xxxx[0] = t


def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
              ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    cdef:
        Py_ssize_t i, j
        float64_t val, mean_val, min_val, sum_val = 0
        float64_t compensation_xxxx_add, compensation_xxxx_remove
        float64_t compensation_xxx_remove, compensation_xxx_add
        float64_t compensation_xx_remove, compensation_xx_add
        float64_t compensation_x_remove, compensation_x_add
        float64_t x, xx, xxx, xxxx
        float64_t prev_value
        int64_t nobs, s, e, num_consecutive_same_value
        int64_t N = len(start), V = len(values), nobs_mean = 0
        ndarray[float64_t] output, values_copy
        bint is_monotonic_increasing_bounds

    minp = max(minp, 4)
    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    output = np.empty(N, dtype=np.float64)
    values_copy = np.copy(values)
    min_val = np.nanmin(values)

    with nogil:
        for i in range(0, V):
            val = values_copy[i]
            if val == val:
                nobs_mean += 1
                sum_val += val
        mean_val = sum_val / nobs_mean
        # Other cases would lead to imprecision for smallest values
        if min_val - mean_val > -1e4:
            mean_val = round(mean_val)
            for i in range(0, V):
                values_copy[i] = values_copy[i] - mean_val

        for i in range(0, N):

            s = start[i]
            e = end[i]

            # Over the first window, observations can only be added
            # never removed
            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:

                prev_value = values[s]
                num_consecutive_same_value = 0

                compensation_xxxx_add = compensation_xxxx_remove = 0
                compensation_xxx_remove = compensation_xxx_add = 0
                compensation_xx_remove = compensation_xx_add = 0
                compensation_x_remove = compensation_x_add = 0
                x = xx = xxx = xxxx = 0
                nobs = 0
                for j in range(s, e):
                    add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
                             &compensation_x_add, &compensation_xx_add,
                             &compensation_xxx_add, &compensation_xxxx_add,
                             &num_consecutive_same_value, &prev_value)

            else:

                # After the first window, observations can both be added
                # and removed
                # calculate deletes
                for j in range(start[i - 1], s):
                    remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
                                &compensation_x_remove, &compensation_xx_remove,
                                &compensation_xxx_remove, &compensation_xxxx_remove)

                # calculate adds
                for j in range(end[i - 1], e):
                    add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
                             &compensation_x_add, &compensation_xx_add,
                             &compensation_xxx_add, &compensation_xxxx_add,
                             &num_consecutive_same_value, &prev_value)

            output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx,
                                  num_consecutive_same_value)

            if not is_monotonic_increasing_bounds:
                nobs = 0
                x = 0.0
                xx = 0.0
                xxx = 0.0
                xxxx = 0.0

    return output


# ----------------------------------------------------------------------
# Rolling median, min, max


def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
                  ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    cdef:
        Py_ssize_t i, j
        bint err = False, is_monotonic_increasing_bounds
        int midpoint, ret = 0
        int64_t nobs = 0, N = len(start), s, e, win
        float64_t val, res
        skiplist_t *sl
        ndarray[float64_t] output

    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )

    # we use the Fixed/Variable Indexer here as the
    # actual skiplist ops outweigh any window computation costs
    output = np.empty(N, dtype=np.float64)

    if (end - start).max() == 0:
        output[:] = NaN
        return output
    win = (end - start).max()
    sl = skiplist_init(<int>win)
    if sl == NULL:
        raise MemoryError("skiplist_init failed")

    with nogil:

        for i in range(0, N):
            s = start[i]
            e = end[i]

            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:

                if i != 0:
                    skiplist_destroy(sl)
                    sl = skiplist_init(<int>win)
                    nobs = 0
                # setup
                for j in range(s, e):
                    val = values[j]
                    if val == val:
                        nobs += 1
                        err = skiplist_insert(sl, val) == -1
                        if err:
                            break

            else:

                # calculate adds
                for j in range(end[i - 1], e):
                    val = values[j]
                    if val == val:
                        nobs += 1
                        err = skiplist_insert(sl, val) == -1
                        if err:
                            break

                # calculate deletes
                for j in range(start[i - 1], s):
                    val = values[j]
                    if val == val:
                        skiplist_remove(sl, val)
                        nobs -= 1
            if nobs >= minp:
                midpoint = <int>(nobs / 2)
                if nobs % 2:
                    res = skiplist_get(sl, midpoint, &ret)
                else:
                    res = (skiplist_get(sl, midpoint, &ret) +
                           skiplist_get(sl, (midpoint - 1), &ret)) / 2
                if ret == 0:
                    res = NaN
            else:
                res = NaN

            output[i] = res

            if not is_monotonic_increasing_bounds:
                nobs = 0
                skiplist_destroy(sl)
                sl = skiplist_init(<int>win)

    skiplist_destroy(sl)
    if err:
        raise MemoryError("skiplist_insert failed")
    return output


# ----------------------------------------------------------------------

# Moving maximum / minimum code taken from Bottleneck under the terms
# of its Simplified BSD license
# https://github.com/pydata/bottleneck


cdef float64_t init_mm(float64_t ai, Py_ssize_t *nobs, bint is_max) nogil:

    if ai == ai:
        nobs[0] = nobs[0] + 1
    elif is_max:
        ai = MINfloat64
    else:
        ai = MAXfloat64

    return ai


cdef void remove_mm(float64_t aold, Py_ssize_t *nobs) nogil:
    """ remove a value from the mm calc """
    if aold == aold:
        nobs[0] = nobs[0] - 1


cdef float64_t calc_mm(int64_t minp, Py_ssize_t nobs,
                       float64_t value) nogil:
    cdef:
        float64_t result

    if nobs >= minp:
        result = value
    else:
        result = NaN

    return result


def roll_max(ndarray[float64_t] values, ndarray[int64_t] start,
             ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    """
    Moving max of 1d array of any numeric type along axis=0 ignoring NaNs.

    Parameters
    ----------
    values : np.ndarray[np.float64]
    window : int, size of rolling window
    minp : if number of observations in window
          is below this, output a NaN
    index : ndarray, optional
       index for window computation
    closed : 'right', 'left', 'both', 'neither'
            make the interval closed on the right, left,
            both or neither endpoints

    Returns
    -------
    np.ndarray[float]
    """
    return _roll_min_max(values, start, end, minp, is_max=1)


def roll_min(ndarray[float64_t] values, ndarray[int64_t] start,
             ndarray[int64_t] end, int64_t minp) -> np.ndarray:
    """
    Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.

    Parameters
    ----------
    values : np.ndarray[np.float64]
    window : int, size of rolling window
    minp : if number of observations in window
          is below this, output a NaN
    index : ndarray, optional
       index for window computation

    Returns
    -------
    np.ndarray[float]
    """
    return _roll_min_max(values, start, end, minp, is_max=0)


cdef _roll_min_max(ndarray[float64_t] values,
                   ndarray[int64_t] starti,
                   ndarray[int64_t] endi,
                   int64_t minp,
                   bint is_max):
    cdef:
        float64_t ai
        int64_t curr_win_size, start
        Py_ssize_t i, k, nobs = 0, N = len(starti)
        deque Q[int64_t]  # min/max always the front
        deque W[int64_t]  # track the whole window for nobs compute
        ndarray[float64_t, ndim=1] output

    output = np.empty(N, dtype=np.float64)
    Q = deque[int64_t]()
    W = deque[int64_t]()

    with nogil:

        # This is using a modified version of the C++ code in this
        # SO post: https://stackoverflow.com/a/12239580
        # The original impl didn't deal with variable window sizes
        # So the code was optimized for that

        # first window's size
        curr_win_size = endi[0] - starti[0]
        # GH 32865
        # Anchor output index to values index to provide custom
        # BaseIndexer support
        for i in range(N):

            curr_win_size = endi[i] - starti[i]
            if i == 0:
                start = starti[i]
            else:
                start = endi[i - 1]

            for k in range(start, endi[i]):
                ai = init_mm(values[k], &nobs, is_max)
                # Discard previous entries if we find new min or max
                if is_max:
                    while not Q.empty() and ((ai >= values[Q.back()]) or
                                             values[Q.back()] != values[Q.back()]):
                        Q.pop_back()
                else:
                    while not Q.empty() and ((ai <= values[Q.back()]) or
                                             values[Q.back()] != values[Q.back()]):
                        Q.pop_back()
                Q.push_back(k)
                W.push_back(k)

            # Discard entries outside and left of current window
            while not Q.empty() and Q.front() <= starti[i] - 1:
                Q.pop_front()
            while not W.empty() and W.front() <= starti[i] - 1:
                remove_mm(values[W.front()], &nobs)
                W.pop_front()

            # Save output based on index in input value array
            if not Q.empty() and curr_win_size > 0:
                output[i] = calc_mm(minp, nobs, values[Q.front()])
            else:
                output[i] = NaN

    return output


cdef enum InterpolationType:
    LINEAR,
    LOWER,
    HIGHER,
    NEAREST,
    MIDPOINT


interpolation_types = {
    "linear": LINEAR,
    "lower": LOWER,
    "higher": HIGHER,
    "nearest": NEAREST,
    "midpoint": MIDPOINT,
}


def roll_quantile(const float64_t[:] values, ndarray[int64_t] start,
                  ndarray[int64_t] end, int64_t minp,
                  float64_t quantile, str interpolation) -> np.ndarray:
    """
    O(N log(window)) implementation using skip list
    """
    cdef:
        Py_ssize_t i, j, s, e, N = len(start), idx
        int ret = 0
        int64_t nobs = 0, win
        float64_t val, idx_with_fraction
        float64_t vlow, vhigh
        skiplist_t *skiplist
        InterpolationType interpolation_type
        ndarray[float64_t] output

    if quantile <= 0.0 or quantile >= 1.0:
        raise ValueError(f"quantile value {quantile} not in [0, 1]")

    try:
        interpolation_type = interpolation_types[interpolation]
    except KeyError:
        raise ValueError(f"Interpolation '{interpolation}' is not supported")

    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    # we use the Fixed/Variable Indexer here as the
    # actual skiplist ops outweigh any window computation costs
    output = np.empty(N, dtype=np.float64)

    win = (end - start).max()
    if win == 0:
        output[:] = NaN
        return output
    skiplist = skiplist_init(<int>win)
    if skiplist == NULL:
        raise MemoryError("skiplist_init failed")

    with nogil:
        for i in range(0, N):
            s = start[i]
            e = end[i]

            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
                if i != 0:
                    nobs = 0
                    skiplist_destroy(skiplist)
                    skiplist = skiplist_init(<int>win)

                # setup
                for j in range(s, e):
                    val = values[j]
                    if val == val:
                        nobs += 1
                        skiplist_insert(skiplist, val)

            else:
                # calculate adds
                for j in range(end[i - 1], e):
                    val = values[j]
                    if val == val:
                        nobs += 1
                        skiplist_insert(skiplist, val)

                # calculate deletes
                for j in range(start[i - 1], s):
                    val = values[j]
                    if val == val:
                        skiplist_remove(skiplist, val)
                        nobs -= 1
            if nobs >= minp:
                if nobs == 1:
                    # Single value in skip list
                    output[i] = skiplist_get(skiplist, 0, &ret)
                else:
                    idx_with_fraction = quantile * (nobs - 1)
                    idx = <int>idx_with_fraction

                    if idx_with_fraction == idx:
                        # no need to interpolate
                        output[i] = skiplist_get(skiplist, idx, &ret)
                        continue

                    if interpolation_type == LINEAR:
                        vlow = skiplist_get(skiplist, idx, &ret)
                        vhigh = skiplist_get(skiplist, idx + 1, &ret)
                        output[i] = ((vlow + (vhigh - vlow) *
                                      (idx_with_fraction - idx)))
                    elif interpolation_type == LOWER:
                        output[i] = skiplist_get(skiplist, idx, &ret)
                    elif interpolation_type == HIGHER:
                        output[i] = skiplist_get(skiplist, idx + 1, &ret)
                    elif interpolation_type == NEAREST:
                        # the same behaviour as round()
                        if idx_with_fraction - idx == 0.5:
                            if idx % 2 == 0:
                                output[i] = skiplist_get(skiplist, idx, &ret)
                            else:
                                output[i] = skiplist_get(
                                    skiplist, idx + 1, &ret)
                        elif idx_with_fraction - idx < 0.5:
                            output[i] = skiplist_get(skiplist, idx, &ret)
                        else:
                            output[i] = skiplist_get(skiplist, idx + 1, &ret)
                    elif interpolation_type == MIDPOINT:
                        vlow = skiplist_get(skiplist, idx, &ret)
                        vhigh = skiplist_get(skiplist, idx + 1, &ret)
                        output[i] = <float64_t>(vlow + vhigh) / 2

                    if ret == 0:
                        output[i] = NaN
            else:
                output[i] = NaN

    skiplist_destroy(skiplist)

    return output


rolling_rank_tiebreakers = {
    "average": TiebreakEnumType.TIEBREAK_AVERAGE,
    "min": TiebreakEnumType.TIEBREAK_MIN,
    "max": TiebreakEnumType.TIEBREAK_MAX,
}


def roll_rank(const float64_t[:] values, ndarray[int64_t] start,
              ndarray[int64_t] end, int64_t minp, bint percentile,
              str method, bint ascending) -> np.ndarray:
    """
    O(N log(window)) implementation using skip list

    derived from roll_quantile
    """
    cdef:
        Py_ssize_t i, j, s, e, N = len(start)
        float64_t rank_min = 0, rank = 0
        int64_t nobs = 0, win
        float64_t val
        skiplist_t *skiplist
        float64_t[::1] output
        TiebreakEnumType rank_type

    try:
        rank_type = rolling_rank_tiebreakers[method]
    except KeyError:
        raise ValueError(f"Method '{method}' is not supported")

    is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
        start, end
    )
    # we use the Fixed/Variable Indexer here as the
    # actual skiplist ops outweigh any window computation costs
    output = np.empty(N, dtype=np.float64)

    win = (end - start).max()
    if win == 0:
        output[:] = NaN
        return np.asarray(output)
    skiplist = skiplist_init(<int>win)
    if skiplist == NULL:
        raise MemoryError("skiplist_init failed")

    with nogil:
        for i in range(N):
            s = start[i]
            e = end[i]

            if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1]:
                if i != 0:
                    nobs = 0
                    skiplist_destroy(skiplist)
                    skiplist = skiplist_init(<int>win)

                # setup
                for j in range(s, e):
                    val = values[j] if ascending else -values[j]
                    if val == val:
                        nobs += 1
                        rank = skiplist_insert(skiplist, val)
                        if rank == -1:
                            raise MemoryError("skiplist_insert failed")
                        if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE:
                            # The average rank of `val` is the sum of the ranks of all
                            # instances of `val` in the skip list divided by the number
                            # of instances. The sum of consecutive integers from 1 to N
                            # is N * (N + 1) / 2.
                            # The sum of the ranks is the sum of integers from the
                            # lowest rank to the highest rank, which is the sum of
                            # integers from 1 to the highest rank minus the sum of
                            # integers from 1 to one less than the lowest rank.
                            rank_min = skiplist_min_rank(skiplist, val)
                            rank = (((rank * (rank + 1) / 2)
                                    - ((rank_min - 1) * rank_min / 2))
                                    / (rank - rank_min + 1))
                        elif rank_type == TiebreakEnumType.TIEBREAK_MIN:
                            rank = skiplist_min_rank(skiplist, val)
                    else:
                        rank = NaN

            else:
                # calculate deletes
                for j in range(start[i - 1], s):
                    val = values[j] if ascending else -values[j]
                    if val == val:
                        skiplist_remove(skiplist, val)
                        nobs -= 1

                # calculate adds
                for j in range(end[i - 1], e):
                    val = values[j] if ascending else -values[j]
                    if val == val:
                        nobs += 1
                        rank = skiplist_insert(skiplist, val)
                        if rank == -1:
                            raise MemoryError("skiplist_insert failed")
                        if rank_type == TiebreakEnumType.TIEBREAK_AVERAGE:
                            rank_min = skiplist_min_rank(skiplist, val)
                            rank = (((rank * (rank + 1) / 2)
                                    - ((rank_min - 1) * rank_min / 2))
                                    / (rank - rank_min + 1))
                        elif rank_type == TiebreakEnumType.TIEBREAK_MIN:
                            rank = skiplist_min_rank(skiplist, val)
                    else:
                        rank = NaN
            if nobs >= minp:
                output[i] = rank / nobs if percentile else rank
            else:
                output[i] = NaN

    skiplist_destroy(skiplist)

    return np.asarray(output)


def roll_apply(object obj,
               ndarray[int64_t] start, ndarray[int64_t] end,
               int64_t minp,
               object function, bint raw,
               tuple args, dict kwargs) -> np.ndarray:
    cdef:
        ndarray[float64_t] output, counts
        ndarray[float64_t, cast=True] arr
        Py_ssize_t i, s, e, N = len(start), n = len(obj)

    if n == 0:
        return np.array([], dtype=np.float64)

    arr = np.asarray(obj)

    # ndarray input
    if raw and not arr.flags.c_contiguous:
        arr = arr.copy("C")

    counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp)

    output = np.empty(N, dtype=np.float64)

    for i in range(N):

        s = start[i]
        e = end[i]

        if counts[i] >= minp:
            if raw:
                output[i] = function(arr[s:e], *args, **kwargs)
            else:
                output[i] = function(obj.iloc[s:e], *args, **kwargs)
        else:
            output[i] = NaN

    return output


# ----------------------------------------------------------------------
# Rolling sum and mean for weighted window


def roll_weighted_sum(
    const float64_t[:] values, const float64_t[:] weights, int minp
) -> np.ndarray:
    return _roll_weighted_sum_mean(values, weights, minp, avg=0)


def roll_weighted_mean(
    const float64_t[:] values, const float64_t[:] weights, int minp
) -> np.ndarray:
    return _roll_weighted_sum_mean(values, weights, minp, avg=1)


cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values,
                                          const float64_t[:] weights,
                                          int minp, bint avg):
    """
    Assume len(weights) << len(values)
    """
    cdef:
        float64_t[:] output, tot_wgt, counts
        Py_ssize_t in_i, win_i, win_n, in_n
        float64_t val_in, val_win, c, w

    in_n = len(values)
    win_n = len(weights)

    output = np.zeros(in_n, dtype=np.float64)
    counts = np.zeros(in_n, dtype=np.float64)
    if avg:
        tot_wgt = np.zeros(in_n, dtype=np.float64)

    elif minp > in_n:
        minp = in_n + 1

    minp = max(minp, 1)

    with nogil:
        if avg:
            for win_i in range(win_n):
                val_win = weights[win_i]
                if val_win != val_win:
                    continue

                for in_i in range(in_n - (win_n - win_i) + 1):
                    val_in = values[in_i]
                    if val_in == val_in:
                        output[in_i + (win_n - win_i) - 1] += val_in * val_win
                        counts[in_i + (win_n - win_i) - 1] += 1
                        tot_wgt[in_i + (win_n - win_i) - 1] += val_win

            for in_i in range(in_n):
                c = counts[in_i]
                if c < minp:
                    output[in_i] = NaN
                else:
                    w = tot_wgt[in_i]
                    if w == 0:
                        output[in_i] = NaN
                    else:
                        output[in_i] /= tot_wgt[in_i]

        else:
            for win_i in range(win_n):
                val_win = weights[win_i]
                if val_win != val_win:
                    continue

                for in_i in range(in_n - (win_n - win_i) + 1):
                    val_in = values[in_i]

                    if val_in == val_in:
                        output[in_i + (win_n - win_i) - 1] += val_in * val_win
                        counts[in_i + (win_n - win_i) - 1] += 1

            for in_i in range(in_n):
                c = counts[in_i]
                if c < minp:
                    output[in_i] = NaN

    return output


# ----------------------------------------------------------------------
# Rolling var for weighted window


cdef float64_t calc_weighted_var(float64_t t,
                                 float64_t sum_w,
                                 Py_ssize_t win_n,
                                 unsigned int ddof,
                                 float64_t nobs,
                                 int64_t minp) nogil:
    """
    Calculate weighted variance for a window using West's method.

    Paper: https://dl.acm.org/citation.cfm?id=359153

    Parameters
    ----------
    t: float64_t
        sum of weighted squared differences
    sum_w: float64_t
        sum of weights
    win_n: Py_ssize_t
        window size
    ddof: unsigned int
        delta degrees of freedom
    nobs: float64_t
        number of observations
    minp: int64_t
        minimum number of observations

    Returns
    -------
    result : float64_t
        weighted variance of the window
    """

    cdef:
        float64_t result

    # Variance is unchanged if no observation is added or removed
    if (nobs >= minp) and (nobs > ddof):

        # pathological case
        if nobs == 1:
            result = 0
        else:
            result = t * win_n / ((win_n - ddof) * sum_w)
            if result < 0:
                result = 0
    else:
        result = NaN

    return result


cdef void add_weighted_var(float64_t val,
                           float64_t w,
                           float64_t *t,
                           float64_t *sum_w,
                           float64_t *mean,
                           float64_t *nobs) nogil:
    """
    Update weighted mean, sum of weights and sum of weighted squared
    differences to include value and weight pair in weighted variance
    calculation using West's method.

    Paper: https://dl.acm.org/citation.cfm?id=359153

    Parameters
    ----------
    val: float64_t
        window values
    w: float64_t
        window weights
    t: float64_t
        sum of weighted squared differences
    sum_w: float64_t
        sum of weights
    mean: float64_t
        weighted mean
    nobs: float64_t
        number of observations
    """

    cdef:
        float64_t temp, q, r

    if val != val:
        return

    nobs[0] = nobs[0] + 1

    q = val - mean[0]
    temp = sum_w[0] + w
    r = q * w / temp

    mean[0] = mean[0] + r
    t[0] = t[0] + r * sum_w[0] * q
    sum_w[0] = temp


cdef void remove_weighted_var(float64_t val,
                              float64_t w,
                              float64_t *t,
                              float64_t *sum_w,
                              float64_t *mean,
                              float64_t *nobs) nogil:
    """
    Update weighted mean, sum of weights and sum of weighted squared
    differences to remove value and weight pair from weighted variance
    calculation using West's method.

    Paper: https://dl.acm.org/citation.cfm?id=359153

    Parameters
    ----------
    val: float64_t
        window values
    w: float64_t
        window weights
    t: float64_t
        sum of weighted squared differences
    sum_w: float64_t
        sum of weights
    mean: float64_t
        weighted mean
    nobs: float64_t
        number of observations
    """

    cdef:
        float64_t temp, q, r

    if val == val:
        nobs[0] = nobs[0] - 1

        if nobs[0]:
            q = val - mean[0]
            temp = sum_w[0] - w
            r = q * w / temp

            mean[0] = mean[0] - r
            t[0] = t[0] - r * sum_w[0] * q
            sum_w[0] = temp

        else:
            t[0] = 0
            sum_w[0] = 0
            mean[0] = 0


def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
                      int64_t minp, unsigned int ddof):
    """
    Calculates weighted rolling variance using West's online algorithm.

    Paper: https://dl.acm.org/citation.cfm?id=359153

    Parameters
    ----------
    values: float64_t[:]
        values to roll window over
    weights: float64_t[:]
        array of weights whose length is window size
    minp: int64_t
        minimum number of observations to calculate
        variance of a window
    ddof: unsigned int
         the divisor used in variance calculations
         is the window size - ddof

    Returns
    -------
    output: float64_t[:]
        weighted variances of windows
    """

    cdef:
        float64_t t = 0, sum_w = 0, mean = 0, nobs = 0
        float64_t val, pre_val, w, pre_w
        Py_ssize_t i, n, win_n
        float64_t[:] output

    n = len(values)
    win_n = len(weights)
    output = np.empty(n, dtype=np.float64)

    with nogil:

        for i in range(min(win_n, n)):
            add_weighted_var(values[i], weights[i], &t,
                             &sum_w, &mean, &nobs)

            output[i] = calc_weighted_var(t, sum_w, win_n,
                                          ddof, nobs, minp)

        for i in range(win_n, n):
            val = values[i]
            pre_val = values[i - win_n]

            w = weights[i % win_n]
            pre_w = weights[(i - win_n) % win_n]

            if val == val:
                if pre_val == pre_val:
                    remove_weighted_var(pre_val, pre_w, &t,
                                        &sum_w, &mean, &nobs)

                add_weighted_var(val, w, &t, &sum_w, &mean, &nobs)

            elif pre_val == pre_val:
                remove_weighted_var(pre_val, pre_w, &t,
                                    &sum_w, &mean, &nobs)

            output[i] = calc_weighted_var(t, sum_w, win_n,
                                          ddof, nobs, minp)

    return output


# ----------------------------------------------------------------------
# Exponentially weighted moving
@cython.cpow(True)
def ewm(const float64_t[:] vals, const int64_t[:] start, const int64_t[:] end,
        int minp, float64_t com, bint adjust, bint ignore_na,
        const float64_t[:] deltas=None, bint normalize=True) -> np.ndarray:
    """
    Compute exponentially-weighted moving average or sum using center-of-mass.

    Parameters
    ----------
    vals : ndarray (float64 type)
    start: ndarray (int64 type)
    end: ndarray (int64 type)
    minp : int
    com : float64
    adjust : bool
    ignore_na : bool
    deltas : ndarray (float64 type), optional. If None, implicitly assumes equally
             spaced points (used when `times` is not passed)
    normalize : bool, optional.
                If True, calculate the mean. If False, calculate the sum.

    Returns
    -------
    np.ndarray[float64_t]
    """

    cdef:
        Py_ssize_t i, j, s, e, nobs, win_size, N = len(vals), M = len(start)
        const float64_t[:] sub_vals
        const float64_t[:] sub_deltas=None
        ndarray[float64_t] sub_output, output = np.empty(N, dtype=np.float64)
        float64_t alpha, old_wt_factor, new_wt, weighted, old_wt, cur
        bint is_observation, use_deltas

    if N == 0:
        return output

    use_deltas = deltas is not None

    alpha = 1. / (1. + com)
    old_wt_factor = 1. - alpha
    new_wt = 1. if adjust else alpha

    for j in range(M):
        s = start[j]
        e = end[j]
        sub_vals = vals[s:e]
        # note that len(deltas) = len(vals) - 1 and deltas[i] is to be used in
        # conjunction with vals[i+1]
        if use_deltas:
            sub_deltas = deltas[s:e - 1]
        win_size = len(sub_vals)
        sub_output = np.empty(win_size, dtype=np.float64)

        weighted = sub_vals[0]
        is_observation = weighted == weighted
        nobs = int(is_observation)
        sub_output[0] = weighted if nobs >= minp else NaN
        old_wt = 1.

        with nogil:
            for i in range(1, win_size):
                cur = sub_vals[i]
                is_observation = cur == cur
                nobs += is_observation
                if weighted == weighted:

                    if is_observation or not ignore_na:
                        if normalize:
                            if use_deltas:
                                old_wt *= old_wt_factor ** sub_deltas[i - 1]
                            else:
                                old_wt *= old_wt_factor
                        else:
                            weighted = old_wt_factor * weighted
                        if is_observation:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted != cur:
                                    weighted = old_wt * weighted + new_wt * cur
                                    weighted /= (old_wt + new_wt)
                                if adjust:
                                    old_wt += new_wt
                                else:
                                    old_wt = 1.
                            else:
                                weighted += cur
                elif is_observation:
                    weighted = cur

                sub_output[i] = weighted if nobs >= minp else NaN

        output[s:e] = sub_output

    return output


def ewmcov(const float64_t[:] input_x, const int64_t[:] start, const int64_t[:] end,
           int minp, const float64_t[:] input_y, float64_t com, bint adjust,
           bint ignore_na, bint bias) -> np.ndarray:
    """
    Compute exponentially-weighted moving variance using center-of-mass.

    Parameters
    ----------
    input_x : ndarray (float64 type)
    start: ndarray (int64 type)
    end: ndarray (int64 type)
    minp : int
    input_y : ndarray (float64 type)
    com : float64
    adjust : bool
    ignore_na : bool
    bias : bool

    Returns
    -------
    np.ndarray[float64_t]
    """

    cdef:
        Py_ssize_t i, j, s, e, win_size, nobs
        Py_ssize_t N = len(input_x), M = len(input_y), L = len(start)
        float64_t alpha, old_wt_factor, new_wt, mean_x, mean_y, cov
        float64_t sum_wt, sum_wt2, old_wt, cur_x, cur_y, old_mean_x, old_mean_y
        float64_t numerator, denominator
        const float64_t[:] sub_x_vals, sub_y_vals
        ndarray[float64_t] sub_out, output = np.empty(N, dtype=np.float64)
        bint is_observation

    if M != N:
        raise ValueError(f"arrays are of different lengths ({N} and {M})")

    if N == 0:
        return output

    alpha = 1. / (1. + com)
    old_wt_factor = 1. - alpha
    new_wt = 1. if adjust else alpha

    for j in range(L):
        s = start[j]
        e = end[j]
        sub_x_vals = input_x[s:e]
        sub_y_vals = input_y[s:e]
        win_size = len(sub_x_vals)
        sub_out = np.empty(win_size, dtype=np.float64)

        mean_x = sub_x_vals[0]
        mean_y = sub_y_vals[0]
        is_observation = (mean_x == mean_x) and (mean_y == mean_y)
        nobs = int(is_observation)
        if not is_observation:
            mean_x = NaN
            mean_y = NaN
        sub_out[0] = (0. if bias else NaN) if nobs >= minp else NaN
        cov = 0.
        sum_wt = 1.
        sum_wt2 = 1.
        old_wt = 1.

        with nogil:
            for i in range(1, win_size):
                cur_x = sub_x_vals[i]
                cur_y = sub_y_vals[i]
                is_observation = (cur_x == cur_x) and (cur_y == cur_y)
                nobs += is_observation
                if mean_x == mean_x:
                    if is_observation or not ignore_na:
                        sum_wt *= old_wt_factor
                        sum_wt2 *= (old_wt_factor * old_wt_factor)
                        old_wt *= old_wt_factor
                        if is_observation:
                            old_mean_x = mean_x
                            old_mean_y = mean_y

                            # avoid numerical errors on constant series
                            if mean_x != cur_x:
                                mean_x = ((old_wt * old_mean_x) +
                                          (new_wt * cur_x)) / (old_wt + new_wt)

                            # avoid numerical errors on constant series
                            if mean_y != cur_y:
                                mean_y = ((old_wt * old_mean_y) +
                                          (new_wt * cur_y)) / (old_wt + new_wt)
                            cov = ((old_wt * (cov + ((old_mean_x - mean_x) *
                                                     (old_mean_y - mean_y)))) +
                                   (new_wt * ((cur_x - mean_x) *
                                              (cur_y - mean_y)))) / (old_wt + new_wt)
                            sum_wt += new_wt
                            sum_wt2 += (new_wt * new_wt)
                            old_wt += new_wt
                            if not adjust:
                                sum_wt /= old_wt
                                sum_wt2 /= (old_wt * old_wt)
                                old_wt = 1.
                elif is_observation:
                    mean_x = cur_x
                    mean_y = cur_y

                if nobs >= minp:
                    if not bias:
                        numerator = sum_wt * sum_wt
                        denominator = numerator - sum_wt2
                        if denominator > 0:
                            sub_out[i] = (numerator / denominator) * cov
                        else:
                            sub_out[i] = NaN
                    else:
                        sub_out[i] = cov
                else:
                    sub_out[i] = NaN

        output[s:e] = sub_out

    return output