3RNN/Lib/site-packages/sklearn/tree/_utils.pyx

# Authors: Gilles Louppe <g.louppe@gmail.com>
#          Peter Prettenhofer <peter.prettenhofer@gmail.com>
#          Arnaud Joly <arnaud.v.joly@gmail.com>
#          Jacob Schreiber <jmschreiber91@gmail.com>
#          Nelson Liu <nelson@nelsonliu.me>
#
#
# License: BSD 3 clause

from libc.stdlib cimport free
from libc.stdlib cimport realloc
from libc.math cimport log as ln
from libc.math cimport isnan

import numpy as np
cimport numpy as cnp
cnp.import_array()

from ..utils._random cimport our_rand_r

# =============================================================================
# Helper functions
# =============================================================================

cdef int safe_realloc(realloc_ptr* p, size_t nelems) except -1 nogil:
    # sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython
    # 0.20.1 to crash.
    cdef size_t nbytes = nelems * sizeof(p[0][0])
    if nbytes / sizeof(p[0][0]) != nelems:
        # Overflow in the multiplication
        raise MemoryError(f"could not allocate ({nelems} * {sizeof(p[0][0])}) bytes")

    cdef realloc_ptr tmp = <realloc_ptr>realloc(p[0], nbytes)
    if tmp == NULL:
        raise MemoryError(f"could not allocate {nbytes} bytes")

    p[0] = tmp
    return 0


def _realloc_test():
    # Helper for tests. Tries to allocate <size_t>(-1) / 2 * sizeof(size_t)
    # bytes, which will always overflow.
    cdef intp_t* p = NULL
    safe_realloc(&p, <size_t>(-1) / 2)
    if p != NULL:
        free(p)
        assert False


cdef inline cnp.ndarray sizet_ptr_to_ndarray(intp_t* data, intp_t size):
    """Return copied data as 1D numpy array of intp's."""
    cdef cnp.npy_intp shape[1]
    shape[0] = <cnp.npy_intp> size
    return cnp.PyArray_SimpleNewFromData(1, shape, cnp.NPY_INTP, data).copy()


cdef inline intp_t rand_int(intp_t low, intp_t high,
                            uint32_t* random_state) noexcept nogil:
    """Generate a random integer in [low; end)."""
    return low + our_rand_r(random_state) % (high - low)


cdef inline float64_t rand_uniform(float64_t low, float64_t high,
                                   uint32_t* random_state) noexcept nogil:
    """Generate a random float64_t in [low; high)."""
    return ((high - low) * <float64_t> our_rand_r(random_state) /
            <float64_t> RAND_R_MAX) + low


cdef inline float64_t log(float64_t x) noexcept nogil:
    return ln(x) / ln(2.0)

# =============================================================================
# WeightedPQueue data structure
# =============================================================================

cdef class WeightedPQueue:
    """A priority queue class, always sorted in increasing order.

    Attributes
    ----------
    capacity : intp_t
        The capacity of the priority queue.

    array_ptr : intp_t
        The water mark of the priority queue; the priority queue grows from
        left to right in the array ``array_``. ``array_ptr`` is always
        less than ``capacity``.

    array_ : WeightedPQueueRecord*
        The array of priority queue records. The minimum element is on the
        left at index 0, and the maximum element is on the right at index
        ``array_ptr-1``.
    """

    def __cinit__(self, intp_t capacity):
        self.capacity = capacity
        self.array_ptr = 0
        safe_realloc(&self.array_, capacity)

    def __dealloc__(self):
        free(self.array_)

    cdef int reset(self) except -1 nogil:
        """Reset the WeightedPQueue to its state at construction

        Return -1 in case of failure to allocate memory (and raise MemoryError)
        or 0 otherwise.
        """
        self.array_ptr = 0
        # Since safe_realloc can raise MemoryError, use `except -1`
        safe_realloc(&self.array_, self.capacity)
        return 0

    cdef bint is_empty(self) noexcept nogil:
        return self.array_ptr <= 0

    cdef intp_t size(self) noexcept nogil:
        return self.array_ptr

    cdef int push(self, float64_t data, float64_t weight) except -1 nogil:
        """Push record on the array.

        Return -1 in case of failure to allocate memory (and raise MemoryError)
        or 0 otherwise.
        """
        cdef intp_t array_ptr = self.array_ptr
        cdef WeightedPQueueRecord* array = NULL
        cdef intp_t i

        # Resize if capacity not sufficient
        if array_ptr >= self.capacity:
            self.capacity *= 2
            # Since safe_realloc can raise MemoryError, use `except -1`
            safe_realloc(&self.array_, self.capacity)

        # Put element as last element of array
        array = self.array_
        array[array_ptr].data = data
        array[array_ptr].weight = weight

        # bubble last element up according until it is sorted
        # in ascending order
        i = array_ptr
        while(i != 0 and array[i].data < array[i-1].data):
            array[i], array[i-1] = array[i-1], array[i]
            i -= 1

        # Increase element count
        self.array_ptr = array_ptr + 1
        return 0

    cdef int remove(self, float64_t data, float64_t weight) noexcept nogil:
        """Remove a specific value/weight record from the array.
        Returns 0 if successful, -1 if record not found."""
        cdef intp_t array_ptr = self.array_ptr
        cdef WeightedPQueueRecord* array = self.array_
        cdef intp_t idx_to_remove = -1
        cdef intp_t i

        if array_ptr <= 0:
            return -1

        # find element to remove
        for i in range(array_ptr):
            if array[i].data == data and array[i].weight == weight:
                idx_to_remove = i
                break

        if idx_to_remove == -1:
            return -1

        # shift the elements after the removed element
        # to the left.
        for i in range(idx_to_remove, array_ptr-1):
            array[i] = array[i+1]

        self.array_ptr = array_ptr - 1
        return 0

    cdef int pop(self, float64_t* data, float64_t* weight) noexcept nogil:
        """Remove the top (minimum) element from array.
        Returns 0 if successful, -1 if nothing to remove."""
        cdef intp_t array_ptr = self.array_ptr
        cdef WeightedPQueueRecord* array = self.array_
        cdef intp_t i

        if array_ptr <= 0:
            return -1

        data[0] = array[0].data
        weight[0] = array[0].weight

        # shift the elements after the removed element
        # to the left.
        for i in range(0, array_ptr-1):
            array[i] = array[i+1]

        self.array_ptr = array_ptr - 1
        return 0

    cdef int peek(self, float64_t* data, float64_t* weight) noexcept nogil:
        """Write the top element from array to a pointer.
        Returns 0 if successful, -1 if nothing to write."""
        cdef WeightedPQueueRecord* array = self.array_
        if self.array_ptr <= 0:
            return -1
        # Take first value
        data[0] = array[0].data
        weight[0] = array[0].weight
        return 0

    cdef float64_t get_weight_from_index(self, intp_t index) noexcept nogil:
        """Given an index between [0,self.current_capacity], access
        the appropriate heap and return the requested weight"""
        cdef WeightedPQueueRecord* array = self.array_

        # get weight at index
        return array[index].weight

    cdef float64_t get_value_from_index(self, intp_t index) noexcept nogil:
        """Given an index between [0,self.current_capacity], access
        the appropriate heap and return the requested value"""
        cdef WeightedPQueueRecord* array = self.array_

        # get value at index
        return array[index].data

# =============================================================================
# WeightedMedianCalculator data structure
# =============================================================================

cdef class WeightedMedianCalculator:
    """A class to handle calculation of the weighted median from streams of
    data. To do so, it maintains a parameter ``k`` such that the sum of the
    weights in the range [0,k) is greater than or equal to half of the total
    weight. By minimizing the value of ``k`` that fulfills this constraint,
    calculating the median is done by either taking the value of the sample
    at index ``k-1`` of ``samples`` (samples[k-1].data) or the average of
    the samples at index ``k-1`` and ``k`` of ``samples``
    ((samples[k-1] + samples[k]) / 2).

    Attributes
    ----------
    initial_capacity : intp_t
        The initial capacity of the WeightedMedianCalculator.

    samples : WeightedPQueue
        Holds the samples (consisting of values and their weights) used in the
        weighted median calculation.

    total_weight : float64_t
        The sum of the weights of items in ``samples``. Represents the total
        weight of all samples used in the median calculation.

    k : intp_t
        Index used to calculate the median.

    sum_w_0_k : float64_t
        The sum of the weights from samples[0:k]. Used in the weighted
        median calculation; minimizing the value of ``k`` such that
        ``sum_w_0_k`` >= ``total_weight / 2`` provides a mechanism for
        calculating the median in constant time.

    """

    def __cinit__(self, intp_t initial_capacity):
        self.initial_capacity = initial_capacity
        self.samples = WeightedPQueue(initial_capacity)
        self.total_weight = 0
        self.k = 0
        self.sum_w_0_k = 0

    cdef intp_t size(self) noexcept nogil:
        """Return the number of samples in the
        WeightedMedianCalculator"""
        return self.samples.size()

    cdef int reset(self) except -1 nogil:
        """Reset the WeightedMedianCalculator to its state at construction

        Return -1 in case of failure to allocate memory (and raise MemoryError)
        or 0 otherwise.
        """
        # samples.reset (WeightedPQueue.reset) uses safe_realloc, hence
        # except -1
        self.samples.reset()
        self.total_weight = 0
        self.k = 0
        self.sum_w_0_k = 0
        return 0

    cdef int push(self, float64_t data, float64_t weight) except -1 nogil:
        """Push a value and its associated weight to the WeightedMedianCalculator

        Return -1 in case of failure to allocate memory (and raise MemoryError)
        or 0 otherwise.
        """
        cdef int return_value
        cdef float64_t original_median = 0.0

        if self.size() != 0:
            original_median = self.get_median()
        # samples.push (WeightedPQueue.push) uses safe_realloc, hence except -1
        return_value = self.samples.push(data, weight)
        self.update_median_parameters_post_push(data, weight,
                                                original_median)
        return return_value

    cdef int update_median_parameters_post_push(
            self, float64_t data, float64_t weight,
            float64_t original_median) noexcept nogil:
        """Update the parameters used in the median calculation,
        namely `k` and `sum_w_0_k` after an insertion"""

        # trivial case of one element.
        if self.size() == 1:
            self.k = 1
            self.total_weight = weight
            self.sum_w_0_k = self.total_weight
            return 0

        # get the original weighted median
        self.total_weight += weight

        if data < original_median:
            # inserting below the median, so increment k and
            # then update self.sum_w_0_k accordingly by adding
            # the weight that was added.
            self.k += 1
            # update sum_w_0_k by adding the weight added
            self.sum_w_0_k += weight

            # minimize k such that sum(W[0:k]) >= total_weight / 2
            # minimum value of k is 1
            while(self.k > 1 and ((self.sum_w_0_k -
                                   self.samples.get_weight_from_index(self.k-1))
                                  >= self.total_weight / 2.0)):
                self.k -= 1
                self.sum_w_0_k -= self.samples.get_weight_from_index(self.k)
            return 0

        if data >= original_median:
            # inserting above or at the median
            # minimize k such that sum(W[0:k]) >= total_weight / 2
            while(self.k < self.samples.size() and
                  (self.sum_w_0_k < self.total_weight / 2.0)):
                self.k += 1
                self.sum_w_0_k += self.samples.get_weight_from_index(self.k-1)
            return 0

    cdef int remove(self, float64_t data, float64_t weight) noexcept nogil:
        """Remove a value from the MedianHeap, removing it
        from consideration in the median calculation
        """
        cdef int return_value
        cdef float64_t original_median = 0.0

        if self.size() != 0:
            original_median = self.get_median()

        return_value = self.samples.remove(data, weight)
        self.update_median_parameters_post_remove(data, weight,
                                                  original_median)
        return return_value

    cdef int pop(self, float64_t* data, float64_t* weight) noexcept nogil:
        """Pop a value from the MedianHeap, starting from the
        left and moving to the right.
        """
        cdef int return_value
        cdef float64_t original_median = 0.0

        if self.size() != 0:
            original_median = self.get_median()

        # no elements to pop
        if self.samples.size() == 0:
            return -1

        return_value = self.samples.pop(data, weight)
        self.update_median_parameters_post_remove(data[0],
                                                  weight[0],
                                                  original_median)
        return return_value

    cdef int update_median_parameters_post_remove(
            self, float64_t data, float64_t weight,
            float64_t original_median) noexcept nogil:
        """Update the parameters used in the median calculation,
        namely `k` and `sum_w_0_k` after a removal"""
        # reset parameters because it there are no elements
        if self.samples.size() == 0:
            self.k = 0
            self.total_weight = 0
            self.sum_w_0_k = 0
            return 0

        # trivial case of one element.
        if self.samples.size() == 1:
            self.k = 1
            self.total_weight -= weight
            self.sum_w_0_k = self.total_weight
            return 0

        # get the current weighted median
        self.total_weight -= weight

        if data < original_median:
            # removing below the median, so decrement k and
            # then update self.sum_w_0_k accordingly by subtracting
            # the removed weight

            self.k -= 1
            # update sum_w_0_k by removing the weight at index k
            self.sum_w_0_k -= weight

            # minimize k such that sum(W[0:k]) >= total_weight / 2
            # by incrementing k and updating sum_w_0_k accordingly
            # until the condition is met.
            while(self.k < self.samples.size() and
                  (self.sum_w_0_k < self.total_weight / 2.0)):
                self.k += 1
                self.sum_w_0_k += self.samples.get_weight_from_index(self.k-1)
            return 0

        if data >= original_median:
            # removing above the median
            # minimize k such that sum(W[0:k]) >= total_weight / 2
            while(self.k > 1 and ((self.sum_w_0_k -
                                   self.samples.get_weight_from_index(self.k-1))
                                  >= self.total_weight / 2.0)):
                self.k -= 1
                self.sum_w_0_k -= self.samples.get_weight_from_index(self.k)
            return 0

    cdef float64_t get_median(self) noexcept nogil:
        """Write the median to a pointer, taking into account
        sample weights."""
        if self.sum_w_0_k == (self.total_weight / 2.0):
            # split median
            return (self.samples.get_value_from_index(self.k) +
                    self.samples.get_value_from_index(self.k-1)) / 2.0
        if self.sum_w_0_k > (self.total_weight / 2.0):
            # whole median
            return self.samples.get_value_from_index(self.k-1)


def _any_isnan_axis0(const float32_t[:, :] X):
    """Same as np.any(np.isnan(X), axis=0)"""
    cdef:
        intp_t i, j
        intp_t n_samples = X.shape[0]
        intp_t n_features = X.shape[1]
        unsigned char[::1] isnan_out = np.zeros(X.shape[1], dtype=np.bool_)

    with nogil:
        for i in range(n_samples):
            for j in range(n_features):
                if isnan_out[j]:
                    continue
                if isnan(X[i, j]):
                    isnan_out[j] = True
                    break
    return np.asarray(isnan_out)
1.0 2024-05-26 19:49:15 +02:00			`# Authors: Gilles Louppe <g.louppe@gmail.com>`
			`# Peter Prettenhofer <peter.prettenhofer@gmail.com>`
			`# Arnaud Joly <arnaud.v.joly@gmail.com>`
			`# Jacob Schreiber <jmschreiber91@gmail.com>`
			`# Nelson Liu <nelson@nelsonliu.me>`
			`#`
			`#`
			`# License: BSD 3 clause`

			`from libc.stdlib cimport free`
			`from libc.stdlib cimport realloc`
			`from libc.math cimport log as ln`
			`from libc.math cimport isnan`

			`import numpy as np`
			`cimport numpy as cnp`
			`cnp.import_array()`

			`from ..utils._random cimport our_rand_r`

			`# =============================================================================`
			`# Helper functions`
			`# =============================================================================`

			`cdef int safe_realloc(realloc_ptr* p, size_t nelems) except -1 nogil:`
			`# sizeof(realloc_ptr[0]) would be more like idiomatic C, but causes Cython`
			`# 0.20.1 to crash.`
			`cdef size_t nbytes = nelems * sizeof(p[0][0])`
			`if nbytes / sizeof(p[0][0]) != nelems:`
			`# Overflow in the multiplication`
			`raise MemoryError(f"could not allocate ({nelems} * {sizeof(p[0][0])}) bytes")`

			`cdef realloc_ptr tmp = <realloc_ptr>realloc(p[0], nbytes)`
			`if tmp == NULL:`
			`raise MemoryError(f"could not allocate {nbytes} bytes")`

			`p[0] = tmp`
			`return 0`


			`def _realloc_test():`
			`# Helper for tests. Tries to allocate <size_t>(-1) / 2 * sizeof(size_t)`
			`# bytes, which will always overflow.`
			`cdef intp_t* p = NULL`
			`safe_realloc(&p, <size_t>(-1) / 2)`
			`if p != NULL:`
			`free(p)`
			`assert False`


			`cdef inline cnp.ndarray sizet_ptr_to_ndarray(intp_t* data, intp_t size):`
			`"""Return copied data as 1D numpy array of intp's."""`
			`cdef cnp.npy_intp shape[1]`
			`shape[0] = <cnp.npy_intp> size`
			`return cnp.PyArray_SimpleNewFromData(1, shape, cnp.NPY_INTP, data).copy()`


			`cdef inline intp_t rand_int(intp_t low, intp_t high,`
			`uint32_t* random_state) noexcept nogil:`
			`"""Generate a random integer in [low; end)."""`
			`return low + our_rand_r(random_state) % (high - low)`


			`cdef inline float64_t rand_uniform(float64_t low, float64_t high,`
			`uint32_t* random_state) noexcept nogil:`
			`"""Generate a random float64_t in [low; high)."""`
			`return ((high - low) * <float64_t> our_rand_r(random_state) /`
			`<float64_t> RAND_R_MAX) + low`


			`cdef inline float64_t log(float64_t x) noexcept nogil:`
			`return ln(x) / ln(2.0)`

			`# =============================================================================`
			`# WeightedPQueue data structure`
			`# =============================================================================`

			`cdef class WeightedPQueue:`
			`"""A priority queue class, always sorted in increasing order.`

			`Attributes`
			`----------`
			`capacity : intp_t`
			`The capacity of the priority queue.`

			`array_ptr : intp_t`
			`The water mark of the priority queue; the priority queue grows from`
			left to right in the array ``array_``. ``array_ptr`` is always
			less than ``capacity``.

			`array_ : WeightedPQueueRecord*`
			`The array of priority queue records. The minimum element is on the`
			`left at index 0, and the maximum element is on the right at index`
			``array_ptr-1``.
			`"""`

			`def __cinit__(self, intp_t capacity):`
			`self.capacity = capacity`
			`self.array_ptr = 0`
			`safe_realloc(&self.array_, capacity)`

			`def __dealloc__(self):`
			`free(self.array_)`

			`cdef int reset(self) except -1 nogil:`
			`"""Reset the WeightedPQueue to its state at construction`

			`Return -1 in case of failure to allocate memory (and raise MemoryError)`
			`or 0 otherwise.`
			`"""`
			`self.array_ptr = 0`
			# Since safe_realloc can raise MemoryError, use `except -1`
			`safe_realloc(&self.array_, self.capacity)`
			`return 0`

			`cdef bint is_empty(self) noexcept nogil:`
			`return self.array_ptr <= 0`

			`cdef intp_t size(self) noexcept nogil:`
			`return self.array_ptr`

			`cdef int push(self, float64_t data, float64_t weight) except -1 nogil:`
			`"""Push record on the array.`

			`Return -1 in case of failure to allocate memory (and raise MemoryError)`
			`or 0 otherwise.`
			`"""`
			`cdef intp_t array_ptr = self.array_ptr`
			`cdef WeightedPQueueRecord* array = NULL`
			`cdef intp_t i`

			`# Resize if capacity not sufficient`
			`if array_ptr >= self.capacity:`
			`self.capacity *= 2`
			# Since safe_realloc can raise MemoryError, use `except -1`
			`safe_realloc(&self.array_, self.capacity)`

			`# Put element as last element of array`
			`array = self.array_`
			`array[array_ptr].data = data`
			`array[array_ptr].weight = weight`

			`# bubble last element up according until it is sorted`
			`# in ascending order`
			`i = array_ptr`
			`while(i != 0 and array[i].data < array[i-1].data):`
			`array[i], array[i-1] = array[i-1], array[i]`
			`i -= 1`

			`# Increase element count`
			`self.array_ptr = array_ptr + 1`
			`return 0`

			`cdef int remove(self, float64_t data, float64_t weight) noexcept nogil:`
			`"""Remove a specific value/weight record from the array.`
			`Returns 0 if successful, -1 if record not found."""`
			`cdef intp_t array_ptr = self.array_ptr`
			`cdef WeightedPQueueRecord* array = self.array_`
			`cdef intp_t idx_to_remove = -1`
			`cdef intp_t i`

			`if array_ptr <= 0:`
			`return -1`

			`# find element to remove`
			`for i in range(array_ptr):`
			`if array[i].data == data and array[i].weight == weight:`
			`idx_to_remove = i`
			`break`

			`if idx_to_remove == -1:`
			`return -1`

			`# shift the elements after the removed element`
			`# to the left.`
			`for i in range(idx_to_remove, array_ptr-1):`
			`array[i] = array[i+1]`

			`self.array_ptr = array_ptr - 1`
			`return 0`

			`cdef int pop(self, float64_t* data, float64_t* weight) noexcept nogil:`
			`"""Remove the top (minimum) element from array.`
			`Returns 0 if successful, -1 if nothing to remove."""`
			`cdef intp_t array_ptr = self.array_ptr`
			`cdef WeightedPQueueRecord* array = self.array_`
			`cdef intp_t i`

			`if array_ptr <= 0:`
			`return -1`

			`data[0] = array[0].data`
			`weight[0] = array[0].weight`

			`# shift the elements after the removed element`
			`# to the left.`
			`for i in range(0, array_ptr-1):`
			`array[i] = array[i+1]`

			`self.array_ptr = array_ptr - 1`
			`return 0`

			`cdef int peek(self, float64_t* data, float64_t* weight) noexcept nogil:`
			`"""Write the top element from array to a pointer.`
			`Returns 0 if successful, -1 if nothing to write."""`
			`cdef WeightedPQueueRecord* array = self.array_`
			`if self.array_ptr <= 0:`
			`return -1`
			`# Take first value`
			`data[0] = array[0].data`
			`weight[0] = array[0].weight`
			`return 0`

			`cdef float64_t get_weight_from_index(self, intp_t index) noexcept nogil:`
			`"""Given an index between [0,self.current_capacity], access`
			`the appropriate heap and return the requested weight"""`
			`cdef WeightedPQueueRecord* array = self.array_`

			`# get weight at index`
			`return array[index].weight`

			`cdef float64_t get_value_from_index(self, intp_t index) noexcept nogil:`
			`"""Given an index between [0,self.current_capacity], access`
			`the appropriate heap and return the requested value"""`
			`cdef WeightedPQueueRecord* array = self.array_`

			`# get value at index`
			`return array[index].data`

			`# =============================================================================`
			`# WeightedMedianCalculator data structure`
			`# =============================================================================`

			`cdef class WeightedMedianCalculator:`
			`"""A class to handle calculation of the weighted median from streams of`
			data. To do so, it maintains a parameter ``k`` such that the sum of the
			`weights in the range [0,k) is greater than or equal to half of the total`
			weight. By minimizing the value of ``k`` that fulfills this constraint,
			`calculating the median is done by either taking the value of the sample`
			at index ``k-1`` of ``samples`` (samples[k-1].data) or the average of
			the samples at index ``k-1`` and ``k`` of ``samples``
			`((samples[k-1] + samples[k]) / 2).`

			`Attributes`
			`----------`
			`initial_capacity : intp_t`
			`The initial capacity of the WeightedMedianCalculator.`

			`samples : WeightedPQueue`
			`Holds the samples (consisting of values and their weights) used in the`
			`weighted median calculation.`

			`total_weight : float64_t`
			The sum of the weights of items in ``samples``. Represents the total
			`weight of all samples used in the median calculation.`

			`k : intp_t`
			`Index used to calculate the median.`

			`sum_w_0_k : float64_t`
			`The sum of the weights from samples[0:k]. Used in the weighted`
			median calculation; minimizing the value of ``k`` such that
			``sum_w_0_k`` >= ``total_weight / 2`` provides a mechanism for
			`calculating the median in constant time.`

			`"""`

			`def __cinit__(self, intp_t initial_capacity):`
			`self.initial_capacity = initial_capacity`
			`self.samples = WeightedPQueue(initial_capacity)`
			`self.total_weight = 0`
			`self.k = 0`
			`self.sum_w_0_k = 0`

			`cdef intp_t size(self) noexcept nogil:`
			`"""Return the number of samples in the`
			`WeightedMedianCalculator"""`
			`return self.samples.size()`

			`cdef int reset(self) except -1 nogil:`
			`"""Reset the WeightedMedianCalculator to its state at construction`

			`Return -1 in case of failure to allocate memory (and raise MemoryError)`
			`or 0 otherwise.`
			`"""`
			`# samples.reset (WeightedPQueue.reset) uses safe_realloc, hence`
			`# except -1`
			`self.samples.reset()`
			`self.total_weight = 0`
			`self.k = 0`
			`self.sum_w_0_k = 0`
			`return 0`

			`cdef int push(self, float64_t data, float64_t weight) except -1 nogil:`
			`"""Push a value and its associated weight to the WeightedMedianCalculator`

			`Return -1 in case of failure to allocate memory (and raise MemoryError)`
			`or 0 otherwise.`
			`"""`
			`cdef int return_value`
			`cdef float64_t original_median = 0.0`

			`if self.size() != 0:`
			`original_median = self.get_median()`
			`# samples.push (WeightedPQueue.push) uses safe_realloc, hence except -1`
			`return_value = self.samples.push(data, weight)`
			`self.update_median_parameters_post_push(data, weight,`
			`original_median)`
			`return return_value`

			`cdef int update_median_parameters_post_push(`
			`self, float64_t data, float64_t weight,`
			`float64_t original_median) noexcept nogil:`
			`"""Update the parameters used in the median calculation,`
			namely `k` and `sum_w_0_k` after an insertion"""

			`# trivial case of one element.`
			`if self.size() == 1:`
			`self.k = 1`
			`self.total_weight = weight`
			`self.sum_w_0_k = self.total_weight`
			`return 0`

			`# get the original weighted median`
			`self.total_weight += weight`

			`if data < original_median:`
			`# inserting below the median, so increment k and`
			`# then update self.sum_w_0_k accordingly by adding`
			`# the weight that was added.`
			`self.k += 1`
			`# update sum_w_0_k by adding the weight added`
			`self.sum_w_0_k += weight`

			`# minimize k such that sum(W[0:k]) >= total_weight / 2`
			`# minimum value of k is 1`
			`while(self.k > 1 and ((self.sum_w_0_k -`
			`self.samples.get_weight_from_index(self.k-1))`
			`>= self.total_weight / 2.0)):`
			`self.k -= 1`
			`self.sum_w_0_k -= self.samples.get_weight_from_index(self.k)`
			`return 0`

			`if data >= original_median:`
			`# inserting above or at the median`
			`# minimize k such that sum(W[0:k]) >= total_weight / 2`
			`while(self.k < self.samples.size() and`
			`(self.sum_w_0_k < self.total_weight / 2.0)):`
			`self.k += 1`
			`self.sum_w_0_k += self.samples.get_weight_from_index(self.k-1)`
			`return 0`

			`cdef int remove(self, float64_t data, float64_t weight) noexcept nogil:`
			`"""Remove a value from the MedianHeap, removing it`
			`from consideration in the median calculation`
			`"""`
			`cdef int return_value`
			`cdef float64_t original_median = 0.0`

			`if self.size() != 0:`
			`original_median = self.get_median()`

			`return_value = self.samples.remove(data, weight)`
			`self.update_median_parameters_post_remove(data, weight,`
			`original_median)`
			`return return_value`

			`cdef int pop(self, float64_t* data, float64_t* weight) noexcept nogil:`
			`"""Pop a value from the MedianHeap, starting from the`
			`left and moving to the right.`
			`"""`
			`cdef int return_value`
			`cdef float64_t original_median = 0.0`

			`if self.size() != 0:`
			`original_median = self.get_median()`

			`# no elements to pop`
			`if self.samples.size() == 0:`
			`return -1`

			`return_value = self.samples.pop(data, weight)`
			`self.update_median_parameters_post_remove(data[0],`
			`weight[0],`
			`original_median)`
			`return return_value`

			`cdef int update_median_parameters_post_remove(`
			`self, float64_t data, float64_t weight,`
			`float64_t original_median) noexcept nogil:`
			`"""Update the parameters used in the median calculation,`
			namely `k` and `sum_w_0_k` after a removal"""
			`# reset parameters because it there are no elements`
			`if self.samples.size() == 0:`
			`self.k = 0`
			`self.total_weight = 0`
			`self.sum_w_0_k = 0`
			`return 0`

			`# trivial case of one element.`
			`if self.samples.size() == 1:`
			`self.k = 1`
			`self.total_weight -= weight`
			`self.sum_w_0_k = self.total_weight`
			`return 0`

			`# get the current weighted median`
			`self.total_weight -= weight`

			`if data < original_median:`
			`# removing below the median, so decrement k and`
			`# then update self.sum_w_0_k accordingly by subtracting`
			`# the removed weight`

			`self.k -= 1`
			`# update sum_w_0_k by removing the weight at index k`
			`self.sum_w_0_k -= weight`

			`# minimize k such that sum(W[0:k]) >= total_weight / 2`
			`# by incrementing k and updating sum_w_0_k accordingly`
			`# until the condition is met.`
			`while(self.k < self.samples.size() and`
			`(self.sum_w_0_k < self.total_weight / 2.0)):`
			`self.k += 1`
			`self.sum_w_0_k += self.samples.get_weight_from_index(self.k-1)`
			`return 0`

			`if data >= original_median:`
			`# removing above the median`
			`# minimize k such that sum(W[0:k]) >= total_weight / 2`
			`while(self.k > 1 and ((self.sum_w_0_k -`
			`self.samples.get_weight_from_index(self.k-1))`
			`>= self.total_weight / 2.0)):`
			`self.k -= 1`
			`self.sum_w_0_k -= self.samples.get_weight_from_index(self.k)`
			`return 0`

			`cdef float64_t get_median(self) noexcept nogil:`
			`"""Write the median to a pointer, taking into account`
			`sample weights."""`
			`if self.sum_w_0_k == (self.total_weight / 2.0):`
			`# split median`
			`return (self.samples.get_value_from_index(self.k) +`
			`self.samples.get_value_from_index(self.k-1)) / 2.0`
			`if self.sum_w_0_k > (self.total_weight / 2.0):`
			`# whole median`
			`return self.samples.get_value_from_index(self.k-1)`


			`def _any_isnan_axis0(const float32_t[:, :] X):`
			`"""Same as np.any(np.isnan(X), axis=0)"""`
			`cdef:`
			`intp_t i, j`
			`intp_t n_samples = X.shape[0]`
			`intp_t n_features = X.shape[1]`
			`unsigned char[::1] isnan_out = np.zeros(X.shape[1], dtype=np.bool_)`

			`with nogil:`
			`for i in range(n_samples):`
			`for j in range(n_features):`
			`if isnan_out[j]:`
			`continue`
			`if isnan(X[i, j]):`
			`isnan_out[j] = True`
			`break`
			`return np.asarray(isnan_out)`