3RNN/Lib/site-packages/sklearn/utils/_fast_dict.pyx

"""
Uses C++ map containers for fast dict-like behavior with keys being
integers, and values float.
"""
# Author: Gael Varoquaux
# License: BSD

# C++
from cython.operator cimport dereference as deref, preincrement as inc
from libcpp.utility cimport pair
from libcpp.map cimport map as cpp_map

import numpy as np

from ._typedefs cimport float64_t, intp_t


###############################################################################
# An object to be used in Python

# Lookup is faster than dict (up to 10 times), and so is full traversal
# (up to 50 times), and assignment (up to 6 times), but creation is
# slower (up to 3 times). Also, a large benefit is that memory
# consumption is reduced a lot compared to a Python dict

cdef class IntFloatDict:

    def __init__(
        self,
        intp_t[:] keys,
        float64_t[:] values,
    ):
        cdef int i
        cdef int size = values.size
        # Should check that sizes for keys and values are equal, and
        # after should boundcheck(False)
        for i in range(size):
            self.my_map[keys[i]] = values[i]

    def __len__(self):
        return self.my_map.size()

    def __getitem__(self, int key):
        cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.find(key)
        if it == self.my_map.end():
            # The key is not in the dict
            raise KeyError('%i' % key)
        return deref(it).second

    def __setitem__(self, int key, float value):
        self.my_map[key] = value

    # Cython 0.20 generates buggy code below. Commenting this out for now
    # and relying on the to_arrays method
    # def __iter__(self):
    #     cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
    #     cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
    #     while it != end:
    #         yield deref(it).first, deref(it).second
    #         inc(it)

    def __iter__(self):
        cdef int size = self.my_map.size()
        cdef intp_t [:] keys = np.empty(size, dtype=np.intp)
        cdef float64_t [:] values = np.empty(size, dtype=np.float64)
        self._to_arrays(keys, values)
        cdef int idx
        cdef intp_t key
        cdef float64_t value
        for idx in range(size):
            key = keys[idx]
            value = values[idx]
            yield key, value

    def to_arrays(self):
        """Return the key, value representation of the IntFloatDict
           object.

           Returns
           =======
           keys : ndarray, shape (n_items, ), dtype=int
                The indices of the data points
           values : ndarray, shape (n_items, ), dtype=float
                The values of the data points
        """
        cdef int size = self.my_map.size()
        keys = np.empty(size, dtype=np.intp)
        values = np.empty(size, dtype=np.float64)
        self._to_arrays(keys, values)
        return keys, values

    cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values):
        # Internal version of to_arrays that takes already-initialized arrays
        cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
        cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
        cdef int index = 0
        while it != end:
            keys[index] = deref(it).first
            values[index] = deref(it).second
            inc(it)
            index += 1

    def update(self, IntFloatDict other):
        cdef cpp_map[intp_t, float64_t].iterator it = other.my_map.begin()
        cdef cpp_map[intp_t, float64_t].iterator end = other.my_map.end()
        while it != end:
            self.my_map[deref(it).first] = deref(it).second
            inc(it)

    def copy(self):
        cdef IntFloatDict out_obj = IntFloatDict.__new__(IntFloatDict)
        # The '=' operator is a copy operator for C++ maps
        out_obj.my_map = self.my_map
        return out_obj

    def append(self, intp_t key, float64_t value):
        # Construct our arguments
        cdef pair[intp_t, float64_t] args
        args.first = key
        args.second = value
        self.my_map.insert(args)


###############################################################################
# operation on dict

def argmin(IntFloatDict d):
    cdef cpp_map[intp_t, float64_t].iterator it = d.my_map.begin()
    cdef cpp_map[intp_t, float64_t].iterator end = d.my_map.end()
    cdef intp_t min_key = -1
    cdef float64_t min_value = np.inf
    while it != end:
        if deref(it).second < min_value:
            min_value = deref(it).second
            min_key = deref(it).first
        inc(it)
    return min_key, min_value
1.0 2024-05-26 19:49:15 +02:00			`"""`
			`Uses C++ map containers for fast dict-like behavior with keys being`
			`integers, and values float.`
			`"""`
			`# Author: Gael Varoquaux`
			`# License: BSD`

			`# C++`
			`from cython.operator cimport dereference as deref, preincrement as inc`
			`from libcpp.utility cimport pair`
			`from libcpp.map cimport map as cpp_map`

			`import numpy as np`

			`from ._typedefs cimport float64_t, intp_t`


			`###############################################################################`
			`# An object to be used in Python`

			`# Lookup is faster than dict (up to 10 times), and so is full traversal`
			`# (up to 50 times), and assignment (up to 6 times), but creation is`
			`# slower (up to 3 times). Also, a large benefit is that memory`
			`# consumption is reduced a lot compared to a Python dict`

			`cdef class IntFloatDict:`

			`def __init__(`
			`self,`
			`intp_t[:] keys,`
			`float64_t[:] values,`
			`):`
			`cdef int i`
			`cdef int size = values.size`
			`# Should check that sizes for keys and values are equal, and`
			`# after should boundcheck(False)`
			`for i in range(size):`
			`self.my_map[keys[i]] = values[i]`

			`def __len__(self):`
			`return self.my_map.size()`

			`def __getitem__(self, int key):`
			`cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.find(key)`
			`if it == self.my_map.end():`
			`# The key is not in the dict`
			`raise KeyError('%i' % key)`
			`return deref(it).second`

			`def __setitem__(self, int key, float value):`
			`self.my_map[key] = value`

			`# Cython 0.20 generates buggy code below. Commenting this out for now`
			`# and relying on the to_arrays method`
			`# def __iter__(self):`
			`# cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()`
			`# cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()`
			`# while it != end:`
			`# yield deref(it).first, deref(it).second`
			`# inc(it)`

			`def __iter__(self):`
			`cdef int size = self.my_map.size()`
			`cdef intp_t [:] keys = np.empty(size, dtype=np.intp)`
			`cdef float64_t [:] values = np.empty(size, dtype=np.float64)`
			`self._to_arrays(keys, values)`
			`cdef int idx`
			`cdef intp_t key`
			`cdef float64_t value`
			`for idx in range(size):`
			`key = keys[idx]`
			`value = values[idx]`
			`yield key, value`

			`def to_arrays(self):`
			`"""Return the key, value representation of the IntFloatDict`
			`object.`

			`Returns`
			`=======`
			`keys : ndarray, shape (n_items, ), dtype=int`
			`The indices of the data points`
			`values : ndarray, shape (n_items, ), dtype=float`
			`The values of the data points`
			`"""`
			`cdef int size = self.my_map.size()`
			`keys = np.empty(size, dtype=np.intp)`
			`values = np.empty(size, dtype=np.float64)`
			`self._to_arrays(keys, values)`
			`return keys, values`

			`cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values):`
			`# Internal version of to_arrays that takes already-initialized arrays`
			`cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()`
			`cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()`
			`cdef int index = 0`
			`while it != end:`
			`keys[index] = deref(it).first`
			`values[index] = deref(it).second`
			`inc(it)`
			`index += 1`

			`def update(self, IntFloatDict other):`
			`cdef cpp_map[intp_t, float64_t].iterator it = other.my_map.begin()`
			`cdef cpp_map[intp_t, float64_t].iterator end = other.my_map.end()`
			`while it != end:`
			`self.my_map[deref(it).first] = deref(it).second`
			`inc(it)`

			`def copy(self):`
			`cdef IntFloatDict out_obj = IntFloatDict.__new__(IntFloatDict)`
			`# The '=' operator is a copy operator for C++ maps`
			`out_obj.my_map = self.my_map`
			`return out_obj`

			`def append(self, intp_t key, float64_t value):`
			`# Construct our arguments`
			`cdef pair[intp_t, float64_t] args`
			`args.first = key`
			`args.second = value`
			`self.my_map.insert(args)`


			`###############################################################################`
			`# operation on dict`

			`def argmin(IntFloatDict d):`
			`cdef cpp_map[intp_t, float64_t].iterator it = d.my_map.begin()`
			`cdef cpp_map[intp_t, float64_t].iterator end = d.my_map.end()`
			`cdef intp_t min_key = -1`
			`cdef float64_t min_value = np.inf`
			`while it != end:`
			`if deref(it).second < min_value:`
			`min_value = deref(it).second`
			`min_key = deref(it).first`
			`inc(it)`
			`return min_key, min_value`