138 lines
4.5 KiB
Cython
138 lines
4.5 KiB
Cython
|
"""
|
||
|
Uses C++ map containers for fast dict-like behavior with keys being
|
||
|
integers, and values float.
|
||
|
"""
|
||
|
# Author: Gael Varoquaux
|
||
|
# License: BSD
|
||
|
|
||
|
# C++
|
||
|
from cython.operator cimport dereference as deref, preincrement as inc
|
||
|
from libcpp.utility cimport pair
|
||
|
from libcpp.map cimport map as cpp_map
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
from ._typedefs cimport float64_t, intp_t
|
||
|
|
||
|
|
||
|
###############################################################################
|
||
|
# An object to be used in Python
|
||
|
|
||
|
# Lookup is faster than dict (up to 10 times), and so is full traversal
|
||
|
# (up to 50 times), and assignment (up to 6 times), but creation is
|
||
|
# slower (up to 3 times). Also, a large benefit is that memory
|
||
|
# consumption is reduced a lot compared to a Python dict
|
||
|
|
||
|
cdef class IntFloatDict:
|
||
|
|
||
|
def __init__(
|
||
|
self,
|
||
|
intp_t[:] keys,
|
||
|
float64_t[:] values,
|
||
|
):
|
||
|
cdef int i
|
||
|
cdef int size = values.size
|
||
|
# Should check that sizes for keys and values are equal, and
|
||
|
# after should boundcheck(False)
|
||
|
for i in range(size):
|
||
|
self.my_map[keys[i]] = values[i]
|
||
|
|
||
|
def __len__(self):
|
||
|
return self.my_map.size()
|
||
|
|
||
|
def __getitem__(self, int key):
|
||
|
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.find(key)
|
||
|
if it == self.my_map.end():
|
||
|
# The key is not in the dict
|
||
|
raise KeyError('%i' % key)
|
||
|
return deref(it).second
|
||
|
|
||
|
def __setitem__(self, int key, float value):
|
||
|
self.my_map[key] = value
|
||
|
|
||
|
# Cython 0.20 generates buggy code below. Commenting this out for now
|
||
|
# and relying on the to_arrays method
|
||
|
# def __iter__(self):
|
||
|
# cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
|
||
|
# cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
|
||
|
# while it != end:
|
||
|
# yield deref(it).first, deref(it).second
|
||
|
# inc(it)
|
||
|
|
||
|
def __iter__(self):
|
||
|
cdef int size = self.my_map.size()
|
||
|
cdef intp_t [:] keys = np.empty(size, dtype=np.intp)
|
||
|
cdef float64_t [:] values = np.empty(size, dtype=np.float64)
|
||
|
self._to_arrays(keys, values)
|
||
|
cdef int idx
|
||
|
cdef intp_t key
|
||
|
cdef float64_t value
|
||
|
for idx in range(size):
|
||
|
key = keys[idx]
|
||
|
value = values[idx]
|
||
|
yield key, value
|
||
|
|
||
|
def to_arrays(self):
|
||
|
"""Return the key, value representation of the IntFloatDict
|
||
|
object.
|
||
|
|
||
|
Returns
|
||
|
=======
|
||
|
keys : ndarray, shape (n_items, ), dtype=int
|
||
|
The indices of the data points
|
||
|
values : ndarray, shape (n_items, ), dtype=float
|
||
|
The values of the data points
|
||
|
"""
|
||
|
cdef int size = self.my_map.size()
|
||
|
keys = np.empty(size, dtype=np.intp)
|
||
|
values = np.empty(size, dtype=np.float64)
|
||
|
self._to_arrays(keys, values)
|
||
|
return keys, values
|
||
|
|
||
|
cdef _to_arrays(self, intp_t [:] keys, float64_t [:] values):
|
||
|
# Internal version of to_arrays that takes already-initialized arrays
|
||
|
cdef cpp_map[intp_t, float64_t].iterator it = self.my_map.begin()
|
||
|
cdef cpp_map[intp_t, float64_t].iterator end = self.my_map.end()
|
||
|
cdef int index = 0
|
||
|
while it != end:
|
||
|
keys[index] = deref(it).first
|
||
|
values[index] = deref(it).second
|
||
|
inc(it)
|
||
|
index += 1
|
||
|
|
||
|
def update(self, IntFloatDict other):
|
||
|
cdef cpp_map[intp_t, float64_t].iterator it = other.my_map.begin()
|
||
|
cdef cpp_map[intp_t, float64_t].iterator end = other.my_map.end()
|
||
|
while it != end:
|
||
|
self.my_map[deref(it).first] = deref(it).second
|
||
|
inc(it)
|
||
|
|
||
|
def copy(self):
|
||
|
cdef IntFloatDict out_obj = IntFloatDict.__new__(IntFloatDict)
|
||
|
# The '=' operator is a copy operator for C++ maps
|
||
|
out_obj.my_map = self.my_map
|
||
|
return out_obj
|
||
|
|
||
|
def append(self, intp_t key, float64_t value):
|
||
|
# Construct our arguments
|
||
|
cdef pair[intp_t, float64_t] args
|
||
|
args.first = key
|
||
|
args.second = value
|
||
|
self.my_map.insert(args)
|
||
|
|
||
|
|
||
|
###############################################################################
|
||
|
# operation on dict
|
||
|
|
||
|
def argmin(IntFloatDict d):
|
||
|
cdef cpp_map[intp_t, float64_t].iterator it = d.my_map.begin()
|
||
|
cdef cpp_map[intp_t, float64_t].iterator end = d.my_map.end()
|
||
|
cdef intp_t min_key = -1
|
||
|
cdef float64_t min_value = np.inf
|
||
|
while it != end:
|
||
|
if deref(it).second < min_value:
|
||
|
min_value = deref(it).second
|
||
|
min_key = deref(it).first
|
||
|
inc(it)
|
||
|
return min_key, min_value
|