{{py: # Generated file: _kd_tree.pyx implementation_specific_values = [ # The values are arranged as follows: # # name_suffix, INPUT_DTYPE_t, INPUT_DTYPE # ('64', 'float64_t', 'np.float64'), ('32', 'float32_t', 'np.float32') ] # By Jake Vanderplas (2013) # written for the scikit-learn project # License: BSD }} __all__ = ['KDTree', 'KDTree64', 'KDTree32'] {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} DOC_DICT{{name_suffix}} = { 'BinaryTree': 'KDTree{{name_suffix}}', 'binary_tree': 'kd_tree{{name_suffix}}', } VALID_METRICS{{name_suffix}} = [ 'EuclideanDistance{{name_suffix}}', 'ManhattanDistance{{name_suffix}}', 'ChebyshevDistance{{name_suffix}}', 'MinkowskiDistance{{name_suffix}}' ] {{endfor}} include "_binary_tree.pxi" {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} # Inherit KDTree{{name_suffix}} from BinaryTree{{name_suffix}} cdef class KDTree{{name_suffix}}(BinaryTree{{name_suffix}}): __doc__ = CLASS_DOC.format(**DOC_DICT{{name_suffix}}) pass {{endfor}} # ---------------------------------------------------------------------- # The functions below specialized the Binary Tree as a KD Tree # # Note that these functions use the concept of "reduced distance". # The reduced distance, defined for some metrics, is a quantity which # is more efficient to compute than the distance, but preserves the # relative rankings of the true distance. For example, the reduced # distance for the Euclidean metric is the squared-euclidean distance. # For some metrics, the reduced distance is simply the distance. {{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}} cdef int allocate_data{{name_suffix}}( BinaryTree{{name_suffix}} tree, intp_t n_nodes, intp_t n_features, ) except -1: """Allocate arrays needed for the KD Tree""" tree.node_bounds = np.zeros((2, n_nodes, n_features), dtype={{INPUT_DTYPE}}) return 0 cdef int init_node{{name_suffix}}( BinaryTree{{name_suffix}} tree, NodeData_t[::1] node_data, intp_t i_node, intp_t idx_start, intp_t idx_end, ) except -1: """Initialize the node for the dataset stored in tree.data""" cdef intp_t n_features = tree.data.shape[1] cdef intp_t i, j cdef float64_t rad = 0 cdef {{INPUT_DTYPE_t}}* lower_bounds = &tree.node_bounds[0, i_node, 0] cdef {{INPUT_DTYPE_t}}* upper_bounds = &tree.node_bounds[1, i_node, 0] cdef const {{INPUT_DTYPE_t}}* data = &tree.data[0, 0] cdef const intp_t* idx_array = &tree.idx_array[0] cdef const {{INPUT_DTYPE_t}}* data_row # determine Node bounds for j in range(n_features): lower_bounds[j] = INF upper_bounds[j] = -INF # Compute the actual data range. At build time, this is slightly # slower than using the previously-computed bounds of the parent node, # but leads to more compact trees and thus faster queries. for i in range(idx_start, idx_end): data_row = data + idx_array[i] * n_features for j in range(n_features): lower_bounds[j] = fmin(lower_bounds[j], data_row[j]) upper_bounds[j] = fmax(upper_bounds[j], data_row[j]) for j in range(n_features): if tree.dist_metric.p == INF: rad = fmax(rad, 0.5 * (upper_bounds[j] - lower_bounds[j])) else: rad += pow(0.5 * abs(upper_bounds[j] - lower_bounds[j]), tree.dist_metric.p) node_data[i_node].idx_start = idx_start node_data[i_node].idx_end = idx_end # The radius will hold the size of the circumscribed hypersphere measured # with the specified metric: in querying, this is used as a measure of the # size of each node when deciding which nodes to split. node_data[i_node].radius = pow(rad, 1. / tree.dist_metric.p) return 0 cdef float64_t min_rdist{{name_suffix}}( BinaryTree{{name_suffix}} tree, intp_t i_node, const {{INPUT_DTYPE_t}}* pt, ) except -1 nogil: """Compute the minimum reduced-distance between a point and a node""" cdef intp_t n_features = tree.data.shape[1] cdef float64_t d, d_lo, d_hi, rdist=0.0 cdef intp_t j if tree.dist_metric.p == INF: for j in range(n_features): d_lo = tree.node_bounds[0, i_node, j] - pt[j] d_hi = pt[j] - tree.node_bounds[1, i_node, j] d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi)) rdist = fmax(rdist, 0.5 * d) else: # here we'll use the fact that x + abs(x) = 2 * max(x, 0) for j in range(n_features): d_lo = tree.node_bounds[0, i_node, j] - pt[j] d_hi = pt[j] - tree.node_bounds[1, i_node, j] d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi)) rdist += pow(0.5 * d, tree.dist_metric.p) return rdist cdef float64_t min_dist{{name_suffix}}( BinaryTree{{name_suffix}} tree, intp_t i_node, const {{INPUT_DTYPE_t}}* pt, ) except -1: """Compute the minimum distance between a point and a node""" if tree.dist_metric.p == INF: return min_rdist{{name_suffix}}(tree, i_node, pt) else: return pow( min_rdist{{name_suffix}}(tree, i_node, pt), 1. / tree.dist_metric.p ) cdef float64_t max_rdist{{name_suffix}}( BinaryTree{{name_suffix}} tree, intp_t i_node, const {{INPUT_DTYPE_t}}* pt, ) except -1: """Compute the maximum reduced-distance between a point and a node""" cdef intp_t n_features = tree.data.shape[1] cdef float64_t d_lo, d_hi, rdist=0.0 cdef intp_t j if tree.dist_metric.p == INF: for j in range(n_features): rdist = fmax(rdist, fabs(pt[j] - tree.node_bounds[0, i_node, j])) rdist = fmax(rdist, fabs(pt[j] - tree.node_bounds[1, i_node, j])) else: for j in range(n_features): d_lo = fabs(pt[j] - tree.node_bounds[0, i_node, j]) d_hi = fabs(pt[j] - tree.node_bounds[1, i_node, j]) rdist += pow(fmax(d_lo, d_hi), tree.dist_metric.p) return rdist cdef float64_t max_dist{{name_suffix}}( BinaryTree{{name_suffix}} tree, intp_t i_node, const {{INPUT_DTYPE_t}}* pt, ) except -1: """Compute the maximum distance between a point and a node""" if tree.dist_metric.p == INF: return max_rdist{{name_suffix}}(tree, i_node, pt) else: return pow( max_rdist{{name_suffix}}(tree, i_node, pt), 1. / tree.dist_metric.p ) cdef inline int min_max_dist{{name_suffix}}( BinaryTree{{name_suffix}} tree, intp_t i_node, const {{INPUT_DTYPE_t}}* pt, float64_t* min_dist, float64_t* max_dist, ) except -1 nogil: """Compute the minimum and maximum distance between a point and a node""" cdef intp_t n_features = tree.data.shape[1] cdef float64_t d, d_lo, d_hi cdef intp_t j min_dist[0] = 0.0 max_dist[0] = 0.0 if tree.dist_metric.p == INF: for j in range(n_features): d_lo = tree.node_bounds[0, i_node, j] - pt[j] d_hi = pt[j] - tree.node_bounds[1, i_node, j] d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi)) min_dist[0] = fmax(min_dist[0], 0.5 * d) max_dist[0] = fmax(max_dist[0], fabs(d_lo)) max_dist[0] = fmax(max_dist[0], fabs(d_hi)) else: # as above, use the fact that x + abs(x) = 2 * max(x, 0) for j in range(n_features): d_lo = tree.node_bounds[0, i_node, j] - pt[j] d_hi = pt[j] - tree.node_bounds[1, i_node, j] d = (d_lo + fabs(d_lo)) + (d_hi + fabs(d_hi)) min_dist[0] += pow(0.5 * d, tree.dist_metric.p) max_dist[0] += pow(fmax(fabs(d_lo), fabs(d_hi)), tree.dist_metric.p) min_dist[0] = pow(min_dist[0], 1. / tree.dist_metric.p) max_dist[0] = pow(max_dist[0], 1. / tree.dist_metric.p) return 0 cdef inline float64_t min_rdist_dual{{name_suffix}}( BinaryTree{{name_suffix}} tree1, intp_t i_node1, BinaryTree{{name_suffix}} tree2, intp_t i_node2, ) except -1: """Compute the minimum reduced distance between two nodes""" cdef intp_t n_features = tree1.data.shape[1] cdef float64_t d, d1, d2, rdist=0.0 cdef intp_t j if tree1.dist_metric.p == INF: for j in range(n_features): d1 = (tree1.node_bounds[0, i_node1, j] - tree2.node_bounds[1, i_node2, j]) d2 = (tree2.node_bounds[0, i_node2, j] - tree1.node_bounds[1, i_node1, j]) d = (d1 + fabs(d1)) + (d2 + fabs(d2)) rdist = fmax(rdist, 0.5 * d) else: # here we'll use the fact that x + abs(x) = 2 * max(x, 0) for j in range(n_features): d1 = (tree1.node_bounds[0, i_node1, j] - tree2.node_bounds[1, i_node2, j]) d2 = (tree2.node_bounds[0, i_node2, j] - tree1.node_bounds[1, i_node1, j]) d = (d1 + fabs(d1)) + (d2 + fabs(d2)) rdist += pow(0.5 * d, tree1.dist_metric.p) return rdist cdef inline float64_t min_dist_dual{{name_suffix}}( BinaryTree{{name_suffix}} tree1, intp_t i_node1, BinaryTree{{name_suffix}} tree2, intp_t i_node2, ) except -1: """Compute the minimum distance between two nodes""" return tree1.dist_metric._rdist_to_dist( min_rdist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2) ) cdef inline float64_t max_rdist_dual{{name_suffix}}( BinaryTree{{name_suffix}} tree1, intp_t i_node1, BinaryTree{{name_suffix}} tree2, intp_t i_node2, ) except -1: """Compute the maximum reduced distance between two nodes""" cdef intp_t n_features = tree1.data.shape[1] cdef float64_t d1, d2, rdist=0.0 cdef intp_t j if tree1.dist_metric.p == INF: for j in range(n_features): rdist = fmax(rdist, fabs(tree1.node_bounds[0, i_node1, j] - tree2.node_bounds[1, i_node2, j])) rdist = fmax(rdist, fabs(tree1.node_bounds[1, i_node1, j] - tree2.node_bounds[0, i_node2, j])) else: for j in range(n_features): d1 = fabs(tree1.node_bounds[0, i_node1, j] - tree2.node_bounds[1, i_node2, j]) d2 = fabs(tree1.node_bounds[1, i_node1, j] - tree2.node_bounds[0, i_node2, j]) rdist += pow(fmax(d1, d2), tree1.dist_metric.p) return rdist cdef inline float64_t max_dist_dual{{name_suffix}}( BinaryTree{{name_suffix}} tree1, intp_t i_node1, BinaryTree{{name_suffix}} tree2, intp_t i_node2, ) except -1: """Compute the maximum distance between two nodes""" return tree1.dist_metric._rdist_to_dist( max_rdist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2) ) {{endfor}} class KDTree(KDTree64): __doc__ = CLASS_DOC.format(BinaryTree="KDTree") pass