285 lines
9.1 KiB
Plaintext
285 lines
9.1 KiB
Plaintext
{{py:
|
|
|
|
# Generated file: _ball_tree.pyx
|
|
|
|
implementation_specific_values = [
|
|
# The values are arranged as follows:
|
|
#
|
|
# name_suffix, INPUT_DTYPE_t, INPUT_DTYPE
|
|
#
|
|
('64', 'float64_t', 'np.float64'),
|
|
('32', 'float32_t', 'np.float32')
|
|
]
|
|
|
|
# Author: Jake Vanderplas <vanderplas@astro.washington.edu>
|
|
# License: BSD 3 clause
|
|
|
|
}}
|
|
|
|
|
|
__all__ = ['BallTree', 'BallTree64', 'BallTree32']
|
|
|
|
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
|
|
|
|
DOC_DICT{{name_suffix}} = {
|
|
'BinaryTree': 'BallTree{{name_suffix}}',
|
|
'binary_tree': 'ball_tree{{name_suffix}}',
|
|
}
|
|
|
|
VALID_METRICS{{name_suffix}} = [
|
|
'BrayCurtisDistance{{name_suffix}}',
|
|
'CanberraDistance{{name_suffix}}',
|
|
'ChebyshevDistance{{name_suffix}}',
|
|
'DiceDistance{{name_suffix}}',
|
|
'EuclideanDistance{{name_suffix}}',
|
|
'HammingDistance{{name_suffix}}',
|
|
'HaversineDistance{{name_suffix}}',
|
|
'JaccardDistance{{name_suffix}}',
|
|
'MahalanobisDistance{{name_suffix}}',
|
|
'ManhattanDistance{{name_suffix}}',
|
|
'MinkowskiDistance{{name_suffix}}',
|
|
'PyFuncDistance{{name_suffix}}',
|
|
'RogersTanimotoDistance{{name_suffix}}',
|
|
'RussellRaoDistance{{name_suffix}}',
|
|
'SEuclideanDistance{{name_suffix}}',
|
|
'SokalMichenerDistance{{name_suffix}}',
|
|
'SokalSneathDistance{{name_suffix}}',
|
|
'WMinkowskiDistance{{name_suffix}}',
|
|
]
|
|
|
|
{{endfor}}
|
|
|
|
include "_binary_tree.pxi"
|
|
|
|
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
|
|
|
|
# Inherit BallTree{{name_suffix}} from BinaryTree{{name_suffix}}
|
|
cdef class BallTree{{name_suffix}}(BinaryTree{{name_suffix}}):
|
|
__doc__ = CLASS_DOC.format(**DOC_DICT{{name_suffix}})
|
|
pass
|
|
|
|
{{endfor}}
|
|
|
|
|
|
#----------------------------------------------------------------------
|
|
# The functions below specialized the Binary Tree as a Ball Tree
|
|
#
|
|
# Note that these functions use the concept of "reduced distance".
|
|
# The reduced distance, defined for some metrics, is a quantity which
|
|
# is more efficient to compute than the distance, but preserves the
|
|
# relative rankings of the true distance. For example, the reduced
|
|
# distance for the Euclidean metric is the squared-euclidean distance.
|
|
# For some metrics, the reduced distance is simply the distance.
|
|
|
|
{{for name_suffix, INPUT_DTYPE_t, INPUT_DTYPE in implementation_specific_values}}
|
|
|
|
cdef int allocate_data{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
intp_t n_nodes,
|
|
intp_t n_features,
|
|
) except -1:
|
|
"""Allocate arrays needed for the KD Tree"""
|
|
tree.node_bounds = np.zeros((1, n_nodes, n_features), dtype={{INPUT_DTYPE}})
|
|
return 0
|
|
|
|
|
|
cdef int init_node{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
NodeData_t[::1] node_data,
|
|
intp_t i_node,
|
|
intp_t idx_start,
|
|
intp_t idx_end,
|
|
) except -1:
|
|
"""Initialize the node for the dataset stored in tree.data"""
|
|
cdef intp_t n_features = tree.data.shape[1]
|
|
cdef intp_t n_points = idx_end - idx_start
|
|
|
|
cdef intp_t i, j
|
|
cdef float64_t radius
|
|
cdef const {{INPUT_DTYPE_t}} *this_pt
|
|
|
|
cdef intp_t* idx_array = &tree.idx_array[0]
|
|
cdef const {{INPUT_DTYPE_t}}* data = &tree.data[0, 0]
|
|
cdef {{INPUT_DTYPE_t}}* centroid = &tree.node_bounds[0, i_node, 0]
|
|
|
|
cdef bint with_sample_weight = tree.sample_weight is not None
|
|
cdef const {{INPUT_DTYPE_t}}* sample_weight
|
|
cdef float64_t sum_weight_node
|
|
if with_sample_weight:
|
|
sample_weight = &tree.sample_weight[0]
|
|
|
|
# determine Node centroid
|
|
for j in range(n_features):
|
|
centroid[j] = 0
|
|
|
|
if with_sample_weight:
|
|
sum_weight_node = 0
|
|
for i in range(idx_start, idx_end):
|
|
sum_weight_node += sample_weight[idx_array[i]]
|
|
this_pt = data + n_features * idx_array[i]
|
|
for j from 0 <= j < n_features:
|
|
centroid[j] += this_pt[j] * sample_weight[idx_array[i]]
|
|
|
|
for j in range(n_features):
|
|
centroid[j] /= sum_weight_node
|
|
else:
|
|
for i in range(idx_start, idx_end):
|
|
this_pt = data + n_features * idx_array[i]
|
|
for j from 0 <= j < n_features:
|
|
centroid[j] += this_pt[j]
|
|
|
|
for j in range(n_features):
|
|
centroid[j] /= n_points
|
|
|
|
# determine Node radius
|
|
radius = 0
|
|
for i in range(idx_start, idx_end):
|
|
radius = fmax(radius,
|
|
tree.rdist(centroid,
|
|
data + n_features * idx_array[i],
|
|
n_features))
|
|
|
|
node_data[i_node].radius = tree.dist_metric._rdist_to_dist(radius)
|
|
node_data[i_node].idx_start = idx_start
|
|
node_data[i_node].idx_end = idx_end
|
|
return 0
|
|
|
|
|
|
cdef inline float64_t min_dist{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
intp_t i_node,
|
|
const {{INPUT_DTYPE_t}}* pt,
|
|
) except -1 nogil:
|
|
"""Compute the minimum distance between a point and a node"""
|
|
cdef float64_t dist_pt = tree.dist(pt, &tree.node_bounds[0, i_node, 0],
|
|
tree.data.shape[1])
|
|
return fmax(0, dist_pt - tree.node_data[i_node].radius)
|
|
|
|
|
|
cdef inline float64_t max_dist{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
intp_t i_node,
|
|
const {{INPUT_DTYPE_t}}* pt,
|
|
) except -1:
|
|
"""Compute the maximum distance between a point and a node"""
|
|
cdef float64_t dist_pt = tree.dist(pt, &tree.node_bounds[0, i_node, 0],
|
|
tree.data.shape[1])
|
|
return dist_pt + tree.node_data[i_node].radius
|
|
|
|
|
|
cdef inline int min_max_dist{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
intp_t i_node,
|
|
const {{INPUT_DTYPE_t}}* pt,
|
|
float64_t* min_dist,
|
|
float64_t* max_dist,
|
|
) except -1 nogil:
|
|
"""Compute the minimum and maximum distance between a point and a node"""
|
|
cdef float64_t dist_pt = tree.dist(pt, &tree.node_bounds[0, i_node, 0],
|
|
tree.data.shape[1])
|
|
cdef float64_t rad = tree.node_data[i_node].radius
|
|
min_dist[0] = fmax(0, dist_pt - rad)
|
|
max_dist[0] = dist_pt + rad
|
|
return 0
|
|
|
|
|
|
cdef inline float64_t min_rdist{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
intp_t i_node,
|
|
const {{INPUT_DTYPE_t}}* pt,
|
|
) except -1 nogil:
|
|
"""Compute the minimum reduced-distance between a point and a node"""
|
|
if tree.euclidean:
|
|
return euclidean_dist_to_rdist{{name_suffix}}(
|
|
min_dist{{name_suffix}}(tree, i_node, pt)
|
|
)
|
|
else:
|
|
return tree.dist_metric._dist_to_rdist(
|
|
min_dist{{name_suffix}}(tree, i_node, pt)
|
|
)
|
|
|
|
|
|
cdef inline float64_t max_rdist{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree,
|
|
intp_t i_node,
|
|
const {{INPUT_DTYPE_t}}* pt,
|
|
) except -1:
|
|
"""Compute the maximum reduced-distance between a point and a node"""
|
|
if tree.euclidean:
|
|
return euclidean_dist_to_rdist{{name_suffix}}(
|
|
max_dist{{name_suffix}}(tree, i_node, pt)
|
|
)
|
|
else:
|
|
return tree.dist_metric._dist_to_rdist(
|
|
max_dist{{name_suffix}}(tree, i_node, pt)
|
|
)
|
|
|
|
|
|
cdef inline float64_t min_dist_dual{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree1,
|
|
intp_t i_node1,
|
|
BinaryTree{{name_suffix}} tree2,
|
|
intp_t i_node2,
|
|
) except -1:
|
|
"""compute the minimum distance between two nodes"""
|
|
cdef float64_t dist_pt = tree1.dist(&tree2.node_bounds[0, i_node2, 0],
|
|
&tree1.node_bounds[0, i_node1, 0],
|
|
tree1.data.shape[1])
|
|
return fmax(0, (dist_pt - tree1.node_data[i_node1].radius
|
|
- tree2.node_data[i_node2].radius))
|
|
|
|
|
|
cdef inline float64_t max_dist_dual{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree1,
|
|
intp_t i_node1,
|
|
BinaryTree{{name_suffix}} tree2,
|
|
intp_t i_node2,
|
|
) except -1:
|
|
"""compute the maximum distance between two nodes"""
|
|
cdef float64_t dist_pt = tree1.dist(&tree2.node_bounds[0, i_node2, 0],
|
|
&tree1.node_bounds[0, i_node1, 0],
|
|
tree1.data.shape[1])
|
|
return (dist_pt + tree1.node_data[i_node1].radius
|
|
+ tree2.node_data[i_node2].radius)
|
|
|
|
|
|
cdef inline float64_t min_rdist_dual{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree1,
|
|
intp_t i_node1,
|
|
BinaryTree{{name_suffix}} tree2,
|
|
intp_t i_node2,
|
|
) except -1:
|
|
"""compute the minimum reduced distance between two nodes"""
|
|
if tree1.euclidean:
|
|
return euclidean_dist_to_rdist{{name_suffix}}(
|
|
min_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
|
|
)
|
|
else:
|
|
return tree1.dist_metric._dist_to_rdist(
|
|
min_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
|
|
)
|
|
|
|
|
|
cdef inline float64_t max_rdist_dual{{name_suffix}}(
|
|
BinaryTree{{name_suffix}} tree1,
|
|
intp_t i_node1,
|
|
BinaryTree{{name_suffix}} tree2,
|
|
intp_t i_node2,
|
|
) except -1:
|
|
"""compute the maximum reduced distance between two nodes"""
|
|
if tree1.euclidean:
|
|
return euclidean_dist_to_rdist{{name_suffix}}(
|
|
max_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
|
|
)
|
|
else:
|
|
return tree1.dist_metric._dist_to_rdist(
|
|
max_dist_dual{{name_suffix}}(tree1, i_node1, tree2, i_node2)
|
|
)
|
|
|
|
{{endfor}}
|
|
|
|
|
|
class BallTree(BallTree64):
|
|
__doc__ = CLASS_DOC.format(BinaryTree="BallTree")
|
|
pass
|