99 lines
2.7 KiB
Python
99 lines
2.7 KiB
Python
|
import numpy as np
|
||
|
from numpy.testing import assert_allclose
|
||
|
from pytest import approx
|
||
|
|
||
|
from sklearn.utils.stats import _weighted_percentile
|
||
|
|
||
|
|
||
|
def test_weighted_percentile():
    # Layout of the 102 samples: fifty 0s, a single 1, fifty 2s and one
    # extreme value (100000) in the last slot.
    values = np.full(102, 2.0, dtype=np.float64)
    values[:50] = 0
    values[50] = 1
    values[-1] = 100000

    # The extreme last sample is given zero weight; all others weigh 1.
    sample_weight = np.ones_like(values)
    sample_weight[-1] = 0.0

    result = _weighted_percentile(values, sample_weight, 50)
    assert 1 == approx(result)
|
||
|
|
||
|
|
||
|
def test_weighted_percentile_equal():
    # Every sample has the same value (0.0); the last one carries no weight.
    values = np.zeros(102, dtype=np.float64)
    sample_weight = np.ones_like(values)
    sample_weight[-1] = 0.0

    result = _weighted_percentile(values, sample_weight, 50)
    # Exact comparison: the 50th percentile must be exactly 0.
    assert result == 0
|
||
|
|
||
|
|
||
|
def test_weighted_percentile_zero_weight():
    # Constant data (1.0) where every sample weight is zero; the test expects
    # the 50th percentile to come back as approximately that constant.
    values = np.full(102, 1.0, dtype=np.float64)
    sample_weight = np.zeros(102, dtype=np.float64)

    result = _weighted_percentile(values, sample_weight, 50)
    assert 1.0 == approx(result)
|
||
|
|
||
|
|
||
|
def test_weighted_percentile_zero_weight_zero_percentile():
    # Only the values 2, 3 and 4 carry a non-zero weight, so the extreme
    # percentiles are expected to land on weighted samples rather than on the
    # zero-weight values at either end of the array.
    values = np.arange(6)
    sample_weight = np.array([0, 0, 1, 1, 1, 0])

    # (percentile, expected value) pairs, checked in ascending order.
    expectations = ((0, 2), (50, 3), (100, 4))
    for percentile, expected in expectations:
        result = _weighted_percentile(values, sample_weight, percentile)
        assert expected == approx(result)
|
||
|
|
||
|
|
||
|
def test_weighted_median_equal_weights():
    # With equal weights, _weighted_percentile called without an explicit
    # percentile is expected to agree with the plain median.
    random_state = np.random.RandomState(0)
    # An odd number of samples is used because _weighted_percentile takes the
    # lower weighted percentile.
    data = random_state.randint(10, size=11)
    uniform_weights = np.ones_like(data, dtype=np.float64)

    weighted = _weighted_percentile(data, uniform_weights)
    reference = np.median(data)
    assert approx(weighted) == reference
|
||
|
|
||
|
|
||
|
def test_weighted_median_integer_weights():
    # Integer weights should act like repetition counts: the result of
    # _weighted_percentile (called without an explicit percentile) is compared
    # with the median of the data where each sample is repeated `weight` times.
    random_state = np.random.RandomState(0)
    data = random_state.randint(20, size=10)
    counts = random_state.choice(5, size=10)

    expanded = np.repeat(data, counts)
    reference = np.median(expanded)

    weighted = _weighted_percentile(data, counts)
    assert approx(weighted) == reference
|
||
|
|
||
|
|
||
|
def test_weighted_percentile_2d():
    random_state = np.random.RandomState(0)
    first_col = random_state.randint(10, size=10)
    first_weights = random_state.choice(5, size=10)
    second_col = random_state.randint(20, size=10)
    data_2d = np.array([first_col, second_col]).T

    # Case 1: 2D data with a 1D sample_weight. The result must match calling
    # the function on each column separately with the same weights.
    combined = _weighted_percentile(data_2d, first_weights)
    per_column = [
        _weighted_percentile(column, first_weights) for column in data_2d.T
    ]
    assert_allclose(combined, per_column)

    # Case 2: data and sample_weight are both 2D. Each data column is paired
    # with the weight column at the same index.
    second_weights = random_state.choice(5, size=10)
    weights_2d = np.array([first_weights, second_weights]).T

    combined = _weighted_percentile(data_2d, weights_2d)
    per_column = [
        _weighted_percentile(column, column_weights)
        for column, column_weights in zip(data_2d.T, weights_2d.T)
    ]
    assert_allclose(combined, per_column)
|