"""Tests for ``sklearn.utils.stats._weighted_percentile``."""

import numpy as np
from numpy.testing import assert_allclose
from pytest import approx

from sklearn.utils.stats import _weighted_percentile


def test_weighted_percentile():
    """The 50th percentile ignores a huge outlier that carries zero weight."""
    # Layout: 50 zeros, a single 1, 50 twos, then one extreme value.
    y = np.concatenate((np.zeros(50), [1.0], np.full(50, 2.0), [100000.0]))
    # Every sample has unit weight except the extreme value, which has none.
    sw = np.append(np.ones(101, dtype=np.float64), 0.0)

    assert approx(_weighted_percentile(y, sw, 50)) == 1


def test_weighted_percentile_equal():
    """With all-zero data the 50th percentile is exactly zero."""
    y = np.zeros(102, dtype=np.float64)
    sw = np.ones(102, dtype=np.float64)
    sw[-1] = 0.0

    assert _weighted_percentile(y, sw, 50) == 0


def test_weighted_percentile_zero_weight():
    """All-zero weights on constant data still yield that constant."""
    y = np.ones(102, dtype=np.float64)
    sw = np.zeros(102, dtype=np.float64)

    assert approx(_weighted_percentile(y, sw, 50)) == 1.0


def test_weighted_percentile_zero_weight_zero_percentile():
    """Zero-weight samples at either end are skipped at 0, 50 and 100."""
    y = np.arange(6)
    sw = np.array([0, 0, 1, 1, 1, 0])

    # Only the values 2, 3 and 4 carry weight.
    for percentile, expected in ((0, 2), (50, 3), (100, 4)):
        assert approx(_weighted_percentile(y, sw, percentile)) == expected


def test_weighted_median_equal_weights():
    """With equal weights the default percentile matches ``np.median``."""
    rng = np.random.RandomState(0)
    # Odd size as _weighted_percentile takes lower weighted percentile
    x = rng.randint(10, size=11)
    weights = np.ones(x.shape)

    assert np.median(x) == approx(_weighted_percentile(x, weights))


def test_weighted_median_integer_weights():
    """Integer weights behave like repeating each sample that many times."""
    rng = np.random.RandomState(0)
    x = rng.randint(20, size=10)
    weights = rng.choice(5, size=10)

    # Reference: the plain median of the data expanded by its weights.
    expected = np.median(np.repeat(x, weights))

    assert expected == approx(_weighted_percentile(x, weights))


def test_weighted_percentile_2d():
    """A 2D input is handled column by column for 1D and 2D weights."""
    rng = np.random.RandomState(0)
    x1 = rng.randint(10, size=10)
    w1 = rng.choice(5, size=10)
    x2 = rng.randint(20, size=10)
    x_2d = np.vstack((x1, x2)).T

    # 2D array with 1D sample_weight: the same weights apply to each column.
    result = _weighted_percentile(x_2d, w1)
    per_column = [_weighted_percentile(column, w1) for column in x_2d.T]
    assert_allclose(result, per_column)

    # 2D array with 2D sample_weight: each column has its own weights.
    w2 = rng.choice(5, size=10)
    w_2d = np.vstack((w1, w2)).T

    result = _weighted_percentile(x_2d, w_2d)
    per_column = [
        _weighted_percentile(column, column_weights)
        for column, column_weights in zip(x_2d.T, w_2d.T)
    ]
    assert_allclose(result, per_column)