3RNN/Lib/site-packages/scipy/stats/tests/test_contingency.py

import numpy as np
from numpy.testing import (assert_equal, assert_array_equal,
                           assert_array_almost_equal, assert_approx_equal,
                           assert_allclose)
import pytest
from pytest import raises as assert_raises
from scipy.special import xlogy
from scipy.stats.contingency import (margins, expected_freq,
                                     chi2_contingency, association)


def test_margins():
    a = np.array([1])
    m = margins(a)
    assert_equal(len(m), 1)
    m0 = m[0]
    assert_array_equal(m0, np.array([1]))

    a = np.array([[1]])
    m0, m1 = margins(a)
    expected0 = np.array([[1]])
    expected1 = np.array([[1]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(12).reshape(2, 6)
    m0, m1 = margins(a)
    expected0 = np.array([[15], [51]])
    expected1 = np.array([[6, 8, 10, 12, 14, 16]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)

    a = np.arange(24).reshape(2, 3, 4)
    m0, m1, m2 = margins(a)
    expected0 = np.array([[[66]], [[210]]])
    expected1 = np.array([[[60], [92], [124]]])
    expected2 = np.array([[[60, 66, 72, 78]]])
    assert_array_equal(m0, expected0)
    assert_array_equal(m1, expected1)
    assert_array_equal(m2, expected2)


def test_expected_freq():
    assert_array_equal(expected_freq([1]), np.array([1.0]))

    observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])
    e = expected_freq(observed)
    assert_array_equal(e, np.ones_like(observed))

    observed = np.array([[10, 10, 20], [20, 20, 20]])
    e = expected_freq(observed)
    correct = np.array([[12., 12., 16.], [18., 18., 24.]])
    assert_array_almost_equal(e, correct)


def test_chi2_contingency_trivial():
    # Some very simple tests for chi2_contingency.

    # A trivial case
    obs = np.array([[1, 2], [1, 2]])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 1)
    assert_array_equal(obs, expected)

    # A *really* trivial case: 1-D data.
    obs = np.array([1, 2, 3])
    chi2, p, dof, expected = chi2_contingency(obs, correction=False)
    assert_equal(chi2, 0.0)
    assert_equal(p, 1.0)
    assert_equal(dof, 0)
    assert_array_equal(obs, expected)


def test_chi2_contingency_R():
    # Some test cases that were computed independently, using R.

    # Rcode = \
    # """
    # # Data vector.
    # data <- c(
    #   12, 34, 23,     4,  47,  11,
    #   35, 31, 11,    34,  10,  18,
    #   12, 32,  9,    18,  13,  19,
    #   12, 12, 14,     9,  33,  25
    #   )
    #
    # # Create factor tags:r=rows, c=columns, t=tiers
    # r <- factor(gl(4, 2*3, 2*3*4, labels=c("r1", "r2", "r3", "r4")))
    # c <- factor(gl(3, 1,   2*3*4, labels=c("c1", "c2", "c3")))
    # t <- factor(gl(2, 3,   2*3*4, labels=c("t1", "t2")))
    #
    # # 3-way Chi squared test of independence
    # s = summary(xtabs(data~r+c+t))
    # print(s)
    # """
    # Routput = \
    # """
    # Call: xtabs(formula = data ~ r + c + t)
    # Number of cases in table: 478
    # Number of factors: 3
    # Test for independence of all factors:
    #         Chisq = 102.17, df = 17, p-value = 3.514e-14
    # """
    obs = np.array(
        [[[12, 34, 23],
          [35, 31, 11],
          [12, 32, 9],
          [12, 12, 14]],
         [[4, 47, 11],
          [34, 10, 18],
          [18, 13, 19],
          [9, 33, 25]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 102.17, significant=5)
    assert_approx_equal(p, 3.514e-14, significant=4)
    assert_equal(dof, 17)

    # Rcode = \
    # """
    # # Data vector.
    # data <- c(
    #     #
    #     12, 17,
    #     11, 16,
    #     #
    #     11, 12,
    #     15, 16,
    #     #
    #     23, 15,
    #     30, 22,
    #     #
    #     14, 17,
    #     15, 16
    #     )
    #
    # # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers
    # r <- factor(gl(2, 2,  2*2*2*2, labels=c("r1", "r2")))
    # c <- factor(gl(2, 1,  2*2*2*2, labels=c("c1", "c2")))
    # d <- factor(gl(2, 4,  2*2*2*2, labels=c("d1", "d2")))
    # t <- factor(gl(2, 8,  2*2*2*2, labels=c("t1", "t2")))
    #
    # # 4-way Chi squared test of independence
    # s = summary(xtabs(data~r+c+d+t))
    # print(s)
    # """
    # Routput = \
    # """
    # Call: xtabs(formula = data ~ r + c + d + t)
    # Number of cases in table: 262
    # Number of factors: 4
    # Test for independence of all factors:
    #         Chisq = 8.758, df = 11, p-value = 0.6442
    # """
    obs = np.array(
        [[[[12, 17],
           [11, 16]],
          [[11, 12],
           [15, 16]]],
         [[[23, 15],
           [30, 22]],
          [[14, 17],
           [15, 16]]]])
    chi2, p, dof, expected = chi2_contingency(obs)
    assert_approx_equal(chi2, 8.758, significant=4)
    assert_approx_equal(p, 0.6442, significant=4)
    assert_equal(dof, 11)


def test_chi2_contingency_g():
    c = np.array([[15, 60], [15, 90]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
                                    correction=False)
    assert_allclose(g, 2*xlogy(c, c/e).sum())

    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',
                                    correction=True)
    c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])
    assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())

    c = np.array([[10, 12, 10], [12, 10, 10]])
    g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')
    assert_allclose(g, 2*xlogy(c, c/e).sum())


def test_chi2_contingency_bad_args():
    # Test that "bad" inputs raise a ValueError.

    # Negative value in the array of observed frequencies.
    obs = np.array([[-1, 10], [1, 2]])
    assert_raises(ValueError, chi2_contingency, obs)

    # The zeros in this will result in zeros in the array
    # of expected frequencies.
    obs = np.array([[0, 1], [0, 1]])
    assert_raises(ValueError, chi2_contingency, obs)

    # A degenerate case: `observed` has size 0.
    obs = np.empty((0, 8))
    assert_raises(ValueError, chi2_contingency, obs)


def test_chi2_contingency_yates_gh13875():
    # Magnitude of Yates' continuity correction should not exceed difference
    # between expected and observed value of the statistic; see gh-13875
    observed = np.array([[1573, 3], [4, 0]])
    p = chi2_contingency(observed)[1]
    assert_allclose(p, 1, rtol=1e-12)


@pytest.mark.parametrize("correction", [False, True])
def test_result(correction):
    obs = np.array([[1, 2], [1, 2]])
    res = chi2_contingency(obs, correction=correction)
    assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res)


def test_bad_association_args():
    # Invalid Test Statistic
    assert_raises(ValueError, association, [[1, 2], [3, 4]], "X")
    # Invalid array shape
    assert_raises(ValueError, association, [[[1, 2]], [[3, 4]]], "cramer")
    # chi2_contingency exception
    assert_raises(ValueError, association, [[-1, 10], [1, 2]], 'cramer')
    # Invalid Array Item Data Type
    assert_raises(ValueError, association,
                  np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer')


@pytest.mark.parametrize('stat, expected',
                         [('cramer', 0.09222412010290792),
                          ('tschuprow', 0.0775509319944633),
                          ('pearson', 0.12932925727138758)])
def test_assoc(stat, expected):
    # 2d Array
    obs1 = np.array([[12, 13, 14, 15, 16],
                     [17, 16, 18, 19, 11],
                     [9, 15, 14, 12, 11]])
    a = association(observed=obs1, method=stat)
    assert_allclose(a, expected)
1.0 2024-05-26 19:49:15 +02:00			`import numpy as np`
			`from numpy.testing import (assert_equal, assert_array_equal,`
			`assert_array_almost_equal, assert_approx_equal,`
			`assert_allclose)`
			`import pytest`
			`from pytest import raises as assert_raises`
			`from scipy.special import xlogy`
			`from scipy.stats.contingency import (margins, expected_freq,`
			`chi2_contingency, association)`


			`def test_margins():`
			`a = np.array([1])`
			`m = margins(a)`
			`assert_equal(len(m), 1)`
			`m0 = m[0]`
			`assert_array_equal(m0, np.array([1]))`

			`a = np.array([[1]])`
			`m0, m1 = margins(a)`
			`expected0 = np.array([[1]])`
			`expected1 = np.array([[1]])`
			`assert_array_equal(m0, expected0)`
			`assert_array_equal(m1, expected1)`

			`a = np.arange(12).reshape(2, 6)`
			`m0, m1 = margins(a)`
			`expected0 = np.array([[15], [51]])`
			`expected1 = np.array([[6, 8, 10, 12, 14, 16]])`
			`assert_array_equal(m0, expected0)`
			`assert_array_equal(m1, expected1)`

			`a = np.arange(24).reshape(2, 3, 4)`
			`m0, m1, m2 = margins(a)`
			`expected0 = np.array([[[66]], [[210]]])`
			`expected1 = np.array([[[60], [92], [124]]])`
			`expected2 = np.array([[[60, 66, 72, 78]]])`
			`assert_array_equal(m0, expected0)`
			`assert_array_equal(m1, expected1)`
			`assert_array_equal(m2, expected2)`


			`def test_expected_freq():`
			`assert_array_equal(expected_freq([1]), np.array([1.0]))`

			`observed = np.array([[[2, 0], [0, 2]], [[0, 2], [2, 0]], [[1, 1], [1, 1]]])`
			`e = expected_freq(observed)`
			`assert_array_equal(e, np.ones_like(observed))`

			`observed = np.array([[10, 10, 20], [20, 20, 20]])`
			`e = expected_freq(observed)`
			`correct = np.array([[12., 12., 16.], [18., 18., 24.]])`
			`assert_array_almost_equal(e, correct)`


			`def test_chi2_contingency_trivial():`
			`# Some very simple tests for chi2_contingency.`

			`# A trivial case`
			`obs = np.array([[1, 2], [1, 2]])`
			`chi2, p, dof, expected = chi2_contingency(obs, correction=False)`
			`assert_equal(chi2, 0.0)`
			`assert_equal(p, 1.0)`
			`assert_equal(dof, 1)`
			`assert_array_equal(obs, expected)`

			`# A really trivial case: 1-D data.`
			`obs = np.array([1, 2, 3])`
			`chi2, p, dof, expected = chi2_contingency(obs, correction=False)`
			`assert_equal(chi2, 0.0)`
			`assert_equal(p, 1.0)`
			`assert_equal(dof, 0)`
			`assert_array_equal(obs, expected)`


			`def test_chi2_contingency_R():`
			`# Some test cases that were computed independently, using R.`

			`# Rcode = \`
			`# """`
			`# # Data vector.`
			`# data <- c(`
			`# 12, 34, 23, 4, 47, 11,`
			`# 35, 31, 11, 34, 10, 18,`
			`# 12, 32, 9, 18, 13, 19,`
			`# 12, 12, 14, 9, 33, 25`
			`# )`
			`#`
			`# # Create factor tags:r=rows, c=columns, t=tiers`
			`# r <- factor(gl(4, 23, 23*4, labels=c("r1", "r2", "r3", "r4")))`
			`# c <- factor(gl(3, 1, 234, labels=c("c1", "c2", "c3")))`
			`# t <- factor(gl(2, 3, 234, labels=c("t1", "t2")))`
			`#`
			`# # 3-way Chi squared test of independence`
			`# s = summary(xtabs(data~r+c+t))`
			`# print(s)`
			`# """`
			`# Routput = \`
			`# """`
			`# Call: xtabs(formula = data ~ r + c + t)`
			`# Number of cases in table: 478`
			`# Number of factors: 3`
			`# Test for independence of all factors:`
			`# Chisq = 102.17, df = 17, p-value = 3.514e-14`
			`# """`
			`obs = np.array(`
			`[[[12, 34, 23],`
			`[35, 31, 11],`
			`[12, 32, 9],`
			`[12, 12, 14]],`
			`[[4, 47, 11],`
			`[34, 10, 18],`
			`[18, 13, 19],`
			`[9, 33, 25]]])`
			`chi2, p, dof, expected = chi2_contingency(obs)`
			`assert_approx_equal(chi2, 102.17, significant=5)`
			`assert_approx_equal(p, 3.514e-14, significant=4)`
			`assert_equal(dof, 17)`

			`# Rcode = \`
			`# """`
			`# # Data vector.`
			`# data <- c(`
			`# #`
			`# 12, 17,`
			`# 11, 16,`
			`# #`
			`# 11, 12,`
			`# 15, 16,`
			`# #`
			`# 23, 15,`
			`# 30, 22,`
			`# #`
			`# 14, 17,`
			`# 15, 16`
			`# )`
			`#`
			`# # Create factor tags:r=rows, c=columns, d=depths(?), t=tiers`
			`# r <- factor(gl(2, 2, 222*2, labels=c("r1", "r2")))`
			`# c <- factor(gl(2, 1, 222*2, labels=c("c1", "c2")))`
			`# d <- factor(gl(2, 4, 222*2, labels=c("d1", "d2")))`
			`# t <- factor(gl(2, 8, 222*2, labels=c("t1", "t2")))`
			`#`
			`# # 4-way Chi squared test of independence`
			`# s = summary(xtabs(data~r+c+d+t))`
			`# print(s)`
			`# """`
			`# Routput = \`
			`# """`
			`# Call: xtabs(formula = data ~ r + c + d + t)`
			`# Number of cases in table: 262`
			`# Number of factors: 4`
			`# Test for independence of all factors:`
			`# Chisq = 8.758, df = 11, p-value = 0.6442`
			`# """`
			`obs = np.array(`
			`[[[[12, 17],`
			`[11, 16]],`
			`[[11, 12],`
			`[15, 16]]],`
			`[[[23, 15],`
			`[30, 22]],`
			`[[14, 17],`
			`[15, 16]]]])`
			`chi2, p, dof, expected = chi2_contingency(obs)`
			`assert_approx_equal(chi2, 8.758, significant=4)`
			`assert_approx_equal(p, 0.6442, significant=4)`
			`assert_equal(dof, 11)`


			`def test_chi2_contingency_g():`
			`c = np.array([[15, 60], [15, 90]])`
			`g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',`
			`correction=False)`
			`assert_allclose(g, 2*xlogy(c, c/e).sum())`

			`g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood',`
			`correction=True)`
			`c_corr = c + np.array([[-0.5, 0.5], [0.5, -0.5]])`
			`assert_allclose(g, 2*xlogy(c_corr, c_corr/e).sum())`

			`c = np.array([[10, 12, 10], [12, 10, 10]])`
			`g, p, dof, e = chi2_contingency(c, lambda_='log-likelihood')`
			`assert_allclose(g, 2*xlogy(c, c/e).sum())`


			`def test_chi2_contingency_bad_args():`
			`# Test that "bad" inputs raise a ValueError.`

			`# Negative value in the array of observed frequencies.`
			`obs = np.array([[-1, 10], [1, 2]])`
			`assert_raises(ValueError, chi2_contingency, obs)`

			`# The zeros in this will result in zeros in the array`
			`# of expected frequencies.`
			`obs = np.array([[0, 1], [0, 1]])`
			`assert_raises(ValueError, chi2_contingency, obs)`

			# A degenerate case: `observed` has size 0.
			`obs = np.empty((0, 8))`
			`assert_raises(ValueError, chi2_contingency, obs)`


			`def test_chi2_contingency_yates_gh13875():`
			`# Magnitude of Yates' continuity correction should not exceed difference`
			`# between expected and observed value of the statistic; see gh-13875`
			`observed = np.array([[1573, 3], [4, 0]])`
			`p = chi2_contingency(observed)[1]`
			`assert_allclose(p, 1, rtol=1e-12)`


			`@pytest.mark.parametrize("correction", [False, True])`
			`def test_result(correction):`
			`obs = np.array([[1, 2], [1, 2]])`
			`res = chi2_contingency(obs, correction=correction)`
			`assert_equal((res.statistic, res.pvalue, res.dof, res.expected_freq), res)`


			`def test_bad_association_args():`
			`# Invalid Test Statistic`
			`assert_raises(ValueError, association, [[1, 2], [3, 4]], "X")`
			`# Invalid array shape`
			`assert_raises(ValueError, association, [[[1, 2]], [[3, 4]]], "cramer")`
			`# chi2_contingency exception`
			`assert_raises(ValueError, association, [[-1, 10], [1, 2]], 'cramer')`
			`# Invalid Array Item Data Type`
			`assert_raises(ValueError, association,`
			`np.array([[1, 2], ["dd", 4]], dtype=object), 'cramer')`


			`@pytest.mark.parametrize('stat, expected',`
			`[('cramer', 0.09222412010290792),`
			`('tschuprow', 0.0775509319944633),`
			`('pearson', 0.12932925727138758)])`
			`def test_assoc(stat, expected):`
			`# 2d Array`
			`obs1 = np.array([[12, 13, 14, 15, 16],`
			`[17, 16, 18, 19, 11],`
			`[9, 15, 14, 12, 11]])`
			`a = association(observed=obs1, method=stat)`
			`assert_allclose(a, expected)`