projektAI/venv/Lib/site-packages/pandas/tests/arrays/categorical/test_missing.py

import collections

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import Categorical, DataFrame, Index, Series, isna
import pandas._testing as tm


class TestCategoricalMissing:
    def test_isna(self):
        exp = np.array([False, False, True])
        cat = Categorical(["a", "b", np.nan])
        res = cat.isna()

        tm.assert_numpy_array_equal(res, exp)

    def test_na_flags_int_categories(self):
        # #1457

        categories = list(range(10))
        labels = np.random.randint(0, 10, 20)
        labels[::5] = -1

        cat = Categorical(labels, categories, fastpath=True)
        repr(cat)

        tm.assert_numpy_array_equal(isna(cat), labels == -1)

    def test_nan_handling(self):

        # Nans are represented as -1 in codes
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))
        c[1] = np.nan
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))

        # Adding nan to categories should make assigned nan point to the
        # category!
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))

    def test_set_dtype_nans(self):
        c = Categorical(["a", "b", np.nan])
        result = c._set_dtype(CategoricalDtype(["a", "c"]))
        tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))

    def test_set_item_nan(self):
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize(
        "fillna_kwargs, msg",
        [
            (
                {"value": 1, "method": "ffill"},
                "Cannot specify both 'value' and 'method'.",
            ),
            ({}, "Must specify a fill 'value' or 'method'."),
            ({"method": "bad"}, "Invalid fill method. Expecting .* bad"),
            (
                {"value": Series([1, 2, 3, 4, "a"])},
                "Cannot setitem on a Categorical with a new category",
            ),
        ],
    )
    def test_fillna_raises(self, fillna_kwargs, msg):
        # https://github.com/pandas-dev/pandas/issues/19682
        # https://github.com/pandas-dev/pandas/issues/13628
        cat = Categorical([1, 2, 3, None, None])

        with pytest.raises(ValueError, match=msg):
            cat.fillna(**fillna_kwargs)

    @pytest.mark.parametrize("named", [True, False])
    def test_fillna_iterable_category(self, named):
        # https://github.com/pandas-dev/pandas/issues/21097
        if named:
            Point = collections.namedtuple("Point", "x y")
        else:
            Point = lambda *args: args  # tuple
        cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object))
        result = cat.fillna(Point(0, 0))
        expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(result, expected)

    def test_fillna_array(self):
        # accept Categorical or ndarray value if it holds appropriate values
        cat = Categorical(["A", "B", "C", None, None])

        other = cat.fillna("C")
        result = cat.fillna(other)
        tm.assert_categorical_equal(result, other)
        assert isna(cat[-1])  # didnt modify original inplace

        other = np.array(["A", "B", "C", "B", "A"])
        result = cat.fillna(other)
        expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
        tm.assert_categorical_equal(result, expected)
        assert isna(cat[-1])  # didnt modify original inplace

    @pytest.mark.parametrize(
        "values, expected",
        [
            ([1, 2, 3], np.array([False, False, False])),
            ([1, 2, np.nan], np.array([False, False, True])),
            ([1, 2, np.inf], np.array([False, False, True])),
            ([1, 2, pd.NA], np.array([False, False, True])),
        ],
    )
    def test_use_inf_as_na(self, values, expected):
        # https://github.com/pandas-dev/pandas/issues/33594
        with pd.option_context("mode.use_inf_as_na", True):
            cat = Categorical(values)
            result = cat.isna()
            tm.assert_numpy_array_equal(result, expected)

            result = Series(cat).isna()
            expected = Series(expected)
            tm.assert_series_equal(result, expected)

            result = DataFrame(cat).isna()
            expected = DataFrame(expected)
            tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "values, expected",
        [
            ([1, 2, 3], np.array([False, False, False])),
            ([1, 2, np.nan], np.array([False, False, True])),
            ([1, 2, np.inf], np.array([False, False, True])),
            ([1, 2, pd.NA], np.array([False, False, True])),
        ],
    )
    def test_use_inf_as_na_outside_context(self, values, expected):
        # https://github.com/pandas-dev/pandas/issues/33594
        # Using isna directly for Categorical will fail in general here
        cat = Categorical(values)

        with pd.option_context("mode.use_inf_as_na", True):
            result = pd.isna(cat)
            tm.assert_numpy_array_equal(result, expected)

            result = pd.isna(Series(cat))
            expected = Series(expected)
            tm.assert_series_equal(result, expected)

            result = pd.isna(DataFrame(cat))
            expected = DataFrame(expected)
            tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "a1, a2, categories",
        [
            (["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]),
            ([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]),
        ],
    )
    def test_compare_categorical_with_missing(self, a1, a2, categories):
        # GH 28384
        cat_type = CategoricalDtype(categories)

        # !=
        result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type)
        expected = Series(a1) != Series(a2)
        tm.assert_series_equal(result, expected)

        # ==
        result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)
        expected = Series(a1) == Series(a2)
        tm.assert_series_equal(result, expected)
Działa 2021-06-06 22:13:05 +02:00			`import collections`

			`import numpy as np`
			`import pytest`

			`from pandas.core.dtypes.dtypes import CategoricalDtype`

			`import pandas as pd`
			`from pandas import Categorical, DataFrame, Index, Series, isna`
			`import pandas._testing as tm`


			`class TestCategoricalMissing:`
			`def test_isna(self):`
			`exp = np.array([False, False, True])`
			`cat = Categorical(["a", "b", np.nan])`
			`res = cat.isna()`

			`tm.assert_numpy_array_equal(res, exp)`

			`def test_na_flags_int_categories(self):`
			`# #1457`

			`categories = list(range(10))`
			`labels = np.random.randint(0, 10, 20)`
			`labels[::5] = -1`

			`cat = Categorical(labels, categories, fastpath=True)`
			`repr(cat)`

			`tm.assert_numpy_array_equal(isna(cat), labels == -1)`

			`def test_nan_handling(self):`

			`# Nans are represented as -1 in codes`
			`c = Categorical(["a", "b", np.nan, "a"])`
			`tm.assert_index_equal(c.categories, Index(["a", "b"]))`
			`tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))`
			`c[1] = np.nan`
			`tm.assert_index_equal(c.categories, Index(["a", "b"]))`
			`tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))`

			`# Adding nan to categories should make assigned nan point to the`
			`# category!`
			`c = Categorical(["a", "b", np.nan, "a"])`
			`tm.assert_index_equal(c.categories, Index(["a", "b"]))`
			`tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))`

			`def test_set_dtype_nans(self):`
			`c = Categorical(["a", "b", np.nan])`
			`result = c._set_dtype(CategoricalDtype(["a", "c"]))`
			`tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))`

			`def test_set_item_nan(self):`
			`cat = Categorical([1, 2, 3])`
			`cat[1] = np.nan`

			`exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])`
			`tm.assert_categorical_equal(cat, exp)`

			`@pytest.mark.parametrize(`
			`"fillna_kwargs, msg",`
			`[`
			`(`
			`{"value": 1, "method": "ffill"},`
			`"Cannot specify both 'value' and 'method'.",`
			`),`
			`({}, "Must specify a fill 'value' or 'method'."),`
			`({"method": "bad"}, "Invalid fill method. Expecting .* bad"),`
			`(`
			`{"value": Series([1, 2, 3, 4, "a"])},`
			`"Cannot setitem on a Categorical with a new category",`
			`),`
			`],`
			`)`
			`def test_fillna_raises(self, fillna_kwargs, msg):`
			`# https://github.com/pandas-dev/pandas/issues/19682`
			`# https://github.com/pandas-dev/pandas/issues/13628`
			`cat = Categorical([1, 2, 3, None, None])`

			`with pytest.raises(ValueError, match=msg):`
			`cat.fillna(**fillna_kwargs)`

			`@pytest.mark.parametrize("named", [True, False])`
			`def test_fillna_iterable_category(self, named):`
			`# https://github.com/pandas-dev/pandas/issues/21097`
			`if named:`
			`Point = collections.namedtuple("Point", "x y")`
			`else:`
			`Point = lambda *args: args # tuple`
			`cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object))`
			`result = cat.fillna(Point(0, 0))`
			`expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])`

			`tm.assert_categorical_equal(result, expected)`

			`def test_fillna_array(self):`
			`# accept Categorical or ndarray value if it holds appropriate values`
			`cat = Categorical(["A", "B", "C", None, None])`

			`other = cat.fillna("C")`
			`result = cat.fillna(other)`
			`tm.assert_categorical_equal(result, other)`
			`assert isna(cat[-1]) # didnt modify original inplace`

			`other = np.array(["A", "B", "C", "B", "A"])`
			`result = cat.fillna(other)`
			`expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)`
			`tm.assert_categorical_equal(result, expected)`
			`assert isna(cat[-1]) # didnt modify original inplace`

			`@pytest.mark.parametrize(`
			`"values, expected",`
			`[`
			`([1, 2, 3], np.array([False, False, False])),`
			`([1, 2, np.nan], np.array([False, False, True])),`
			`([1, 2, np.inf], np.array([False, False, True])),`
			`([1, 2, pd.NA], np.array([False, False, True])),`
			`],`
			`)`
			`def test_use_inf_as_na(self, values, expected):`
			`# https://github.com/pandas-dev/pandas/issues/33594`
			`with pd.option_context("mode.use_inf_as_na", True):`
			`cat = Categorical(values)`
			`result = cat.isna()`
			`tm.assert_numpy_array_equal(result, expected)`

			`result = Series(cat).isna()`
			`expected = Series(expected)`
			`tm.assert_series_equal(result, expected)`

			`result = DataFrame(cat).isna()`
			`expected = DataFrame(expected)`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize(`
			`"values, expected",`
			`[`
			`([1, 2, 3], np.array([False, False, False])),`
			`([1, 2, np.nan], np.array([False, False, True])),`
			`([1, 2, np.inf], np.array([False, False, True])),`
			`([1, 2, pd.NA], np.array([False, False, True])),`
			`],`
			`)`
			`def test_use_inf_as_na_outside_context(self, values, expected):`
			`# https://github.com/pandas-dev/pandas/issues/33594`
			`# Using isna directly for Categorical will fail in general here`
			`cat = Categorical(values)`

			`with pd.option_context("mode.use_inf_as_na", True):`
			`result = pd.isna(cat)`
			`tm.assert_numpy_array_equal(result, expected)`

			`result = pd.isna(Series(cat))`
			`expected = Series(expected)`
			`tm.assert_series_equal(result, expected)`

			`result = pd.isna(DataFrame(cat))`
			`expected = DataFrame(expected)`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize(`
			`"a1, a2, categories",`
			`[`
			`(["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]),`
			`([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]),`
			`],`
			`)`
			`def test_compare_categorical_with_missing(self, a1, a2, categories):`
			`# GH 28384`
			`cat_type = CategoricalDtype(categories)`

			`# !=`
			`result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type)`
			`expected = Series(a1) != Series(a2)`
			`tm.assert_series_equal(result, expected)`

			`# ==`
			`result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)`
			`expected = Series(a1) == Series(a2)`
			`tm.assert_series_equal(result, expected)`