3RNN/Lib/site-packages/pandas/tests/arrays/categorical/test_missing.py

import collections

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    Index,
    Series,
    isna,
)
import pandas._testing as tm


class TestCategoricalMissing:
    """Tests for how ``Categorical`` represents, detects and fills missing values."""

    def test_isna(self):
        """``Categorical.isna`` flags only the NaN entry."""
        exp = np.array([False, False, True])
        cat = Categorical(["a", "b", np.nan])
        res = cat.isna()

        tm.assert_numpy_array_equal(res, exp)

    def test_na_flags_int_categories(self):
        """Labels that are not among the integer categories are reported as missing."""
        # #1457

        categories = list(range(10))
        labels = np.random.default_rng(2).integers(0, 10, 20)
        # -1 is not one of ``categories``; the assertion below expects exactly
        # these positions to be flagged as missing.
        labels[::5] = -1

        cat = Categorical(labels, categories)
        # Smoke check: building the repr must not raise.
        repr(cat)

        tm.assert_numpy_array_equal(isna(cat), labels == -1)

    def test_nan_handling(self):
        """NaN is stored as code -1 and never shows up in ``categories``."""
        # Nans are represented as -1 in codes
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))

        # Assigning NaN to an existing element only changes that element's code.
        c[1] = np.nan
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))

        # Re-check on a freshly constructed Categorical: NaN is still not a
        # category and is still encoded as -1.
        c = Categorical(["a", "b", np.nan, "a"])
        tm.assert_index_equal(c.categories, Index(["a", "b"]))
        tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))

    def test_set_dtype_nans(self):
        """``_set_dtype`` keeps NaN as -1 and maps a dropped category to -1 too."""
        c = Categorical(["a", "b", np.nan])
        # "b" is absent from the new categories, so only "a" keeps a valid code.
        result = c._set_dtype(CategoricalDtype(["a", "c"]))
        tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))

    def test_set_item_nan(self):
        """Setting an element to NaN leaves the categories untouched."""
        cat = Categorical([1, 2, 3])
        cat[1] = np.nan

        exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])
        tm.assert_categorical_equal(cat, exp)

    @pytest.mark.parametrize(
        "fillna_kwargs, msg",
        [
            (
                {"value": 1, "method": "ffill"},
                "Cannot specify both 'value' and 'method'.",
            ),
            ({}, "Must specify a fill 'value' or 'method'."),
            ({"method": "bad"}, "Invalid fill method. Expecting .* bad"),
            (
                {"value": Series([1, 2, 3, 4, "a"])},
                "Cannot setitem on a Categorical with a new category",
            ),
        ],
    )
    def test_fillna_raises(self, fillna_kwargs, msg):
        """Invalid ``fillna`` arguments raise with the parametrized message."""
        # https://github.com/pandas-dev/pandas/issues/19682
        # https://github.com/pandas-dev/pandas/issues/13628
        cat = Categorical([1, 2, 3, None, None])

        # Only the lone-``value`` case (a fill value holding a new category) is
        # expected to raise TypeError; the argument-validation cases are
        # expected to raise ValueError.
        if len(fillna_kwargs) == 1 and "value" in fillna_kwargs:
            err = TypeError
        else:
            err = ValueError

        with pytest.raises(err, match=msg):
            cat.fillna(**fillna_kwargs)

    @pytest.mark.parametrize("named", [True, False])
    def test_fillna_iterable_category(self, named):
        """``fillna`` accepts a fill value that is itself iterable (a tuple)."""
        # https://github.com/pandas-dev/pandas/issues/21097
        if named:
            Point = collections.namedtuple("Point", "x y")
        else:

            def Point(*args):
                # plain tuple
                return args

        cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object))
        result = cat.fillna(Point(0, 0))
        expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])

        tm.assert_categorical_equal(result, expected)

        # Case where the Point is not among our categories; we want the
        #  TypeError asserted below, not NotImplementedError GH#41914
        cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object))
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            cat.fillna(Point(0, 0))

    def test_fillna_array(self):
        """``fillna`` accepts an array-like fill value and does not mutate ``cat``."""
        # accept Categorical or ndarray value if it holds appropriate values
        cat = Categorical(["A", "B", "C", None, None])

        other = cat.fillna("C")
        result = cat.fillna(other)
        tm.assert_categorical_equal(result, other)
        assert isna(cat[-1])  # didn't modify original inplace

        other = np.array(["A", "B", "C", "B", "A"])
        result = cat.fillna(other)
        expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)
        tm.assert_categorical_equal(result, expected)
        assert isna(cat[-1])  # didn't modify original inplace

    @pytest.mark.parametrize(
        "values, expected",
        [
            ([1, 2, 3], np.array([False, False, False])),
            ([1, 2, np.nan], np.array([False, False, True])),
            ([1, 2, np.inf], np.array([False, False, True])),
            ([1, 2, pd.NA], np.array([False, False, True])),
        ],
    )
    def test_use_inf_as_na(self, values, expected):
        """With ``mode.use_inf_as_na`` on, ``.isna()`` also flags ``inf``.

        The Categorical is built inside the option context, and the result is
        checked on the array, a Series and a DataFrame. Setting the option is
        expected to emit a deprecation ``FutureWarning``.
        """
        # https://github.com/pandas-dev/pandas/issues/33594
        msg = "use_inf_as_na option is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            with pd.option_context("mode.use_inf_as_na", True):
                cat = Categorical(values)
                result = cat.isna()
                tm.assert_numpy_array_equal(result, expected)

                # Distinct names instead of rebinding the ``expected`` argument.
                expected_ser = Series(expected)
                result = Series(cat).isna()
                tm.assert_series_equal(result, expected_ser)

                expected_df = DataFrame(expected_ser)
                result = DataFrame(cat).isna()
                tm.assert_frame_equal(result, expected_df)

    @pytest.mark.parametrize(
        "values, expected",
        [
            ([1, 2, 3], np.array([False, False, False])),
            ([1, 2, np.nan], np.array([False, False, True])),
            ([1, 2, np.inf], np.array([False, False, True])),
            ([1, 2, pd.NA], np.array([False, False, True])),
        ],
    )
    def test_use_inf_as_na_outside_context(self, values, expected):
        """Same as ``test_use_inf_as_na`` but via the top-level ``isna``.

        Here the Categorical is built *before* the option context is entered,
        and only the ``isna`` calls run with ``mode.use_inf_as_na`` enabled.
        """
        # https://github.com/pandas-dev/pandas/issues/33594
        # Using isna directly for Categorical will fail in general here
        cat = Categorical(values)

        msg = "use_inf_as_na option is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            with pd.option_context("mode.use_inf_as_na", True):
                result = isna(cat)
                tm.assert_numpy_array_equal(result, expected)

                # Distinct names instead of rebinding the ``expected`` argument.
                expected_ser = Series(expected)
                result = isna(Series(cat))
                tm.assert_series_equal(result, expected_ser)

                expected_df = DataFrame(expected_ser)
                result = isna(DataFrame(cat))
                tm.assert_frame_equal(result, expected_df)

    @pytest.mark.parametrize(
        "a1, a2, categories",
        [
            (["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]),
            ([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]),
        ],
    )
    def test_compare_categorical_with_missing(self, a1, a2, categories):
        """``==`` / ``!=`` on categorical Series with NaN match the plain Series result."""
        # GH 28384
        cat_type = CategoricalDtype(categories)

        # !=
        result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type)
        expected = Series(a1) != Series(a2)
        tm.assert_series_equal(result, expected)

        # ==
        result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)
        expected = Series(a1) == Series(a2)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "na_value, dtype",
        [
            (pd.NaT, "datetime64[ns]"),
            (None, "float64"),
            (np.nan, "float64"),
            (pd.NA, "float64"),
        ],
    )
    def test_categorical_only_missing_values_no_cast(self, na_value, dtype):
        """An all-missing Categorical has empty categories of the parametrized dtype."""
        # GH#44900
        result = Categorical([na_value, na_value])
        tm.assert_index_equal(result.categories, Index([], dtype=dtype))
1.0 2024-05-26 19:49:15 +02:00			`import collections`

			`import numpy as np`
			`import pytest`

			`from pandas.core.dtypes.dtypes import CategoricalDtype`

			`import pandas as pd`
			`from pandas import (`
			`Categorical,`
			`DataFrame,`
			`Index,`
			`Series,`
			`isna,`
			`)`
			`import pandas._testing as tm`


			`class TestCategoricalMissing:`
			`def test_isna(self):`
			`exp = np.array([False, False, True])`
			`cat = Categorical(["a", "b", np.nan])`
			`res = cat.isna()`

			`tm.assert_numpy_array_equal(res, exp)`

			`def test_na_flags_int_categories(self):`
			`# #1457`

			`categories = list(range(10))`
			`labels = np.random.default_rng(2).integers(0, 10, 20)`
			`labels[::5] = -1`

			`cat = Categorical(labels, categories)`
			`repr(cat)`

			`tm.assert_numpy_array_equal(isna(cat), labels == -1)`

			`def test_nan_handling(self):`
			`# Nans are represented as -1 in codes`
			`c = Categorical(["a", "b", np.nan, "a"])`
			`tm.assert_index_equal(c.categories, Index(["a", "b"]))`
			`tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))`
			`c[1] = np.nan`
			`tm.assert_index_equal(c.categories, Index(["a", "b"]))`
			`tm.assert_numpy_array_equal(c._codes, np.array([0, -1, -1, 0], dtype=np.int8))`

			`# Adding nan to categories should make assigned nan point to the`
			`# category!`
			`c = Categorical(["a", "b", np.nan, "a"])`
			`tm.assert_index_equal(c.categories, Index(["a", "b"]))`
			`tm.assert_numpy_array_equal(c._codes, np.array([0, 1, -1, 0], dtype=np.int8))`

			`def test_set_dtype_nans(self):`
			`c = Categorical(["a", "b", np.nan])`
			`result = c._set_dtype(CategoricalDtype(["a", "c"]))`
			`tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))`

			`def test_set_item_nan(self):`
			`cat = Categorical([1, 2, 3])`
			`cat[1] = np.nan`

			`exp = Categorical([1, np.nan, 3], categories=[1, 2, 3])`
			`tm.assert_categorical_equal(cat, exp)`

			`@pytest.mark.parametrize(`
			`"fillna_kwargs, msg",`
			`[`
			`(`
			`{"value": 1, "method": "ffill"},`
			`"Cannot specify both 'value' and 'method'.",`
			`),`
			`({}, "Must specify a fill 'value' or 'method'."),`
			`({"method": "bad"}, "Invalid fill method. Expecting .* bad"),`
			`(`
			`{"value": Series([1, 2, 3, 4, "a"])},`
			`"Cannot setitem on a Categorical with a new category",`
			`),`
			`],`
			`)`
			`def test_fillna_raises(self, fillna_kwargs, msg):`
			`# https://github.com/pandas-dev/pandas/issues/19682`
			`# https://github.com/pandas-dev/pandas/issues/13628`
			`cat = Categorical([1, 2, 3, None, None])`

			`if len(fillna_kwargs) == 1 and "value" in fillna_kwargs:`
			`err = TypeError`
			`else:`
			`err = ValueError`

			`with pytest.raises(err, match=msg):`
			`cat.fillna(**fillna_kwargs)`

			`@pytest.mark.parametrize("named", [True, False])`
			`def test_fillna_iterable_category(self, named):`
			`# https://github.com/pandas-dev/pandas/issues/21097`
			`if named:`
			`Point = collections.namedtuple("Point", "x y")`
			`else:`
			`Point = lambda *args: args # tuple`
			`cat = Categorical(np.array([Point(0, 0), Point(0, 1), None], dtype=object))`
			`result = cat.fillna(Point(0, 0))`
			`expected = Categorical([Point(0, 0), Point(0, 1), Point(0, 0)])`

			`tm.assert_categorical_equal(result, expected)`

			`# Case where the Point is not among our categories; we want ValueError,`
			`# not NotImplementedError GH#41914`
			`cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object))`
			`msg = "Cannot setitem on a Categorical with a new category"`
			`with pytest.raises(TypeError, match=msg):`
			`cat.fillna(Point(0, 0))`

			`def test_fillna_array(self):`
			`# accept Categorical or ndarray value if it holds appropriate values`
			`cat = Categorical(["A", "B", "C", None, None])`

			`other = cat.fillna("C")`
			`result = cat.fillna(other)`
			`tm.assert_categorical_equal(result, other)`
			`assert isna(cat[-1]) # didn't modify original inplace`

			`other = np.array(["A", "B", "C", "B", "A"])`
			`result = cat.fillna(other)`
			`expected = Categorical(["A", "B", "C", "B", "A"], dtype=cat.dtype)`
			`tm.assert_categorical_equal(result, expected)`
			`assert isna(cat[-1]) # didn't modify original inplace`

			`@pytest.mark.parametrize(`
			`"values, expected",`
			`[`
			`([1, 2, 3], np.array([False, False, False])),`
			`([1, 2, np.nan], np.array([False, False, True])),`
			`([1, 2, np.inf], np.array([False, False, True])),`
			`([1, 2, pd.NA], np.array([False, False, True])),`
			`],`
			`)`
			`def test_use_inf_as_na(self, values, expected):`
			`# https://github.com/pandas-dev/pandas/issues/33594`
			`msg = "use_inf_as_na option is deprecated"`
			`with tm.assert_produces_warning(FutureWarning, match=msg):`
			`with pd.option_context("mode.use_inf_as_na", True):`
			`cat = Categorical(values)`
			`result = cat.isna()`
			`tm.assert_numpy_array_equal(result, expected)`

			`result = Series(cat).isna()`
			`expected = Series(expected)`
			`tm.assert_series_equal(result, expected)`

			`result = DataFrame(cat).isna()`
			`expected = DataFrame(expected)`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize(`
			`"values, expected",`
			`[`
			`([1, 2, 3], np.array([False, False, False])),`
			`([1, 2, np.nan], np.array([False, False, True])),`
			`([1, 2, np.inf], np.array([False, False, True])),`
			`([1, 2, pd.NA], np.array([False, False, True])),`
			`],`
			`)`
			`def test_use_inf_as_na_outside_context(self, values, expected):`
			`# https://github.com/pandas-dev/pandas/issues/33594`
			`# Using isna directly for Categorical will fail in general here`
			`cat = Categorical(values)`

			`msg = "use_inf_as_na option is deprecated"`
			`with tm.assert_produces_warning(FutureWarning, match=msg):`
			`with pd.option_context("mode.use_inf_as_na", True):`
			`result = isna(cat)`
			`tm.assert_numpy_array_equal(result, expected)`

			`result = isna(Series(cat))`
			`expected = Series(expected)`
			`tm.assert_series_equal(result, expected)`

			`result = isna(DataFrame(cat))`
			`expected = DataFrame(expected)`
			`tm.assert_frame_equal(result, expected)`

			`@pytest.mark.parametrize(`
			`"a1, a2, categories",`
			`[`
			`(["a", "b", "c"], [np.nan, "a", "b"], ["a", "b", "c"]),`
			`([1, 2, 3], [np.nan, 1, 2], [1, 2, 3]),`
			`],`
			`)`
			`def test_compare_categorical_with_missing(self, a1, a2, categories):`
			`# GH 28384`
			`cat_type = CategoricalDtype(categories)`

			`# !=`
			`result = Series(a1, dtype=cat_type) != Series(a2, dtype=cat_type)`
			`expected = Series(a1) != Series(a2)`
			`tm.assert_series_equal(result, expected)`

			`# ==`
			`result = Series(a1, dtype=cat_type) == Series(a2, dtype=cat_type)`
			`expected = Series(a1) == Series(a2)`
			`tm.assert_series_equal(result, expected)`

			`@pytest.mark.parametrize(`
			`"na_value, dtype",`
			`[`
			`(pd.NaT, "datetime64[ns]"),`
			`(None, "float64"),`
			`(np.nan, "float64"),`
			`(pd.NA, "float64"),`
			`],`
			`)`
			`def test_categorical_only_missing_values_no_cast(self, na_value, dtype):`
			`# GH#44900`
			`result = Categorical([na_value, na_value])`
			`tm.assert_index_equal(result.categories, Index([], dtype=dtype))`