import numpy as np import pytest import pandas as pd import pandas._testing as tm from pandas.core.arrays.numpy_ import PandasArray, PandasDtype from . import base @pytest.fixture(params=["float", "object"]) def dtype(request): return PandasDtype(np.dtype(request.param)) @pytest.fixture def allow_in_pandas(monkeypatch): """ A monkeypatch to tells pandas to let us in. By default, passing a PandasArray to an index / series / frame constructor will unbox that PandasArray to an ndarray, and treat it as a non-EA column. We don't want people using EAs without reason. The mechanism for this is a check against ABCPandasArray in each constructor. But, for testing, we need to allow them in pandas. So we patch the _typ of PandasArray, so that we evade the ABCPandasArray check. """ with monkeypatch.context() as m: m.setattr(PandasArray, "_typ", "extension") yield @pytest.fixture def data(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": return pd.Series([(i,) for i in range(100)]).array return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) @pytest.fixture def data_missing(allow_in_pandas, dtype): if dtype.numpy_dtype == "object": return PandasArray(np.array([np.nan, (1,)], dtype=object)) return PandasArray(np.array([np.nan, 1.0])) @pytest.fixture def na_value(): return np.nan @pytest.fixture def na_cmp(): def cmp(a, b): return np.isnan(a) and np.isnan(b) return cmp @pytest.fixture def data_for_sorting(allow_in_pandas, dtype): """Length-3 array with a known sort order. This should be three items [B, C, A] with A < B < C """ if dtype.numpy_dtype == "object": # Use an empty tuple for first element, then remove, # to disable np.array's shape inference. return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:]) return PandasArray(np.array([1, 2, 0])) @pytest.fixture def data_missing_for_sorting(allow_in_pandas, dtype): """Length-3 array with a known sort order. This should be three items [B, NA, A] with A < B and NA missing. """ if dtype.numpy_dtype == "object": return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object)) return PandasArray(np.array([1, np.nan, 0])) @pytest.fixture def data_for_grouping(allow_in_pandas, dtype): """Data for factorization, grouping, and unique tests. Expected to be like [B, B, NA, NA, A, A, B, C] Where A < B < C and NA is missing """ if dtype.numpy_dtype == "object": a, b, c = (1,), (2,), (3,) else: a, b, c = np.arange(3) return PandasArray( np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype) ) @pytest.fixture def skip_numpy_object(dtype): """ Tests for PandasArray with nested data. Users typically won't create these objects via `pd.array`, but they can show up through `.array` on a Series with nested data. Many of the base tests fail, as they aren't appropriate for nested data. This fixture allows these tests to be skipped when used as a usefixtures marker to either an individual test or a test class. """ if dtype == "object": raise pytest.skip("Skipping for object dtype.") skip_nested = pytest.mark.usefixtures("skip_numpy_object") class BaseNumPyTests: pass class TestCasting(BaseNumPyTests, base.BaseCastingTests): @skip_nested def test_astype_str(self, data): # ValueError: setting an array element with a sequence super().test_astype_str(data) @skip_nested def test_astype_string(self, data): # GH-33465 # ValueError: setting an array element with a sequence super().test_astype_string(data) class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): @pytest.mark.skip(reason="We don't register our dtype") # We don't want to register. This test should probably be split in two. def test_from_dtype(self, data): pass @skip_nested def test_array_from_scalars(self, data): # ValueError: PandasArray must be 1-dimensional. super().test_array_from_scalars(data) @skip_nested def test_series_constructor_scalar_with_index(self, data, dtype): # ValueError: Length of passed values is 1, index implies 3. super().test_series_constructor_scalar_with_index(data, dtype) class TestDtype(BaseNumPyTests, base.BaseDtypeTests): @pytest.mark.skip(reason="Incorrect expected.") # we unsurprisingly clash with a NumPy name. def test_check_dtype(self, data): pass class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): @skip_nested def test_getitem_scalar(self, data): # AssertionError super().test_getitem_scalar(data) @skip_nested def test_take_series(self, data): # ValueError: PandasArray must be 1-dimensional. super().test_take_series(data) def test_loc_iloc_frame_single_dtype(self, data, request): npdtype = data.dtype.numpy_dtype if npdtype == object: # GH#33125 mark = pytest.mark.xfail( reason="GH#33125 astype doesn't recognize data.dtype" ) request.node.add_marker(mark) super().test_loc_iloc_frame_single_dtype(data) class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): @skip_nested def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request ): super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): @skip_nested def test_array_interface(self, data): # NumPy array shape inference super().test_array_interface(data) class TestMethods(BaseNumPyTests, base.BaseMethodsTests): @pytest.mark.skip(reason="TODO: remove?") def test_value_counts(self, all_data, dropna): pass @pytest.mark.xfail(reason="not working. will be covered by #32028") def test_value_counts_with_normalize(self, data): return super().test_value_counts_with_normalize(data) @pytest.mark.skip(reason="Incorrect expected") # We have a bool dtype, so the result is an ExtensionArray # but expected is not def test_combine_le(self, data_repeated): super().test_combine_le(data_repeated) @skip_nested def test_combine_add(self, data_repeated): # Not numeric super().test_combine_add(data_repeated) @skip_nested def test_shift_fill_value(self, data): # np.array shape inference. Shift implementation fails. super().test_shift_fill_value(data) @skip_nested @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) def test_unique(self, data, box, method): # Fails creating expected super().test_unique(data, box, method) @skip_nested def test_fillna_copy_frame(self, data_missing): # The "scalar" for this array isn't a scalar. super().test_fillna_copy_frame(data_missing) @skip_nested def test_fillna_copy_series(self, data_missing): # The "scalar" for this array isn't a scalar. super().test_fillna_copy_series(data_missing) @skip_nested def test_hash_pandas_object_works(self, data, as_frame): # ndarray of tuples not hashable super().test_hash_pandas_object_works(data, as_frame) @skip_nested def test_searchsorted(self, data_for_sorting, as_series): # Test setup fails. super().test_searchsorted(data_for_sorting, as_series) @skip_nested def test_where_series(self, data, na_value, as_frame): # Test setup fails. super().test_where_series(data, na_value, as_frame) @skip_nested @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) def test_repeat(self, data, repeats, as_series, use_numpy): # Fails creating expected super().test_repeat(data, repeats, as_series, use_numpy) @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype") def test_diff(self, data, periods): return super().test_diff(data, periods) @skip_nested @pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame]) def test_equals(self, data, na_value, as_series, box): # Fails creating with _from_sequence super().test_equals(data, na_value, as_series, box) @skip_nested class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): divmod_exc = None series_scalar_exc = None frame_scalar_exc = None series_array_exc = None def test_divmod_series_array(self, data): s = pd.Series(data) self._check_divmod_op(s, divmod, data, exc=None) @pytest.mark.skip("We implement ops") def test_error(self, data, all_arithmetic_operators): pass def test_arith_series_with_scalar(self, data, all_arithmetic_operators): super().test_arith_series_with_scalar(data, all_arithmetic_operators) def test_arith_series_with_array(self, data, all_arithmetic_operators): super().test_arith_series_with_array(data, all_arithmetic_operators) class TestPrinting(BaseNumPyTests, base.BasePrintingTests): pass @skip_nested class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) # avoid coercing int -> float. Just cast to the actual numpy type. expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) @skip_nested class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): pass class TestMissing(BaseNumPyTests, base.BaseMissingTests): @skip_nested def test_fillna_scalar(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_scalar(data_missing) @skip_nested def test_fillna_series_method(self, data_missing, fillna_method): # Non-scalar "scalar" values. super().test_fillna_series_method(data_missing, fillna_method) @skip_nested def test_fillna_series(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_series(data_missing) @skip_nested def test_fillna_frame(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_frame(data_missing) @pytest.mark.skip("Invalid test") def test_fillna_fill_other(self, data): # inplace update doesn't work correctly with patched extension arrays # extract_array returns PandasArray, while dtype is a numpy dtype super().test_fillna_fill_other(data_missing) class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): @pytest.mark.skip("Incorrect parent test") # not actually a mixed concat, since we concat int and int. def test_concat_mixed_dtypes(self, data): super().test_concat_mixed_dtypes(data) @pytest.mark.xfail( reason="GH#33125 PandasArray.astype does not recognize PandasDtype" ) def test_concat(self, data, in_frame): super().test_concat(data, in_frame) @pytest.mark.xfail( reason="GH#33125 PandasArray.astype does not recognize PandasDtype" ) def test_concat_all_na_block(self, data_missing, in_frame): super().test_concat_all_na_block(data_missing, in_frame) @skip_nested def test_merge(self, data, na_value): # Fails creating expected super().test_merge(data, na_value) @skip_nested def test_merge_on_extension_array(self, data): # Fails creating expected super().test_merge_on_extension_array(data) @skip_nested def test_merge_on_extension_array_duplicates(self, data): # Fails creating expected super().test_merge_on_extension_array_duplicates(data) @skip_nested def test_transpose_frame(self, data): super().test_transpose_frame(data) class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): @skip_nested def test_setitem_scalar_series(self, data, box_in_series): # AssertionError super().test_setitem_scalar_series(data, box_in_series) @skip_nested def test_setitem_sequence(self, data, box_in_series): # ValueError: shape mismatch: value array of shape (2,1) could not # be broadcast to indexing result of shape (2,) super().test_setitem_sequence(data, box_in_series) @skip_nested def test_setitem_sequence_mismatched_length_raises(self, data, as_array): # ValueError: PandasArray must be 1-dimensional. super().test_setitem_sequence_mismatched_length_raises(data, as_array) @skip_nested def test_setitem_sequence_broadcasts(self, data, box_in_series): # ValueError: cannot set using a list-like indexer with a different # length than the value super().test_setitem_sequence_broadcasts(data, box_in_series) @skip_nested def test_setitem_loc_scalar_mixed(self, data): # AssertionError super().test_setitem_loc_scalar_mixed(data) @skip_nested def test_setitem_loc_scalar_multiple_homogoneous(self, data): # AssertionError super().test_setitem_loc_scalar_multiple_homogoneous(data) @skip_nested def test_setitem_iloc_scalar_mixed(self, data): # AssertionError super().test_setitem_iloc_scalar_mixed(data) @skip_nested def test_setitem_iloc_scalar_multiple_homogoneous(self, data): # AssertionError super().test_setitem_iloc_scalar_multiple_homogoneous(data) @skip_nested @pytest.mark.parametrize("setter", ["loc", None]) def test_setitem_mask_broadcast(self, data, setter): # ValueError: cannot set using a list-like indexer with a different # length than the value super().test_setitem_mask_broadcast(data, setter) @skip_nested def test_setitem_scalar_key_sequence_raise(self, data): # Failed: DID NOT RAISE super().test_setitem_scalar_key_sequence_raise(data) # TODO: there is some issue with PandasArray, therefore, # skip the setitem test for now, and fix it later (GH 31446) @skip_nested @pytest.mark.parametrize( "mask", [ np.array([True, True, True, False, False]), pd.array([True, True, True, False, False], dtype="boolean"), ], ids=["numpy-array", "boolean-array"], ) def test_setitem_mask(self, data, mask, box_in_series): super().test_setitem_mask(data, mask, box_in_series) @skip_nested def test_setitem_mask_raises(self, data, box_in_series): super().test_setitem_mask_raises(data, box_in_series) @skip_nested @pytest.mark.parametrize( "idx", [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], ids=["list", "integer-array", "numpy-array"], ) def test_setitem_integer_array(self, data, idx, box_in_series): super().test_setitem_integer_array(data, idx, box_in_series) @skip_nested @pytest.mark.parametrize( "idx, box_in_series", [ ([0, 1, 2, pd.NA], False), pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail), (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False), ], ids=["list-False", "list-True", "integer-array-False", "integer-array-True"], ) def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series): super().test_setitem_integer_with_missing_raises(data, idx, box_in_series) @skip_nested def test_setitem_slice(self, data, box_in_series): super().test_setitem_slice(data, box_in_series) @skip_nested def test_setitem_loc_iloc_slice(self, data): super().test_setitem_loc_iloc_slice(data) @skip_nested class TestParsing(BaseNumPyTests, base.BaseParsingTests): pass