import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import integer_array


@pytest.mark.parametrize("ufunc", [np.abs, np.sign])
# np.sign emits a warning with nans
@pytest.mark.filterwarnings("ignore:invalid value encountered in sign")
def test_ufuncs_single_int(ufunc):
    """Unary ufuncs in the parametrization return integer-array results."""
    a = integer_array([1, 2, -3, np.nan])
    result = ufunc(a)
    expected = integer_array(ufunc(a.astype(float)))
    tm.assert_extension_array_equal(result, expected)

    # Same check with the array wrapped in a Series.
    s = pd.Series(a)
    result = ufunc(s)
    expected = pd.Series(integer_array(ufunc(a.astype(float))))
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
def test_ufuncs_single_float(ufunc):
    """Float-valued unary ufuncs match the result on the float-cast input."""
    a = integer_array([1, 2, -3, np.nan])
    # The input contains a negative value and a missing value, so silence
    # numpy's "invalid value" floating-point warnings.
    with np.errstate(invalid="ignore"):
        result = ufunc(a)
        expected = ufunc(a.astype(float))
    tm.assert_numpy_array_equal(result, expected)

    s = pd.Series(a)
    with np.errstate(invalid="ignore"):
        result = ufunc(s)
        expected = ufunc(s.astype(float))
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ufunc", [np.add, np.subtract])
def test_ufuncs_binary_int(ufunc):
    """Binary ufuncs return integer arrays for array, ndarray and scalar operands."""
    # two IntegerArrays
    a = integer_array([1, 2, -3, np.nan])
    result = ufunc(a, a)
    expected = integer_array(ufunc(a.astype(float), a.astype(float)))
    tm.assert_extension_array_equal(result, expected)

    # IntegerArray with numpy array
    arr = np.array([1, 2, 3, 4])
    result = ufunc(a, arr)
    expected = integer_array(ufunc(a.astype(float), arr))
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(arr, a)
    expected = integer_array(ufunc(arr, a.astype(float)))
    tm.assert_extension_array_equal(result, expected)

    # IntegerArray with scalar
    result = ufunc(a, 1)
    expected = integer_array(ufunc(a.astype(float), 1))
    tm.assert_extension_array_equal(result, expected)

    result = ufunc(1, a)
    expected = integer_array(ufunc(1, a.astype(float)))
    tm.assert_extension_array_equal(result, expected)


def test_ufunc_binary_output():
    """np.modf returns a 2-tuple whose items match the float ndarray results."""
    a = integer_array([1, 2, np.nan])
    result = np.modf(a)
    expected = np.modf(a.to_numpy(na_value=np.nan, dtype="float"))

    assert isinstance(result, tuple)
    assert len(result) == 2

    for x, y in zip(result, expected):
        # TODO(FloatArray): This will return an extension array.
        # y = integer_array(y)
        tm.assert_numpy_array_equal(x, y)


@pytest.mark.parametrize("values", [[0, 1], [0, None]])
def test_ufunc_reduce_raises(values):
    """np.add.reduce on an integer array raises NotImplementedError."""
    a = integer_array(values)
    msg = r"The 'reduce' method is not supported."
    with pytest.raises(NotImplementedError, match=msg):
        np.add.reduce(a)


@pytest.mark.parametrize(
    "pandasmethname, kwargs",
    [
        ("var", {"ddof": 0}),
        ("var", {"ddof": 1}),
        ("kurtosis", {}),
        ("skew", {}),
        ("sem", {}),
    ],
)
def test_stat_method(pandasmethname, kwargs):
    """Statistical methods give the same result with and without missing values."""
    s = pd.Series(data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64")
    pandasmeth = getattr(s, pandasmethname)
    result = pandasmeth(**kwargs)

    # Reference: the same data with the missing entries removed.
    s2 = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64")
    pandasmeth = getattr(s2, pandasmethname)
    expected = pandasmeth(**kwargs)
    assert expected == result


def test_value_counts_na():
    """value_counts counts pd.NA when dropna=False and omits it when dropna=True."""
    arr = pd.array([1, 2, 1, pd.NA], dtype="Int64")
    result = arr.value_counts(dropna=False)
    expected = pd.Series([2, 1, 1], index=[1, 2, pd.NA], dtype="Int64")
    tm.assert_series_equal(result, expected)

    result = arr.value_counts(dropna=True)
    expected = pd.Series([2, 1], index=[1, 2], dtype="Int64")
    tm.assert_series_equal(result, expected)


def test_value_counts_empty():
    """value_counts on an empty Int64 Series returns an empty Int64 Series."""
    # https://github.com/pandas-dev/pandas/issues/33317
    s = pd.Series([], dtype="Int64")
    result = s.value_counts()
    # TODO: The dtype of the index seems wrong (it's int64 for non-empty)
    idx = pd.Index([], dtype="object")
    expected = pd.Series([], index=idx, dtype="Int64")
    tm.assert_series_equal(result, expected)


def test_value_counts_with_normalize():
    """value_counts(normalize=True) returns Float64 proportions over non-NA values."""
    # GH 33172
    s = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
    result = s.value_counts(normalize=True)
    expected = pd.Series([2, 1], index=[1, 2], dtype="Float64") / 3
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 4])
def test_integer_array_sum(skipna, min_count, any_nullable_int_dtype):
    """sum is 6 only when NA is skipped and min_count is 0; otherwise pd.NA."""
    dtype = any_nullable_int_dtype
    arr = pd.array([1, 2, 3, None], dtype=dtype)
    result = arr.sum(skipna=skipna, min_count=min_count)
    if skipna and min_count == 0:
        assert result == 6
    else:
        assert result is pd.NA


@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("method", ["min", "max"])
def test_integer_array_min_max(skipna, method, any_nullable_int_dtype):
    """min/max skip NA when skipna=True and return pd.NA otherwise."""
    dtype = any_nullable_int_dtype
    arr = pd.array([0, 1, None], dtype=dtype)
    func = getattr(arr, method)
    result = func(skipna=skipna)
    if skipna:
        assert result == (0 if method == "min" else 1)
    else:
        assert result is pd.NA


@pytest.mark.parametrize("skipna", [True, False])
@pytest.mark.parametrize("min_count", [0, 9])
def test_integer_array_prod(skipna, min_count, any_nullable_int_dtype):
    """prod is 2 only when NA is skipped and min_count is 0; otherwise pd.NA."""
    dtype = any_nullable_int_dtype
    arr = pd.array([1, 2, None], dtype=dtype)
    result = arr.prod(skipna=skipna, min_count=min_count)
    if skipna and min_count == 0:
        assert result == 2
    else:
        assert result is pd.NA


@pytest.mark.parametrize(
    "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)]
)
def test_integer_array_numpy_sum(values, expected):
    """np.sum on an Int64 array equals the expected total for each input."""
    arr = pd.array(values, dtype="Int64")
    result = np.sum(arr)
    assert result == expected


@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
def test_dataframe_reductions(op):
    """Each parametrized DataFrame reduction yields an np.int64 for an Int64 column."""
    # https://github.com/pandas-dev/pandas/pull/32867
    # ensure the integers are not cast to float during reductions
    df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
    # Dispatch on ``op`` so every parametrized reduction is actually exercised
    # (previously ``df.max()`` was called regardless of ``op``).
    result = getattr(df, op)()
    assert isinstance(result["a"], np.int64)


# TODO(jreback) - these need testing / are broken

# shift

# set_index (destroys type)