from datetime import (
    datetime,
    timedelta,
)

import numpy as np
import pytest

import pandas as pd
from pandas import (
    Categorical,
    DataFrame,
    DatetimeIndex,
    Index,
    NaT,
    Period,
    PeriodIndex,
    RangeIndex,
    Series,
    Timedelta,
    TimedeltaIndex,
    Timestamp,
    date_range,
    isna,
    timedelta_range,
    to_timedelta,
)
import pandas._testing as tm
from pandas.core import nanops


def get_objs():
    indexes = [
        tm.makeBoolIndex(10, name="a"),
        tm.makeIntIndex(10, name="a"),
        tm.makeFloatIndex(10, name="a"),
        tm.makeDateIndex(10, name="a"),
        tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"),
        tm.makePeriodIndex(10, name="a"),
        tm.makeStringIndex(10, name="a"),
    ]

    arr = np.random.randn(10)
    series = [Series(arr, index=idx, name="a") for idx in indexes]

    objs = indexes + series
    return objs


objs = get_objs()


class TestReductions:
    @pytest.mark.parametrize("opname", ["max", "min"])
    @pytest.mark.parametrize("obj", objs)
    def test_ops(self, opname, obj):
        result = getattr(obj, opname)()
        if not isinstance(obj, PeriodIndex):
            expected = getattr(obj.values, opname)()
        else:
            expected = Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)

        if getattr(obj, "tz", None) is not None:
            # We need to de-localize before comparing to the numpy-produced result
            expected = expected.astype("M8[ns]").astype("int64")
            assert result._value == expected
        else:
            assert result == expected

    @pytest.mark.parametrize("opname", ["max", "min"])
    @pytest.mark.parametrize(
        "dtype, val",
        [
            ("object", 2.0),
            ("float64", 2.0),
            ("datetime64[ns]", datetime(2011, 11, 1)),
            ("Int64", 2),
            ("boolean", True),
        ],
    )
    def test_nanminmax(self, opname, dtype, val, index_or_series):
        # GH#7261
        klass = index_or_series

        def check_missing(res):
            if dtype == "datetime64[ns]":
                return res is NaT
            elif dtype in ["Int64", "boolean"]:
                return res is pd.NA
            else:
                return isna(res)

        obj = klass([None], dtype=dtype)
        assert check_missing(getattr(obj, opname)())
        assert check_missing(getattr(obj, opname)(skipna=False))

        obj = klass([], dtype=dtype)
        assert check_missing(getattr(obj, opname)())
        assert check_missing(getattr(obj, opname)(skipna=False))

        if dtype == "object":
            # generic test with object only works for empty / all NaN
            return

        obj = klass([None, val], dtype=dtype)
        assert getattr(obj, opname)() == val
        assert check_missing(getattr(obj, opname)(skipna=False))

        obj = klass([None, val, None], dtype=dtype)
        assert getattr(obj, opname)() == val
        assert check_missing(getattr(obj, opname)(skipna=False))

    @pytest.mark.parametrize("opname", ["max", "min"])
    def test_nanargminmax(self, opname, index_or_series):
        # GH#7261
        klass = index_or_series
        arg_op = "arg" + opname if klass is Index else "idx" + opname

        obj = klass([NaT, datetime(2011, 11, 1)])
        assert getattr(obj, arg_op)() == 1
        result = getattr(obj, arg_op)(skipna=False)
        if klass is Series:
            assert np.isnan(result)
        else:
            assert result == -1

        obj = klass([NaT, datetime(2011, 11, 1), NaT])
        # check DatetimeIndex non-monotonic path
        assert getattr(obj, arg_op)() == 1
        result = getattr(obj, arg_op)(skipna=False)
        if klass is Series:
            assert np.isnan(result)
        else:
            assert result == -1

    @pytest.mark.parametrize("opname", ["max", "min"])
    @pytest.mark.parametrize("dtype", ["M8[ns]", "datetime64[ns, UTC]"])
    def test_nanops_empty_object(self, opname, index_or_series, dtype):
        klass = index_or_series
        arg_op = "arg" + opname if klass is Index else "idx" + opname

        obj = klass([], dtype=dtype)

        assert getattr(obj, opname)() is NaT
        assert getattr(obj, opname)(skipna=False) is NaT

        with pytest.raises(ValueError, match="empty sequence"):
            getattr(obj, arg_op)()
        with pytest.raises(ValueError, match="empty sequence"):
            getattr(obj, arg_op)(skipna=False)

    def test_argminmax(self):
        obj = Index(np.arange(5, dtype="int64"))
        assert obj.argmin() == 0
        assert obj.argmax() == 4

        obj = Index([np.nan, 1, np.nan, 2])
        assert obj.argmin() == 1
        assert obj.argmax() == 3
        assert obj.argmin(skipna=False) == -1
        assert obj.argmax(skipna=False) == -1

        obj = Index([np.nan])
        assert obj.argmin() == -1
        assert obj.argmax() == -1
        assert obj.argmin(skipna=False) == -1
        assert obj.argmax(skipna=False) == -1

        obj = Index([NaT, datetime(2011, 11, 1), datetime(2011, 11, 2), NaT])
        assert obj.argmin() == 1
        assert obj.argmax() == 2
        assert obj.argmin(skipna=False) == -1
        assert obj.argmax(skipna=False) == -1

        obj = Index([NaT])
        assert obj.argmin() == -1
        assert obj.argmax() == -1
        assert obj.argmin(skipna=False) == -1
        assert obj.argmax(skipna=False) == -1

    @pytest.mark.parametrize("op, expected_col", [["max", "a"], ["min", "b"]])
    def test_same_tz_min_max_axis_1(self, op, expected_col):
        # GH 10390
        df = DataFrame(
            date_range("2016-01-01 00:00:00", periods=3, tz="UTC"), columns=["a"]
        )
        df["b"] = df.a.subtract(Timedelta(seconds=3600))
        result = getattr(df, op)(axis=1)
        expected = df[expected_col].rename(None)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("func", ["maximum", "minimum"])
    def test_numpy_reduction_with_tz_aware_dtype(self, tz_aware_fixture, func):
        # GH 15552
        tz = tz_aware_fixture
        arg = pd.to_datetime(["2019"]).tz_localize(tz)
        expected = Series(arg)
        result = getattr(np, func)(expected, expected)
        tm.assert_series_equal(result, expected)

    def test_nan_int_timedelta_sum(self):
        # GH 27185
        df = DataFrame(
            {
                "A": Series([1, 2, NaT], dtype="timedelta64[ns]"),
                "B": Series([1, 2, np.nan], dtype="Int64"),
            }
        )
        expected = Series({"A": Timedelta(3), "B": 3})
        result = df.sum()
        tm.assert_series_equal(result, expected)


class TestIndexReductions:
    # Note: the name TestIndexReductions indicates these tests
    # were moved from a Index-specific test file, _not_ that these tests are
    # intended long-term to be Index-specific

    @pytest.mark.parametrize(
        "start,stop,step",
        [
            (0, 400, 3),
            (500, 0, -6),
            (-(10**6), 10**6, 4),
            (10**6, -(10**6), -4),
            (0, 10, 20),
        ],
    )
    def test_max_min_range(self, start, stop, step):
        # GH#17607
        idx = RangeIndex(start, stop, step)
        expected = idx._values.max()
        result = idx.max()
        assert result == expected

        # skipna should be irrelevant since RangeIndex should never have NAs
        result2 = idx.max(skipna=False)
        assert result2 == expected

        expected = idx._values.min()
        result = idx.min()
        assert result == expected

        # skipna should be irrelevant since RangeIndex should never have NAs
        result2 = idx.min(skipna=False)
        assert result2 == expected

        # empty
        idx = RangeIndex(start, stop, -step)
        assert isna(idx.max())
        assert isna(idx.min())

    def test_minmax_timedelta64(self):
        # monotonic
        idx1 = TimedeltaIndex(["1 days", "2 days", "3 days"])
        assert idx1.is_monotonic_increasing

        # non-monotonic
        idx2 = TimedeltaIndex(["1 days", np.nan, "3 days", "NaT"])
        assert not idx2.is_monotonic_increasing

        for idx in [idx1, idx2]:
            assert idx.min() == Timedelta("1 days")
            assert idx.max() == Timedelta("3 days")
            assert idx.argmin() == 0
            assert idx.argmax() == 2

    @pytest.mark.parametrize("op", ["min", "max"])
    def test_minmax_timedelta_empty_or_na(self, op):
        # Return NaT
        obj = TimedeltaIndex([])
        assert getattr(obj, op)() is NaT

        obj = TimedeltaIndex([NaT])
        assert getattr(obj, op)() is NaT

        obj = TimedeltaIndex([NaT, NaT, NaT])
        assert getattr(obj, op)() is NaT

    def test_numpy_minmax_timedelta64(self):
        td = timedelta_range("16815 days", "16820 days", freq="D")

        assert np.min(td) == Timedelta("16815 days")
        assert np.max(td) == Timedelta("16820 days")

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.min(td, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.max(td, out=0)

        assert np.argmin(td) == 0
        assert np.argmax(td) == 5

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.argmin(td, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.argmax(td, out=0)

    def test_timedelta_ops(self):
        # GH#4984
        # make sure ops return Timedelta
        s = Series(
            [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)]
        )
        td = s.diff()

        result = td.mean()
        expected = to_timedelta(timedelta(seconds=9))
        assert result == expected

        result = td.to_frame().mean()
        assert result[0] == expected

        result = td.quantile(0.1)
        expected = Timedelta(np.timedelta64(2600, "ms"))
        assert result == expected

        result = td.median()
        expected = to_timedelta("00:00:09")
        assert result == expected

        result = td.to_frame().median()
        assert result[0] == expected

        # GH#6462
        # consistency in returned values for sum
        result = td.sum()
        expected = to_timedelta("00:01:21")
        assert result == expected

        result = td.to_frame().sum()
        assert result[0] == expected

        # std
        result = td.std()
        expected = to_timedelta(Series(td.dropna().values).std())
        assert result == expected

        result = td.to_frame().std()
        assert result[0] == expected

        # GH#10040
        # make sure NaT is properly handled by median()
        s = Series([Timestamp("2015-02-03"), Timestamp("2015-02-07")])
        assert s.diff().median() == timedelta(days=4)

        s = Series(
            [Timestamp("2015-02-03"), Timestamp("2015-02-07"), Timestamp("2015-02-15")]
        )
        assert s.diff().median() == timedelta(days=6)

    @pytest.mark.parametrize("opname", ["skew", "kurt", "sem", "prod", "var"])
    def test_invalid_td64_reductions(self, opname):
        s = Series(
            [Timestamp("20130101") + timedelta(seconds=i * i) for i in range(10)]
        )
        td = s.diff()

        msg = "|".join(
            [
                f"reduction operation '{opname}' not allowed for this dtype",
                rf"cannot perform {opname} with type timedelta64\[ns\]",
                f"does not support reduction '{opname}'",
            ]
        )

        with pytest.raises(TypeError, match=msg):
            getattr(td, opname)()

        with pytest.raises(TypeError, match=msg):
            getattr(td.to_frame(), opname)(numeric_only=False)

    def test_minmax_tz(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # monotonic
        idx1 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], tz=tz)
        assert idx1.is_monotonic_increasing

        # non-monotonic
        idx2 = DatetimeIndex(
            ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], tz=tz
        )
        assert not idx2.is_monotonic_increasing

        for idx in [idx1, idx2]:
            assert idx.min() == Timestamp("2011-01-01", tz=tz)
            assert idx.max() == Timestamp("2011-01-03", tz=tz)
            assert idx.argmin() == 0
            assert idx.argmax() == 2

    @pytest.mark.parametrize("op", ["min", "max"])
    def test_minmax_nat_datetime64(self, op):
        # Return NaT
        obj = DatetimeIndex([])
        assert isna(getattr(obj, op)())

        obj = DatetimeIndex([NaT])
        assert isna(getattr(obj, op)())

        obj = DatetimeIndex([NaT, NaT, NaT])
        assert isna(getattr(obj, op)())

    def test_numpy_minmax_integer(self):
        # GH#26125
        idx = Index([1, 2, 3])

        expected = idx.values.max()
        result = np.max(idx)
        assert result == expected

        expected = idx.values.min()
        result = np.min(idx)
        assert result == expected

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.min(idx, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.max(idx, out=0)

        expected = idx.values.argmax()
        result = np.argmax(idx)
        assert result == expected

        expected = idx.values.argmin()
        result = np.argmin(idx)
        assert result == expected

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.argmin(idx, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.argmax(idx, out=0)

    def test_numpy_minmax_range(self):
        # GH#26125
        idx = RangeIndex(0, 10, 3)

        result = np.max(idx)
        assert result == 9

        result = np.min(idx)
        assert result == 0

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.min(idx, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.max(idx, out=0)

        # No need to test again argmax/argmin compat since the implementation
        # is the same as basic integer index

    def test_numpy_minmax_datetime64(self):
        dr = date_range(start="2016-01-15", end="2016-01-20")

        assert np.min(dr) == Timestamp("2016-01-15 00:00:00")
        assert np.max(dr) == Timestamp("2016-01-20 00:00:00")

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.min(dr, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.max(dr, out=0)

        assert np.argmin(dr) == 0
        assert np.argmax(dr) == 5

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.argmin(dr, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.argmax(dr, out=0)

    def test_minmax_period(self):
        # monotonic
        idx1 = PeriodIndex([NaT, "2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
        assert not idx1.is_monotonic_increasing
        assert idx1[1:].is_monotonic_increasing

        # non-monotonic
        idx2 = PeriodIndex(
            ["2011-01-01", NaT, "2011-01-03", "2011-01-02", NaT], freq="D"
        )
        assert not idx2.is_monotonic_increasing

        for idx in [idx1, idx2]:
            assert idx.min() == Period("2011-01-01", freq="D")
            assert idx.max() == Period("2011-01-03", freq="D")
        assert idx1.argmin() == 1
        assert idx2.argmin() == 0
        assert idx1.argmax() == 3
        assert idx2.argmax() == 2

    @pytest.mark.parametrize("op", ["min", "max"])
    @pytest.mark.parametrize("data", [[], [NaT], [NaT, NaT, NaT]])
    def test_minmax_period_empty_nat(self, op, data):
        # Return NaT
        obj = PeriodIndex(data, freq="M")
        result = getattr(obj, op)()
        assert result is NaT

    def test_numpy_minmax_period(self):
        pr = pd.period_range(start="2016-01-15", end="2016-01-20")

        assert np.min(pr) == Period("2016-01-15", freq="D")
        assert np.max(pr) == Period("2016-01-20", freq="D")

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.min(pr, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.max(pr, out=0)

        assert np.argmin(pr) == 0
        assert np.argmax(pr) == 5

        errmsg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=errmsg):
            np.argmin(pr, out=0)
        with pytest.raises(ValueError, match=errmsg):
            np.argmax(pr, out=0)

    def test_min_max_categorical(self):
        ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        msg = (
            r"Categorical is not ordered for operation min\n"
            r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n"
        )
        with pytest.raises(TypeError, match=msg):
            ci.min()
        msg = (
            r"Categorical is not ordered for operation max\n"
            r"you can use .as_ordered\(\) to change the Categorical to an ordered one\n"
        )
        with pytest.raises(TypeError, match=msg):
            ci.max()

        ci = pd.CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=True)
        assert ci.min() == "c"
        assert ci.max() == "b"


class TestSeriesReductions:
    # Note: the name TestSeriesReductions indicates these tests
    # were moved from a series-specific test file, _not_ that these tests are
    # intended long-term to be series-specific

    def test_sum_inf(self):
        s = Series(np.random.randn(10))
        s2 = s.copy()

        s[5:8] = np.inf
        s2[5:8] = np.nan

        assert np.isinf(s.sum())

        arr = np.random.randn(100, 100).astype("f4")
        arr[:, 2] = np.inf

        with pd.option_context("mode.use_inf_as_na", True):
            tm.assert_almost_equal(s.sum(), s2.sum())

        res = nanops.nansum(arr, axis=1)
        assert np.isinf(res).all()

    @pytest.mark.parametrize(
        "dtype", ["float64", "Float32", "Int64", "boolean", "object"]
    )
    @pytest.mark.parametrize("use_bottleneck", [True, False])
    @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
    def test_empty(self, method, unit, use_bottleneck, dtype):
        with pd.option_context("use_bottleneck", use_bottleneck):
            # GH#9422 / GH#18921
            # Entirely empty
            s = Series([], dtype=dtype)
            # NA by default
            result = getattr(s, method)()
            assert result == unit

            # Explicit
            result = getattr(s, method)(min_count=0)
            assert result == unit

            result = getattr(s, method)(min_count=1)
            assert isna(result)

            # Skipna, default
            result = getattr(s, method)(skipna=True)
            assert result == unit

            # Skipna, explicit
            result = getattr(s, method)(skipna=True, min_count=0)
            assert result == unit

            result = getattr(s, method)(skipna=True, min_count=1)
            assert isna(result)

            result = getattr(s, method)(skipna=False, min_count=0)
            assert result == unit

            result = getattr(s, method)(skipna=False, min_count=1)
            assert isna(result)

            # All-NA
            s = Series([np.nan], dtype=dtype)
            # NA by default
            result = getattr(s, method)()
            assert result == unit

            # Explicit
            result = getattr(s, method)(min_count=0)
            assert result == unit

            result = getattr(s, method)(min_count=1)
            assert isna(result)

            # Skipna, default
            result = getattr(s, method)(skipna=True)
            assert result == unit

            # skipna, explicit
            result = getattr(s, method)(skipna=True, min_count=0)
            assert result == unit

            result = getattr(s, method)(skipna=True, min_count=1)
            assert isna(result)

            # Mix of valid, empty
            s = Series([np.nan, 1], dtype=dtype)
            # Default
            result = getattr(s, method)()
            assert result == 1.0

            # Explicit
            result = getattr(s, method)(min_count=0)
            assert result == 1.0

            result = getattr(s, method)(min_count=1)
            assert result == 1.0

            # Skipna
            result = getattr(s, method)(skipna=True)
            assert result == 1.0

            result = getattr(s, method)(skipna=True, min_count=0)
            assert result == 1.0

            # GH#844 (changed in GH#9422)
            df = DataFrame(np.empty((10, 0)), dtype=dtype)
            assert (getattr(df, method)(1) == unit).all()

            s = Series([1], dtype=dtype)
            result = getattr(s, method)(min_count=2)
            assert isna(result)

            result = getattr(s, method)(skipna=False, min_count=2)
            assert isna(result)

            s = Series([np.nan], dtype=dtype)
            result = getattr(s, method)(min_count=2)
            assert isna(result)

            s = Series([np.nan, 1], dtype=dtype)
            result = getattr(s, method)(min_count=2)
            assert isna(result)

    @pytest.mark.parametrize("method", ["mean", "var"])
    @pytest.mark.parametrize("dtype", ["Float64", "Int64", "boolean"])
    def test_ops_consistency_on_empty_nullable(self, method, dtype):
        # GH#34814
        # consistency for nullable dtypes on empty or ALL-NA mean

        # empty series
        eser = Series([], dtype=dtype)
        result = getattr(eser, method)()
        assert result is pd.NA

        # ALL-NA series
        nser = Series([np.nan], dtype=dtype)
        result = getattr(nser, method)()
        assert result is pd.NA

    @pytest.mark.parametrize("method", ["mean", "median", "std", "var"])
    def test_ops_consistency_on_empty(self, method):
        # GH#7869
        # consistency on empty

        # float
        result = getattr(Series(dtype=float), method)()
        assert isna(result)

        # timedelta64[ns]
        tdser = Series([], dtype="m8[ns]")
        if method == "var":
"|".join( [ "operation 'var' not allowed", r"cannot perform var with type timedelta64\[ns\]", "does not support reduction 'var'", ] ) with pytest.raises(TypeError, match=msg): getattr(tdser, method)() else: result = getattr(tdser, method)() assert result is NaT def test_nansum_buglet(self): ser = Series([1.0, np.nan], index=[0, 1]) result = np.nansum(ser) tm.assert_almost_equal(result, 1) @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("dtype", ["int32", "int64"]) def test_sum_overflow_int(self, use_bottleneck, dtype): with pd.option_context("use_bottleneck", use_bottleneck): # GH#6915 # overflowing on the smaller int dtypes v = np.arange(5000000, dtype=dtype) s = Series(v) result = s.sum(skipna=False) assert int(result) == v.sum(dtype="int64") result = s.min(skipna=False) assert int(result) == 0 result = s.max(skipna=False) assert int(result) == v[-1] @pytest.mark.parametrize("use_bottleneck", [True, False]) @pytest.mark.parametrize("dtype", ["float32", "float64"]) def test_sum_overflow_float(self, use_bottleneck, dtype): with pd.option_context("use_bottleneck", use_bottleneck): v = np.arange(5000000, dtype=dtype) s = Series(v) result = s.sum(skipna=False) assert result == v.sum(dtype=dtype) result = s.min(skipna=False) assert np.allclose(float(result), 0.0) result = s.max(skipna=False) assert np.allclose(float(result), v[-1]) def test_mean_masked_overflow(self): # GH#48378 val = 100_000_000_000_000_000 n_elements = 100 na = np.array([val] * n_elements) ser = Series([val] * n_elements, dtype="Int64") result_numpy = np.mean(na) result_masked = ser.mean() assert result_masked - result_numpy == 0 assert result_masked == 1e17 @pytest.mark.parametrize("ddof, exp", [(1, 2.5), (0, 2.0)]) def test_var_masked_array(self, ddof, exp): # GH#48379 ser = Series([1, 2, 3, 4, 5], dtype="Int64") ser_numpy_dtype = Series([1, 2, 3, 4, 5], dtype="int64") result = ser.var(ddof=ddof) result_numpy_dtype = ser_numpy_dtype.var(ddof=ddof) assert result == result_numpy_dtype assert result == exp @pytest.mark.parametrize("dtype", ("m8[ns]", "m8[ns]", "M8[ns]", "M8[ns, UTC]")) @pytest.mark.parametrize("skipna", [True, False]) def test_empty_timeseries_reductions_return_nat(self, dtype, skipna): # covers GH#11245 assert Series([], dtype=dtype).min(skipna=skipna) is NaT assert Series([], dtype=dtype).max(skipna=skipna) is NaT def test_numpy_argmin(self): # See GH#16830 data = np.arange(1, 11) s = Series(data, index=data) result = np.argmin(s) expected = np.argmin(data) assert result == expected result = s.argmin() assert result == expected msg = "the 'out' parameter is not supported" with pytest.raises(ValueError, match=msg): np.argmin(s, out=data) def test_numpy_argmax(self): # See GH#16830 data = np.arange(1, 11) s = Series(data, index=data) result = np.argmax(s) expected = np.argmax(data) assert result == expected result = s.argmax() assert result == expected msg = "the 'out' parameter is not supported" with pytest.raises(ValueError, match=msg): np.argmax(s, out=data) def test_idxmin(self): # test idxmin # _check_stat_op approach can not be used here because of isna check. 
string_series = tm.makeStringSeries().rename("series") # add some NaNs string_series[5:15] = np.NaN # skipna or no assert string_series[string_series.idxmin()] == string_series.min() assert isna(string_series.idxmin(skipna=False)) # no NaNs nona = string_series.dropna() assert nona[nona.idxmin()] == nona.min() assert nona.index.values.tolist().index(nona.idxmin()) == nona.values.argmin() # all NaNs allna = string_series * np.nan assert isna(allna.idxmin()) # datetime64[ns] s = Series(date_range("20130102", periods=6)) result = s.idxmin() assert result == 0 s[0] = np.nan result = s.idxmin() assert result == 1 def test_idxmax(self): # test idxmax # _check_stat_op approach can not be used here because of isna check. string_series = tm.makeStringSeries().rename("series") # add some NaNs string_series[5:15] = np.NaN # skipna or no assert string_series[string_series.idxmax()] == string_series.max() assert isna(string_series.idxmax(skipna=False)) # no NaNs nona = string_series.dropna() assert nona[nona.idxmax()] == nona.max() assert nona.index.values.tolist().index(nona.idxmax()) == nona.values.argmax() # all NaNs allna = string_series * np.nan assert isna(allna.idxmax()) s = Series(date_range("20130102", periods=6)) result = s.idxmax() assert result == 5 s[5] = np.nan result = s.idxmax() assert result == 4 # Index with float64 dtype # GH#5914 s = Series([1, 2, 3], [1.1, 2.1, 3.1]) result = s.idxmax() assert result == 3.1 result = s.idxmin() assert result == 1.1 s = Series(s.index, s.index) result = s.idxmax() assert result == 3.1 result = s.idxmin() assert result == 1.1 def test_all_any(self): ts = tm.makeTimeSeries() bool_series = ts > 0 assert not bool_series.all() assert bool_series.any() # Alternative types, with implicit 'object' dtype. s = Series(["abc", True]) assert s.any() def test_numpy_all_any(self, index_or_series): # GH#40180 idx = index_or_series([0, 1, 2]) assert not np.all(idx) assert np.any(idx) idx = Index([1, 2, 3]) assert np.all(idx) def test_all_any_skipna(self): # Check skipna, with implicit 'object' dtype. 
        s1 = Series([np.nan, True])
        s2 = Series([np.nan, False])
        assert s1.all(skipna=False)  # nan && True => True
        assert s1.all(skipna=True)
        assert s2.any(skipna=False)
        assert not s2.any(skipna=True)

    def test_all_any_bool_only(self):
        s = Series([False, False, True, True, False, True], index=[0, 0, 1, 1, 2, 2])

        # GH#47500 - test bool_only works
        assert s.any(bool_only=True)
        assert not s.all(bool_only=True)

    @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_any_all_object_dtype(self, bool_agg_func, skipna):
        # GH#12863
        ser = Series(["a", "b", "c", "d", "e"], dtype=object)
        result = getattr(ser, bool_agg_func)(skipna=skipna)
        expected = True
        assert result == expected

    @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
    @pytest.mark.parametrize(
        "data", [[False, None], [None, False], [False, np.nan], [np.nan, False]]
    )
    def test_any_all_object_dtype_missing(self, data, bool_agg_func):
        # GH#27709
        ser = Series(data)
        result = getattr(ser, bool_agg_func)(skipna=False)

        # None is treated as False, but np.nan is treated as True
        expected = bool_agg_func == "any" and None not in data
        assert result == expected

    @pytest.mark.parametrize("dtype", ["boolean", "Int64", "UInt64", "Float64"])
    @pytest.mark.parametrize("bool_agg_func", ["any", "all"])
    @pytest.mark.parametrize("skipna", [True, False])
    @pytest.mark.parametrize(
        # expected_data indexed as [[skipna=False/any, skipna=False/all],
        # [skipna=True/any, skipna=True/all]]
        "data,expected_data",
        [
            ([0, 0, 0], [[False, False], [False, False]]),
            ([1, 1, 1], [[True, True], [True, True]]),
            ([pd.NA, pd.NA, pd.NA], [[pd.NA, pd.NA], [False, True]]),
            ([0, pd.NA, 0], [[pd.NA, False], [False, False]]),
            ([1, pd.NA, 1], [[True, pd.NA], [True, True]]),
            ([1, pd.NA, 0], [[True, False], [True, False]]),
        ],
    )
    def test_any_all_nullable_kleene_logic(
        self, bool_agg_func, skipna, data, dtype, expected_data
    ):
        # GH-37506, GH-41967
        ser = Series(data, dtype=dtype)
        expected = expected_data[skipna][bool_agg_func == "all"]

        result = getattr(ser, bool_agg_func)(skipna=skipna)
        assert (result is pd.NA and expected is pd.NA) or result == expected

    def test_any_axis1_bool_only(self):
        # GH#32432
        df = DataFrame({"A": [True, False], "B": [1, 2]})
        result = df.any(axis=1, bool_only=True)
        expected = Series([True, False])
        tm.assert_series_equal(result, expected)

    def test_any_all_datetimelike(self):
        # GH#38723 these may not be the desired long-term behavior (GH#34479)
        # but in the interim should be internally consistent
        dta = date_range("1995-01-02", periods=3)._data
        ser = Series(dta)
        df = DataFrame(ser)

        msg = "'(any|all)' with datetime64 dtypes is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            # GH#34479
            assert dta.all()
            assert dta.any()

            assert ser.all()
            assert ser.any()

            assert df.any().all()
            assert df.all().all()

        dta = dta.tz_localize("UTC")
        ser = Series(dta)
        df = DataFrame(ser)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            # GH#34479
            assert dta.all()
            assert dta.any()

            assert ser.all()
            assert ser.any()

            assert df.any().all()
            assert df.all().all()

        tda = dta - dta[0]
        ser = Series(tda)
        df = DataFrame(ser)

        assert tda.any()
        assert not tda.all()

        assert ser.any()
        assert not ser.all()

        assert df.any().all()
        assert not df.all().any()

    def test_timedelta64_analytics(self):
        # index min/max
        dti = date_range("2012-1-1", periods=3, freq="D")
        td = Series(dti) - Timestamp("20120101")

        result = td.idxmin()
        assert result == 0

        result = td.idxmax()
        assert result == 2

        # GH#2982
        # with NaT
        td[0] = np.nan

        result = td.idxmin()
        assert result == 1

        result = td.idxmax()
        assert result == 2

        # abs
        s1 = Series(date_range("20120101", periods=3))
        s2 = Series(date_range("20120102", periods=3))
        expected = Series(s2 - s1)

        result = np.abs(s1 - s2)
        tm.assert_series_equal(result, expected)

        result = (s1 - s2).abs()
        tm.assert_series_equal(result, expected)

        # max/min
        result = td.max()
        expected = Timedelta("2 days")
        assert result == expected

        result = td.min()
        expected = Timedelta("1 days")
        assert result == expected

    @pytest.mark.parametrize(
        "test_input,error_type",
        [
            (Series([], dtype="float64"), ValueError),
            # For strings, or any Series with dtype 'O'
            (Series(["foo", "bar", "baz"]), TypeError),
            (Series([(1,), (2,)]), TypeError),
            # For mixed data types
            (Series(["foo", "foo", "bar", "bar", None, np.nan, "baz"]), TypeError),
        ],
    )
    def test_assert_idxminmax_raises(self, test_input, error_type):
        """
        Cases where ``Series.argmax`` and related should raise an exception
        """
        msg = (
            "reduction operation 'argmin' not allowed for this dtype|"
            "attempt to get argmin of an empty sequence"
        )
        with pytest.raises(error_type, match=msg):
            test_input.idxmin()
        with pytest.raises(error_type, match=msg):
            test_input.idxmin(skipna=False)
        msg = (
            "reduction operation 'argmax' not allowed for this dtype|"
            "attempt to get argmax of an empty sequence"
        )
        with pytest.raises(error_type, match=msg):
            test_input.idxmax()
        with pytest.raises(error_type, match=msg):
            test_input.idxmax(skipna=False)

    def test_idxminmax_with_inf(self):
        # For numeric data with NA and Inf (GH #13595)
        s = Series([0, -np.inf, np.inf, np.nan])

        assert s.idxmin() == 1
        assert np.isnan(s.idxmin(skipna=False))

        assert s.idxmax() == 2
        assert np.isnan(s.idxmax(skipna=False))

        # Using old-style behavior that treats floating point nan, -inf, and
        # +inf as missing
        with pd.option_context("mode.use_inf_as_na", True):
            assert s.idxmin() == 0
            assert np.isnan(s.idxmin(skipna=False))
            assert s.idxmax() == 0
            assert np.isnan(s.idxmax(skipna=False))


class TestDatetime64SeriesReductions:
    # Note: the name TestDatetime64SeriesReductions indicates these tests
    # were moved from a series-specific test file, _not_ that these tests are
    # intended long-term to be series-specific

    @pytest.mark.parametrize(
        "nat_ser",
        [
            Series([NaT, NaT]),
            Series([NaT, Timedelta("nat")]),
            Series([Timedelta("nat"), Timedelta("nat")]),
        ],
    )
    def test_minmax_nat_series(self, nat_ser):
        # GH#23282
        assert nat_ser.min() is NaT
        assert nat_ser.max() is NaT
        assert nat_ser.min(skipna=False) is NaT
        assert nat_ser.max(skipna=False) is NaT

    @pytest.mark.parametrize(
        "nat_df",
        [
            DataFrame([NaT, NaT]),
            DataFrame([NaT, Timedelta("nat")]),
            DataFrame([Timedelta("nat"), Timedelta("nat")]),
        ],
    )
    def test_minmax_nat_dataframe(self, nat_df):
        # GH#23282
        assert nat_df.min()[0] is NaT
        assert nat_df.max()[0] is NaT
        assert nat_df.min(skipna=False)[0] is NaT
        assert nat_df.max(skipna=False)[0] is NaT

    def test_min_max(self):
        rng = date_range("1/1/2000", "12/31/2000")
        rng2 = rng.take(np.random.permutation(len(rng)))

        the_min = rng2.min()
        the_max = rng2.max()
        assert isinstance(the_min, Timestamp)
        assert isinstance(the_max, Timestamp)
        assert the_min == rng[0]
        assert the_max == rng[-1]

        assert rng.min() == rng[0]
        assert rng.max() == rng[-1]

    def test_min_max_series(self):
        rng = date_range("1/1/2000", periods=10, freq="4h")
        lvls = ["A", "A", "A", "B", "B", "B", "C", "C", "C", "C"]
        df = DataFrame({"TS": rng, "V": np.random.randn(len(rng)), "L": lvls})

        result = df.TS.max()
        exp = Timestamp(df.TS.iat[-1])
        assert isinstance(result, Timestamp)
        assert result == exp

        result = df.TS.min()
        exp = Timestamp(df.TS.iat[0])
        assert isinstance(result, Timestamp)
        assert result == exp


class TestCategoricalSeriesReductions:
    # Note: the name TestCategoricalSeriesReductions indicates these tests
    # were moved from a series-specific test file, _not_ that these tests are
    # intended long-term to be series-specific

    @pytest.mark.parametrize("function", ["min", "max"])
    def test_min_max_unordered_raises(self, function):
        # unordered cats have no min/max
        cat = Series(Categorical(["a", "b", "c", "d"], ordered=False))
        msg = f"Categorical is not ordered for operation {function}"
        with pytest.raises(TypeError, match=msg):
            getattr(cat, function)()

    @pytest.mark.parametrize(
        "values, categories",
        [
            (list("abc"), list("abc")),
            (list("abc"), list("cba")),
            (list("abc") + [np.nan], list("cba")),
            ([1, 2, 3], [3, 2, 1]),
            ([1, 2, 3, np.nan], [3, 2, 1]),
        ],
    )
    @pytest.mark.parametrize("function", ["min", "max"])
    def test_min_max_ordered(self, values, categories, function):
        # GH 25303
        cat = Series(Categorical(values, categories=categories, ordered=True))
        result = getattr(cat, function)(skipna=True)
        expected = categories[0] if function == "min" else categories[2]
        assert result == expected

    @pytest.mark.parametrize("function", ["min", "max"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_ordered_with_nan_only(self, function, skipna):
        # https://github.com/pandas-dev/pandas/issues/33450
        cat = Series(Categorical([np.nan], categories=[1, 2], ordered=True))
        result = getattr(cat, function)(skipna=skipna)
        assert result is np.nan

    @pytest.mark.parametrize("function", ["min", "max"])
    @pytest.mark.parametrize("skipna", [True, False])
    def test_min_max_skipna(self, function, skipna):
        cat = Series(
            Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True)
        )
        result = getattr(cat, function)(skipna=skipna)

        if skipna is True:
            expected = "b" if function == "min" else "a"
            assert result == expected
        else:
            assert result is np.nan


class TestSeriesMode:
    # Note: the name TestSeriesMode indicates these tests
    # were moved from a series-specific test file, _not_ that these tests are
    # intended long-term to be series-specific

    @pytest.mark.parametrize(
        "dropna, expected",
        [(True, Series([], dtype=np.float64)), (False, Series([], dtype=np.float64))],
    )
    def test_mode_empty(self, dropna, expected):
        s = Series([], dtype=np.float64)
        result = s.mode(dropna)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dropna, data, expected",
        [
            (True, [1, 1, 1, 2], [1]),
            (True, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
            (False, [1, 1, 1, 2], [1]),
            (False, [1, 1, 1, 2, 3, 3, 3], [1, 3]),
        ],
    )
    @pytest.mark.parametrize(
        "dt", list(np.typecodes["AllInteger"] + np.typecodes["Float"])
    )
    def test_mode_numerical(self, dropna, data, expected, dt):
        s = Series(data, dtype=dt)
        result = s.mode(dropna)
        expected = Series(expected, dtype=dt)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("dropna, expected", [(True, [1.0]), (False, [1, np.nan])])
    def test_mode_numerical_nan(self, dropna, expected):
        s = Series([1, 1, 2, np.nan, np.nan])
        result = s.mode(dropna)
        expected = Series(expected)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dropna, expected1, expected2, expected3",
        [(True, ["b"], ["bar"], ["nan"]), (False, ["b"], [np.nan], ["nan"])],
    )
    def test_mode_str_obj(self, dropna, expected1, expected2, expected3):
        # Test string and object types.
        data = ["a"] * 2 + ["b"] * 3

        s = Series(data, dtype="c")
        result = s.mode(dropna)
        expected1 = Series(expected1, dtype="c")
        tm.assert_series_equal(result, expected1)

        data = ["foo", "bar", "bar", np.nan, np.nan, np.nan]

        s = Series(data, dtype=object)
        result = s.mode(dropna)
        expected2 = Series(expected2, dtype=object)
        tm.assert_series_equal(result, expected2)

        data = ["foo", "bar", "bar", np.nan, np.nan, np.nan]

        s = Series(data, dtype=object).astype(str)
        result = s.mode(dropna)
        expected3 = Series(expected3, dtype=str)
        tm.assert_series_equal(result, expected3)

    @pytest.mark.parametrize(
        "dropna, expected1, expected2",
        [(True, ["foo"], ["foo"]), (False, ["foo"], [np.nan])],
    )
    def test_mode_mixeddtype(self, dropna, expected1, expected2):
        s = Series([1, "foo", "foo"])
        result = s.mode(dropna)
        expected = Series(expected1)
        tm.assert_series_equal(result, expected)

        s = Series([1, "foo", "foo", np.nan, np.nan, np.nan])
        result = s.mode(dropna)
        expected = Series(expected2, dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "dropna, expected1, expected2",
        [
            (
                True,
                ["1900-05-03", "2011-01-03", "2013-01-02"],
                ["2011-01-03", "2013-01-02"],
            ),
            (False, [np.nan], [np.nan, "2011-01-03", "2013-01-02"]),
        ],
    )
    def test_mode_datetime(self, dropna, expected1, expected2):
        s = Series(
            ["2011-01-03", "2013-01-02", "1900-05-03", "nan", "nan"], dtype="M8[ns]"
        )
        result = s.mode(dropna)
        expected1 = Series(expected1, dtype="M8[ns]")
        tm.assert_series_equal(result, expected1)

        s = Series(
            [
                "2011-01-03",
                "2013-01-02",
                "1900-05-03",
                "2011-01-03",
                "2013-01-02",
                "nan",
                "nan",
            ],
            dtype="M8[ns]",
        )
        result = s.mode(dropna)
        expected2 = Series(expected2, dtype="M8[ns]")
        tm.assert_series_equal(result, expected2)

    @pytest.mark.parametrize(
        "dropna, expected1, expected2",
        [
            (True, ["-1 days", "0 days", "1 days"], ["2 min", "1 day"]),
            (False, [np.nan], [np.nan, "2 min", "1 day"]),
        ],
    )
    def test_mode_timedelta(self, dropna, expected1, expected2):
        # gh-5986: Test timedelta types.
        s = Series(
            ["1 days", "-1 days", "0 days", "nan", "nan"], dtype="timedelta64[ns]"
        )
        result = s.mode(dropna)
        expected1 = Series(expected1, dtype="timedelta64[ns]")
        tm.assert_series_equal(result, expected1)

        s = Series(
            [
                "1 day",
                "1 day",
                "-1 day",
                "-1 day 2 min",
                "2 min",
                "2 min",
                "nan",
                "nan",
            ],
            dtype="timedelta64[ns]",
        )
        result = s.mode(dropna)
        expected2 = Series(expected2, dtype="timedelta64[ns]")
        tm.assert_series_equal(result, expected2)

    @pytest.mark.parametrize(
        "dropna, expected1, expected2, expected3",
        [
            (
                True,
                Categorical([1, 2], categories=[1, 2]),
                Categorical(["a"], categories=[1, "a"]),
                Categorical([3, 1], categories=[3, 2, 1], ordered=True),
            ),
            (
                False,
                Categorical([np.nan], categories=[1, 2]),
                Categorical([np.nan, "a"], categories=[1, "a"]),
                Categorical([np.nan, 3, 1], categories=[3, 2, 1], ordered=True),
            ),
        ],
    )
    def test_mode_category(self, dropna, expected1, expected2, expected3):
        s = Series(Categorical([1, 2, np.nan, np.nan]))
        result = s.mode(dropna)
        expected1 = Series(expected1, dtype="category")
        tm.assert_series_equal(result, expected1)

        s = Series(Categorical([1, "a", "a", np.nan, np.nan]))
        result = s.mode(dropna)
        expected2 = Series(expected2, dtype="category")
        tm.assert_series_equal(result, expected2)

        s = Series(
            Categorical(
                [1, 1, 2, 3, 3, np.nan, np.nan], categories=[3, 2, 1], ordered=True
            )
        )
        result = s.mode(dropna)
        expected3 = Series(expected3, dtype="category")
        tm.assert_series_equal(result, expected3)

    @pytest.mark.parametrize(
        "dropna, expected1, expected2",
        [(True, [2**63], [1, 2**63]), (False, [2**63], [1, 2**63])],
    )
    def test_mode_intoverflow(self, dropna, expected1, expected2):
        # Test for uint64 overflow.
        s = Series([1, 2**63, 2**63], dtype=np.uint64)
        result = s.mode(dropna)
        expected1 = Series(expected1, dtype=np.uint64)
        tm.assert_series_equal(result, expected1)

        s = Series([1, 2**63], dtype=np.uint64)
        result = s.mode(dropna)
        expected2 = Series(expected2, dtype=np.uint64)
        tm.assert_series_equal(result, expected2)

    def test_mode_sortwarning(self):
        # Check for the warning that is raised when the mode
        # results cannot be sorted
        expected = Series(["foo", np.nan])
        s = Series([1, "foo", "foo", np.nan, np.nan])

        with tm.assert_produces_warning(UserWarning):
            result = s.mode(dropna=False)
            result = result.sort_values().reset_index(drop=True)

        tm.assert_series_equal(result, expected)

    def test_mode_boolean_with_na(self):
        # GH#42107
        ser = Series([True, False, True, pd.NA], dtype="boolean")
        result = ser.mode()
        expected = Series({0: True}, dtype="boolean")
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "array,expected,dtype",
        [
            (
                [0, 1j, 1, 1, 1 + 1j, 1 + 2j],
                Series([1], dtype=np.complex128),
                np.complex128,
            ),
            (
                [0, 1j, 1, 1, 1 + 1j, 1 + 2j],
                Series([1], dtype=np.complex64),
                np.complex64,
            ),
            (
                [1 + 1j, 2j, 1 + 1j],
                Series([1 + 1j], dtype=np.complex128),
                np.complex128,
            ),
        ],
    )
    def test_single_mode_value_complex(self, array, expected, dtype):
        result = Series(array, dtype=dtype).mode()
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "array,expected,dtype",
        [
            (
                # no modes
                [0, 1j, 1, 1 + 1j, 1 + 2j],
                Series([0j, 1j, 1 + 0j, 1 + 1j, 1 + 2j], dtype=np.complex128),
                np.complex128,
            ),
            (
                [1 + 1j, 2j, 1 + 1j, 2j, 3],
                Series([2j, 1 + 1j], dtype=np.complex64),
                np.complex64,
            ),
        ],
    )
    def test_multimode_complex(self, array, expected, dtype):
        # GH 17927
        # mode tries to sort multimodal series.
        # Complex numbers are sorted by their magnitude
        result = Series(array, dtype=dtype).mode()
        tm.assert_series_equal(result, expected)