from datetime import datetime from dateutil.tz import tzlocal import numpy as np import pytest from pandas.compat import IS64 import pandas as pd from pandas import ( DateOffset, DatetimeIndex, Index, Series, Timestamp, bdate_range, date_range, ) import pandas._testing as tm from pandas.tseries.offsets import BDay, Day, Hour START, END = datetime(2009, 1, 1), datetime(2010, 1, 1) class TestDatetimeIndexOps: def test_ops_properties_basic(self, datetime_series): # sanity check that the behavior didn't change # GH#7206 for op in ["year", "day", "second", "weekday"]: msg = f"'Series' object has no attribute '{op}'" with pytest.raises(AttributeError, match=msg): getattr(datetime_series, op) # attribute access should still work! s = Series({"year": 2000, "month": 1, "day": 10}) assert s.year == 2000 assert s.month == 1 assert s.day == 10 msg = "'Series' object has no attribute 'weekday'" with pytest.raises(AttributeError, match=msg): s.weekday def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture rng = date_range("1/1/2000", "1/1/2001") result = rng.repeat(5) assert result.freq is None assert len(result) == 5 * len(rng) index = date_range("2001-01-01", periods=2, freq="D", tz=tz) exp = DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz ) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = date_range("2001-01-01", periods=2, freq="2D", tz=tz) exp = DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz ) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) exp = DatetimeIndex( [ "2001-01-01", "2001-01-01", "2001-01-01", "NaT", "NaT", "NaT", "2003-01-01", "2003-01-01", "2003-01-01", ], tz=tz, ) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) assert res.freq is None def test_repeat(self, tz_naive_fixture): tz = tz_naive_fixture reps = 2 msg = "the 'axis' parameter is not supported" rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) expected_rng = DatetimeIndex( [ Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), ] ) res = rng.repeat(reps) tm.assert_index_equal(res, expected_rng) assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) with pytest.raises(ValueError, match=msg): np.repeat(rng, reps, axis=1) @pytest.mark.parametrize( "freq,expected", [ ("A", "day"), ("Q", "day"), ("M", "day"), ("D", "day"), ("H", "hour"), ("T", "minute"), ("S", "second"), ("L", "millisecond"), ("U", "microsecond"), ], ) def test_resolution(self, tz_naive_fixture, freq, expected): tz = tz_naive_fixture if freq == "A" and not IS64 and isinstance(tz, tzlocal): pytest.xfail(reason="OverflowError inside tzlocal past 2038") idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) assert idx.resolution == expected def test_value_counts_unique(self, tz_naive_fixture): tz = tz_naive_fixture # GH 7735 idx = date_range("2011-01-01 09:00", freq="H", periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) exp_idx = date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") expected.index = expected.index._with_freq(None) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) expected = expected._with_freq(None) tm.assert_index_equal(idx.unique(), expected) idx = DatetimeIndex( [ "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 08:00", "2013-01-01 08:00", pd.NaT, ], tz=tz, ) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx) @pytest.mark.parametrize( "idx", [ DatetimeIndex( ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" ), DatetimeIndex( ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H", name="tzidx", tz="Asia/Tokyo", ), ], ) def test_order_with_freq(self, idx): ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq.n == -1 @pytest.mark.parametrize( "index_dates,expected_dates", [ ( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], ), ( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], ), ( [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], ), ], ) def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): tz = tz_naive_fixture # without freq index = DatetimeIndex(index_dates, tz=tz, name="idx") expected = DatetimeIndex(expected_dates, tz=tz, name="idx") ordered = index.sort_values(na_position="first") tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = index.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, na_position="first") tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 0, 4]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq idx_dup = idx.append(idx) assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() expected = idx._with_freq(None) tm.assert_index_equal(result, expected) assert result.freq is None @pytest.mark.parametrize( "keep, expected, index", [ ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), ( False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10), ), ], ) def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat idx = date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") idx = idx.append(idx[:5]) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) tm.assert_index_equal(result, expected) result = Series(idx).drop_duplicates(keep=keep) tm.assert_series_equal(result, Series(expected, index=index)) def test_infer_freq(self, freq_sample): # GH 11018 idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) result = DatetimeIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) assert result.freq == freq_sample def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture assert DatetimeIndex._na_value is pd.NaT assert DatetimeIndex([])._na_value is pd.NaT idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_freq_setter(self, values, freq, tz): # GH 20678 idx = DatetimeIndex(values, tz=tz) # can set to an offset, converting from string if necessary idx._data.freq = freq assert idx.freq == freq assert isinstance(idx.freq, DateOffset) # can reset to None idx._data.freq = None assert idx.freq is None def test_freq_setter_errors(self): # GH 20678 idx = DatetimeIndex(["20180101", "20180103", "20180105"]) # setting with an incompatible freq msg = ( "Inferred frequency 2D from passed values does not conform to " "passed frequency 5D" ) with pytest.raises(ValueError, match=msg): idx._data.freq = "5D" # setting with non-freq string with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo" def test_freq_view_safe(self): # Setting the freq for one DatetimeIndex shouldn't alter the freq # for another that views the same data dti = date_range("2016-01-01", periods=5) dta = dti._data dti2 = DatetimeIndex(dta)._with_freq(None) assert dti2.freq is None # Original was not altered assert dti.freq == "D" assert dta.freq == "D" class TestBusinessDatetimeIndex: def setup_method(self, method): self.rng = bdate_range(START, END) def test_comparison(self): d = self.rng[10] comp = self.rng > d assert comp[11] assert not comp[9] def test_copy(self): cp = self.rng.copy() repr(cp) tm.assert_index_equal(cp, self.rng) def test_identical(self): t1 = self.rng.copy() t2 = self.rng.copy() assert t1.identical(t2) # name t1 = t1.rename("foo") assert t1.equals(t2) assert not t1.identical(t2) t2 = t2.rename("foo") assert t1.identical(t2) # freq t2v = Index(t2.values) assert t1.equals(t2v) assert not t1.identical(t2v) class TestCustomDatetimeIndex: def setup_method(self, method): self.rng = bdate_range(START, END, freq="C") def test_comparison(self): d = self.rng[10] comp = self.rng > d assert comp[11] assert not comp[9] def test_copy(self): cp = self.rng.copy() repr(cp) tm.assert_index_equal(cp, self.rng)