import numpy as np import pytest from pandas._libs.tslibs import IncompatibleFrequency import pandas as pd from pandas import PeriodIndex, date_range, period_range import pandas._testing as tm def _permute(obj): return obj.take(np.random.permutation(len(obj))) class TestPeriodIndex: def test_union(self, sort): # union other1 = period_range("1/1/2000", freq="D", periods=5) rng1 = period_range("1/6/2000", freq="D", periods=5) expected1 = PeriodIndex( [ "2000-01-06", "2000-01-07", "2000-01-08", "2000-01-09", "2000-01-10", "2000-01-01", "2000-01-02", "2000-01-03", "2000-01-04", "2000-01-05", ], freq="D", ) rng2 = period_range("1/1/2000", freq="D", periods=5) other2 = period_range("1/4/2000", freq="D", periods=5) expected2 = period_range("1/1/2000", freq="D", periods=8) rng3 = period_range("1/1/2000", freq="D", periods=5) other3 = PeriodIndex([], freq="D") expected3 = period_range("1/1/2000", freq="D", periods=5) rng4 = period_range("2000-01-01 09:00", freq="H", periods=5) other4 = period_range("2000-01-02 09:00", freq="H", periods=5) expected4 = PeriodIndex( [ "2000-01-01 09:00", "2000-01-01 10:00", "2000-01-01 11:00", "2000-01-01 12:00", "2000-01-01 13:00", "2000-01-02 09:00", "2000-01-02 10:00", "2000-01-02 11:00", "2000-01-02 12:00", "2000-01-02 13:00", ], freq="H", ) rng5 = PeriodIndex( ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"], freq="T" ) other5 = PeriodIndex( ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"], freq="T" ) expected5 = PeriodIndex( [ "2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05", "2000-01-01 09:08", ], freq="T", ) rng6 = period_range("2000-01-01", freq="M", periods=7) other6 = period_range("2000-04-01", freq="M", periods=7) expected6 = period_range("2000-01-01", freq="M", periods=10) rng7 = period_range("2003-01-01", freq="A", periods=5) other7 = period_range("1998-01-01", freq="A", periods=8) expected7 = PeriodIndex( [ "2003", "2004", "2005", "2006", "2007", "1998", "1999", "2000", "2001", "2002", ], freq="A", ) rng8 = PeriodIndex( ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"], freq="D" ) other8 = period_range("1/6/2000", freq="D", periods=5) expected8 = PeriodIndex( [ "1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000", "1/6/2000", "1/7/2000", "1/8/2000", "1/9/2000", "1/10/2000", ], freq="D", ) for rng, other, expected in [ (rng1, other1, expected1), (rng2, other2, expected2), (rng3, other3, expected3), (rng4, other4, expected4), (rng5, other5, expected5), (rng6, other6, expected6), (rng7, other7, expected7), (rng8, other8, expected8), ]: result_union = rng.union(other, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_union, expected) def test_union_misc(self, sort): index = period_range("1/1/2000", "1/20/2000", freq="D") result = index[:-5].union(index[10:], sort=sort) tm.assert_index_equal(result, index) # not in order result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort) if sort is None: tm.assert_index_equal(result, index) assert tm.equalContents(result, index) # raise if different frequencies index = period_range("1/1/2000", "1/20/2000", freq="D") index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") msg = r"Input has different freq=W-WED from PeriodIndex\(freq=D\)" with pytest.raises(IncompatibleFrequency, match=msg): index.union(index2, sort=sort) # TODO: belongs elsewhere def test_union_dataframe_index(self): rng1 = period_range("1/1/1999", "1/1/2012", freq="M") s1 = pd.Series(np.random.randn(len(rng1)), rng1) rng2 = period_range("1/1/1980", "12/1/2001", freq="M") s2 = pd.Series(np.random.randn(len(rng2)), rng2) df = pd.DataFrame({"s1": s1, "s2": s2}) exp = period_range("1/1/1980", "1/1/2012", freq="M") tm.assert_index_equal(df.index, exp) def test_intersection(self, sort): index = period_range("1/1/2000", "1/20/2000", freq="D") result = index[:-5].intersection(index[10:], sort=sort) tm.assert_index_equal(result, index[10:-5]) # not in order left = _permute(index[:-5]) right = _permute(index[10:]) result = left.intersection(right, sort=sort) if sort is None: tm.assert_index_equal(result, index[10:-5]) assert tm.equalContents(result, index[10:-5]) # raise if different frequencies index = period_range("1/1/2000", "1/20/2000", freq="D") index2 = period_range("1/1/2000", "1/20/2000", freq="W-WED") msg = r"Input has different freq=W-WED from PeriodIndex\(freq=D\)" with pytest.raises(IncompatibleFrequency, match=msg): index.intersection(index2, sort=sort) index3 = period_range("1/1/2000", "1/20/2000", freq="2D") msg = r"Input has different freq=2D from PeriodIndex\(freq=D\)" with pytest.raises(IncompatibleFrequency, match=msg): index.intersection(index3, sort=sort) def test_intersection_cases(self, sort): base = period_range("6/1/2000", "6/30/2000", freq="D", name="idx") # if target has the same name, it is preserved rng2 = period_range("5/15/2000", "6/20/2000", freq="D", name="idx") expected2 = period_range("6/1/2000", "6/20/2000", freq="D", name="idx") # if target name is different, it will be reset rng3 = period_range("5/15/2000", "6/20/2000", freq="D", name="other") expected3 = period_range("6/1/2000", "6/20/2000", freq="D", name=None) rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") expected4 = PeriodIndex([], name="idx", freq="D") for (rng, expected) in [ (rng2, expected2), (rng3, expected3), (rng4, expected4), ]: result = base.intersection(rng, sort=sort) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq # non-monotonic base = PeriodIndex( ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], freq="D", name="idx", ) rng2 = PeriodIndex( ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], freq="D", name="idx", ) expected2 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name="idx") rng3 = PeriodIndex( ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], freq="D", name="other", ) expected3 = PeriodIndex(["2011-01-04", "2011-01-02"], freq="D", name=None) rng4 = period_range("7/1/2000", "7/31/2000", freq="D", name="idx") expected4 = PeriodIndex([], freq="D", name="idx") for (rng, expected) in [ (rng2, expected2), (rng3, expected3), (rng4, expected4), ]: result = base.intersection(rng, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == "D" # empty same freq rng = date_range("6/1/2000", "6/15/2000", freq="T") result = rng[0:0].intersection(rng) assert len(result) == 0 result = rng.intersection(rng[0:0]) assert len(result) == 0 def test_difference(self, sort): # diff period_rng = ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"] rng1 = PeriodIndex(period_rng, freq="D") other1 = period_range("1/6/2000", freq="D", periods=5) expected1 = rng1 rng2 = PeriodIndex(period_rng, freq="D") other2 = period_range("1/4/2000", freq="D", periods=5) expected2 = PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D") rng3 = PeriodIndex(period_rng, freq="D") other3 = PeriodIndex([], freq="D") expected3 = rng3 period_rng = [ "2000-01-01 10:00", "2000-01-01 09:00", "2000-01-01 12:00", "2000-01-01 11:00", "2000-01-01 13:00", ] rng4 = PeriodIndex(period_rng, freq="H") other4 = period_range("2000-01-02 09:00", freq="H", periods=5) expected4 = rng4 rng5 = PeriodIndex( ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"], freq="T" ) other5 = PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T") expected5 = PeriodIndex(["2000-01-01 09:03"], freq="T") period_rng = [ "2000-02-01", "2000-01-01", "2000-06-01", "2000-07-01", "2000-05-01", "2000-03-01", "2000-04-01", ] rng6 = PeriodIndex(period_rng, freq="M") other6 = period_range("2000-04-01", freq="M", periods=7) expected6 = PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M") period_rng = ["2003", "2007", "2006", "2005", "2004"] rng7 = PeriodIndex(period_rng, freq="A") other7 = period_range("1998-01-01", freq="A", periods=8) expected7 = PeriodIndex(["2007", "2006"], freq="A") for rng, other, expected in [ (rng1, other1, expected1), (rng2, other2, expected2), (rng3, other3, expected3), (rng4, other4, expected4), (rng5, other5, expected5), (rng6, other6, expected6), (rng7, other7, expected7), ]: result_difference = rng.difference(other, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_difference, expected) def test_difference_freq(self, sort): # GH14323: difference of Period MUST preserve frequency # but the ability to union results must be preserved index = period_range("20160920", "20160925", freq="D") other = period_range("20160921", "20160924", freq="D") expected = PeriodIndex(["20160920", "20160925"], freq="D") idx_diff = index.difference(other, sort) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal("freq", idx_diff, expected) other = period_range("20160922", "20160925", freq="D") idx_diff = index.difference(other, sort) expected = PeriodIndex(["20160920", "20160921"], freq="D") tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal("freq", idx_diff, expected) def test_intersection_equal_duplicates(self): # GH#38302 idx = pd.period_range("2011-01-01", periods=2) idx_dup = idx.append(idx) result = idx_dup.intersection(idx_dup) tm.assert_index_equal(result, idx)