# File listing metadata: 278 lines, 9.6 KiB, Python
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas import Series, TimedeltaIndex, timedelta_range
|
|
import pandas._testing as tm
|
|
|
|
from pandas.tseries.offsets import DateOffset, Day, Hour
|
|
|
|
|
|
class TestTimedeltaIndexOps:
    """Tests for general-purpose operations on ``TimedeltaIndex``.

    Covers value counting and uniqueness, membership, attribute lookup on a
    timedelta-indexed Series, sorting, duplicate handling, frequency
    inference, ``repeat``, NaT bookkeeping and the ``freq`` setter.
    """

    def test_value_counts_unique(self):
        """``value_counts``/``unique`` give the same answer for Index and Series."""
        # GH 7735
        idx = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1)))

        # Most frequent value first, so the expected index runs backwards
        # and carries no freq.
        exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10)
        exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = timedelta_range("1 days 09:00:00", freq="H", periods=10)
        tm.assert_index_equal(idx.unique(), expected)

        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                pd.NaT,
            ]
        )

        # NaT is dropped by default ...
        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"])
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # ... and counted when dropna=False.
        exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT])
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    def test_nonunique_contains(self):
        """``in`` finds an element of an index that contains duplicates."""
        # GH 9512
        for idx in map(
            TimedeltaIndex,
            (
                [0, 1, 0],
                [0, 0, -1],
                [0, -1, -1],
                ["00:01:00", "00:01:00", "00:02:00"],
                ["00:01:00", "00:01:00", "00:00:01"],
            ),
        ):
            assert idx[0] in idx

    def test_unknown_attribute(self):
        """Unknown attribute access on a timedelta-indexed Series raises."""
        # see gh-9680
        tdi = timedelta_range(start=0, periods=10, freq="1s")
        ts = Series(np.random.normal(size=10), index=tdi)
        assert "foo" not in ts.__dict__
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ts.foo

    def test_order(self):
        """``sort_values`` keeps/flips freq when regular and drops it otherwise."""
        # GH 10295
        idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx")
        idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx")

        # Already-sorted indexes with a freq: ascending keeps the freq,
        # descending yields the reversed index with a negated freq.
        for idx in [idx1, idx2]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, idx)
            assert ordered.freq == idx.freq

            ordered = idx.sort_values(ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, idx)
            tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
            assert ordered.freq == idx.freq

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            expected = idx[::-1]
            tm.assert_index_equal(ordered, expected)
            # The returned indexer must describe the reversal as well.
            tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
            assert ordered.freq == expected.freq
            assert ordered.freq.n == -1

        # Unsorted indexes with duplicates and no freq, at hour and day
        # resolution; both share the same ordering pattern.
        idx1 = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        exp1 = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )

        idx2 = TimedeltaIndex(
            ["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2"
        )
        exp2 = TimedeltaIndex(
            ["1 day", "1 day", "2 day", "3 day", "5 day"], name="idx2"
        )

        for idx, expected in [(idx1, exp1), (idx2, exp2)]:
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            assert ordered.freq is None

            ordered = idx.sort_values(ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

            ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
            tm.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 0, 4])
            tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """``drop_duplicates`` keeps name/freq when there is nothing to drop."""
        # GH 10115
        idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(expected, result)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """``duplicated``/``drop_duplicates`` honour ``keep`` for Index and Series.

        ``expected`` is the boolean duplicate mask for the 15-element index
        built below; ``index`` holds the positions that survive the drop.
        """
        # to check Index/Series compat
        idx = timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        # Entries not flagged as duplicates are what drop_duplicates keeps.
        expected_idx = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected_idx)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected_idx, index=index))

    def test_infer_freq(self, freq_sample):
        """``freq="infer"`` recovers the freq from raw integer values."""
        # GH#11018
        idx = timedelta_range("1", freq=freq_sample, periods=10)
        result = TimedeltaIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_repeat(self):
        """``repeat`` (method and ``np.repeat``) duplicates values and drops freq."""
        index = timedelta_range("1 days", periods=2, freq="D")
        exp = TimedeltaIndex(["1 days", "1 days", "2 days", "2 days"])
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = TimedeltaIndex(["1 days", "NaT", "3 days"])
        exp = TimedeltaIndex(
            [
                "1 days",
                "1 days",
                "1 days",
                "NaT",
                "NaT",
                "NaT",
                "3 days",
                "3 days",
                "3 days",
            ]
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_nat(self):
        """NaT is the NA value and is reported by the NaN helper attributes."""
        assert TimedeltaIndex._na_value is pd.NaT
        assert TimedeltaIndex([])._na_value is pd.NaT

        idx = TimedeltaIndex(["1 days", "2 days"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        idx = TimedeltaIndex(["1 days", "NaT"])
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    @pytest.mark.parametrize("values", [["0 days", "2 days", "4 days"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "48H", Hour(48)])
    def test_freq_setter(self, values, freq):
        """A conforming freq (string or offset) can be set and reset to None."""
        # GH 20678
        idx = TimedeltaIndex(values)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Setting a non-conforming, non-fixed or invalid freq raises ValueError."""
        # GH 20678
        idx = TimedeltaIndex(["0 days", "2 days", "4 days"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with a non-fixed frequency
        msg = r"<2 \* BusinessDays> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "2B"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        """Dropping freq on a derived index leaves the original's freq intact."""
        # Setting the freq for one TimedeltaIndex shouldn't alter the freq
        # for another that views the same data

        tdi = TimedeltaIndex(["0 days", "2 days", "4 days"], freq="2D")
        tda = tdi._data

        tdi2 = TimedeltaIndex(tda)._with_freq(None)
        assert tdi2.freq is None

        # Original was not altered
        assert tdi.freq == "2D"
        assert tda.freq == "2D"