3RNN/Lib/site-packages/pandas/tests/scalar/timedelta/test_constructors.py
2024-05-26 19:49:15 +02:00

699 lines
22 KiB
Python

from datetime import timedelta
from itertools import product
import numpy as np
import pytest
from pandas._libs.tslibs import OutOfBoundsTimedelta
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas import (
Index,
NaT,
Timedelta,
TimedeltaIndex,
offsets,
to_timedelta,
)
import pandas._testing as tm
class TestTimedeltaConstructorUnitKeyword:
    """Tests of the ``unit`` argument accepted by ``Timedelta`` and ``to_timedelta``."""

    @pytest.mark.parametrize("unit", ["Y", "y", "M"])
    def test_unit_m_y_raises(self, unit):
        """'Y', 'y' and 'M' raise ValueError for scalar and list input alike."""
        msg = "Units 'M', 'Y', and 'y' are no longer supported"

        with pytest.raises(ValueError, match=msg):
            Timedelta(10, unit)

        with pytest.raises(ValueError, match=msg):
            to_timedelta(10, unit)

        with pytest.raises(ValueError, match=msg):
            to_timedelta([1, 2], unit)

    @pytest.mark.parametrize(
        "unit,unit_depr",
        [
            ("h", "H"),
            ("min", "T"),
            ("s", "S"),
            ("ms", "L"),
            ("ns", "N"),
            ("us", "U"),
        ],
    )
    def test_units_H_T_S_L_N_U_deprecated(self, unit, unit_depr):
        """A deprecated alias warns and gives the same result as its replacement."""
        # GH#52536
        msg = f"'{unit_depr}' is deprecated and will be removed in a future version."

        expected = Timedelta(1, unit=unit)
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = Timedelta(1, unit=unit_depr)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        "unit, np_unit",
        [(value, "W") for value in ["W", "w"]]
        + [(value, "D") for value in ["D", "d", "days", "day", "Days", "Day"]]
        + [
            (value, "m")
            for value in [
                "m",
                "minute",
                "min",
                "minutes",
                "Minute",
                "Min",
                "Minutes",
            ]
        ]
        + [
            (value, "s")
            for value in [
                "s",
                "seconds",
                "sec",
                "second",
                "Seconds",
                "Sec",
                "Second",
            ]
        ]
        + [
            (value, "ms")
            for value in [
                "ms",
                "milliseconds",
                "millisecond",
                "milli",
                "millis",
                "MS",
                "Milliseconds",
                "Millisecond",
                "Milli",
                "Millis",
            ]
        ]
        + [
            (value, "us")
            for value in [
                "us",
                "microseconds",
                "microsecond",
                "micro",
                "micros",
                "u",
                "US",
                "Microseconds",
                "Microsecond",
                "Micro",
                "Micros",
                "U",
            ]
        ]
        + [
            (value, "ns")
            for value in [
                "ns",
                "nanoseconds",
                "nanosecond",
                "nano",
                "nanos",
                "n",
                "NS",
                "Nanoseconds",
                "Nanosecond",
                "Nano",
                "Nanos",
                "N",
            ]
        ],
    )
    @pytest.mark.parametrize("wrapper", [np.array, list, Index])
    def test_unit_parser(self, unit, np_unit, wrapper):
        """Every accepted unit spelling parses to the matching numpy unit.

        ``unit`` is the spelling under test, ``np_unit`` the numpy unit used to
        build the expected values, and ``wrapper`` the container the array-like
        input is wrapped in.
        """
        # validate all units, GH 6855, GH 21762
        # array-likes
        expected = TimedeltaIndex(
            [np.timedelta64(i, np_unit) for i in np.arange(5).tolist()],
            dtype="m8[ns]",
        )
        # TODO(2.0): the desired output dtype may have non-nano resolution

        # A FutureWarning is expected in every case; only the message that is
        # matched depends on whether the spelling itself is a deprecated alias.
        if (unit, np_unit) in (("u", "us"), ("U", "us"), ("n", "ns"), ("N", "ns")):
            msg = f"'{unit}' is deprecated and will be removed in a future version."
        else:
            msg = "The 'unit' keyword in TimedeltaIndex construction is deprecated"

        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = to_timedelta(wrapper(range(5)), unit=unit)
            tm.assert_index_equal(result, expected)
            result = TimedeltaIndex(wrapper(range(5)), unit=unit)
            tm.assert_index_equal(result, expected)

            # the unit may also be embedded in each string element
            str_repr = [f"{x}{unit}" for x in np.arange(5)]
            result = to_timedelta(wrapper(str_repr))
            tm.assert_index_equal(result, expected)

            # scalar
            expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]"))
            result = to_timedelta(2, unit=unit)
            assert result == expected
            result = Timedelta(2, unit=unit)
            assert result == expected

            result = to_timedelta(f"2{unit}")
            assert result == expected
            result = Timedelta(f"2{unit}")
            assert result == expected
def test_construct_from_kwargs_overflow():
    """Keyword arguments that are too large raise OutOfBoundsTimedelta."""
    # GH#55503
    overflow_cases = (
        (
            {"days": 10**6},
            "seconds=86400000000000000000, milliseconds=0, microseconds=0, nanoseconds=0",
        ),
        (
            {"minutes": 10**9},
            "seconds=60000000000000000000, milliseconds=0, microseconds=0, nanoseconds=0",
        ),
    )
    for kwargs, pattern in overflow_cases:
        with pytest.raises(OutOfBoundsTimedelta, match=pattern):
            Timedelta(**kwargs)
def test_construct_with_weeks_unit_overflow():
    """A huge int or float count of weeks raises instead of wrapping around."""
    # GH#47268 don't silently wrap around
    for too_many_weeks in (1000000000000000000, 1000000000000000000.0):
        with pytest.raises(OutOfBoundsTimedelta, match="without overflow"):
            Timedelta(too_many_weeks, unit="W")
def test_construct_from_td64_with_unit():
    """The same error is raised for a td64 input whatever ``unit`` is passed."""
    # ignore the unit, as it may cause silently overflows leading to incorrect
    #  results, and in non-overflow cases is irrelevant GH#46827
    td64 = np.timedelta64(123456789000000000, "h")
    pattern = "123456789000000000 hours"

    # "ps", "ns" and no unit at all must behave the same
    for extra_kwargs in ({"unit": "ps"}, {"unit": "ns"}, {}):
        with pytest.raises(OutOfBoundsTimedelta, match=pattern):
            Timedelta(td64, **extra_kwargs)
def test_from_td64_retain_resolution():
    """Check the resolution a Timedelta gets when built from np.timedelta64."""
    # case where we retain millisecond resolution
    ms_input = np.timedelta64(12345, "ms")
    from_ms = Timedelta(ms_input)
    assert from_ms._creso == NpyDatetimeUnit.NPY_FR_ms.value
    assert from_ms._value == ms_input.view("i8")

    # Case where we cast to nearest-supported reso
    day_input = np.timedelta64(1234, "D")
    from_days = Timedelta(day_input)
    assert from_days._creso == NpyDatetimeUnit.NPY_FR_s.value
    assert from_days.days == 1234
    assert from_days == day_input

    # Case that _would_ overflow if we didn't support non-nano
    us_input = np.timedelta64(1000000000000000000, "us")
    from_us = Timedelta(us_input)
    assert from_us._creso == NpyDatetimeUnit.NPY_FR_us.value
    assert from_us.total_seconds() == 1000000000000
def test_from_pytimedelta_us_reso():
    """A Timedelta built from datetime.timedelta has microsecond resolution."""
    # pytimedelta has microsecond resolution, so Timedelta(pytd) inherits that
    pytd = timedelta(days=4, minutes=3)
    converted = Timedelta(pytd)

    assert converted._creso == NpyDatetimeUnit.NPY_FR_us.value
    assert converted.to_pytimedelta() == pytd
def test_from_tick_reso():
    """Check the resolution a Timedelta gets from each tick offset class."""
    expected_reso = (
        (offsets.Nano, NpyDatetimeUnit.NPY_FR_ns),
        (offsets.Micro, NpyDatetimeUnit.NPY_FR_us),
        (offsets.Milli, NpyDatetimeUnit.NPY_FR_ms),
        (offsets.Second, NpyDatetimeUnit.NPY_FR_s),
        # everything above Second gets cast to the closest supported reso: second
        (offsets.Minute, NpyDatetimeUnit.NPY_FR_s),
        (offsets.Hour, NpyDatetimeUnit.NPY_FR_s),
        (offsets.Day, NpyDatetimeUnit.NPY_FR_s),
    )
    for tick_cls, reso in expected_reso:
        tick = tick_cls()
        assert Timedelta(tick)._creso == reso.value
def test_construction():
    """Exercise Timedelta construction from numbers, keywords, strings and offsets.

    Covers equivalent spellings of the same duration, unit abbreviations,
    combined strings, the error messages for invalid input, float input and
    tick offsets.
    """
    expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8")
    assert Timedelta(10, unit="d")._value == expected
    assert Timedelta(10.0, unit="d")._value == expected
    assert Timedelta("10 days")._value == expected
    assert Timedelta(days=10)._value == expected
    assert Timedelta(days=10.0)._value == expected

    expected += np.timedelta64(10, "s").astype("m8[ns]").view("i8")
    assert Timedelta("10 days 00:00:10")._value == expected
    assert Timedelta(days=10, seconds=10)._value == expected
    assert Timedelta(days=10, milliseconds=10 * 1000)._value == expected
    assert Timedelta(days=10, microseconds=10 * 1000 * 1000)._value == expected

    # rounding cases
    assert Timedelta(82739999850000)._value == 82739999850000
    assert "0 days 22:58:59.999850" in str(Timedelta(82739999850000))
    assert Timedelta(123072001000000)._value == 123072001000000
    assert "1 days 10:11:12.001" in str(Timedelta(123072001000000))

    # string conversion with/without leading zero
    # GH#9570
    assert Timedelta("0:00:00") == timedelta(hours=0)
    assert Timedelta("00:00:00") == timedelta(hours=0)
    assert Timedelta("-1:00:00") == -timedelta(hours=1)
    assert Timedelta("-01:00:00") == -timedelta(hours=1)

    # more strings & abbrevs
    # GH#8190
    assert Timedelta("1 h") == timedelta(hours=1)
    assert Timedelta("1 hour") == timedelta(hours=1)
    assert Timedelta("1 hr") == timedelta(hours=1)
    assert Timedelta("1 hours") == timedelta(hours=1)
    assert Timedelta("-1 hours") == -timedelta(hours=1)
    assert Timedelta("1 m") == timedelta(minutes=1)
    assert Timedelta("1.5 m") == timedelta(seconds=90)
    assert Timedelta("1 minute") == timedelta(minutes=1)
    assert Timedelta("1 minutes") == timedelta(minutes=1)
    assert Timedelta("1 s") == timedelta(seconds=1)
    assert Timedelta("1 second") == timedelta(seconds=1)
    assert Timedelta("1 seconds") == timedelta(seconds=1)
    assert Timedelta("1 ms") == timedelta(milliseconds=1)
    assert Timedelta("1 milli") == timedelta(milliseconds=1)
    assert Timedelta("1 millisecond") == timedelta(milliseconds=1)
    assert Timedelta("1 us") == timedelta(microseconds=1)
    assert Timedelta("1 µs") == timedelta(microseconds=1)
    assert Timedelta("1 micros") == timedelta(microseconds=1)
    assert Timedelta("1 microsecond") == timedelta(microseconds=1)
    assert Timedelta("1.5 microsecond") == Timedelta("00:00:00.000001500")
    assert Timedelta("1 ns") == Timedelta("00:00:00.000000001")
    assert Timedelta("1 nano") == Timedelta("00:00:00.000000001")
    assert Timedelta("1 nanosecond") == Timedelta("00:00:00.000000001")

    # combos
    assert Timedelta("10 days 1 hour") == timedelta(days=10, hours=1)
    assert Timedelta("10 days 1 h") == timedelta(days=10, hours=1)
    assert Timedelta("10 days 1 h 1m 1s") == timedelta(
        days=10, hours=1, minutes=1, seconds=1
    )
    # a leading minus sign negates the whole duration
    assert Timedelta("-10 days 1 h 1m 1s") == -timedelta(
        days=10, hours=1, minutes=1, seconds=1
    )
    assert Timedelta("-10 days 1 h 1m 1s 3us") == -timedelta(
        days=10, hours=1, minutes=1, seconds=1, microseconds=3
    )
    assert Timedelta("-10 days 1 h 1.5m 1s 3us") == -timedelta(
        days=10, hours=1, minutes=1, seconds=31, microseconds=3
    )

    # Currently invalid as it has a - on the hh:mm:dd part
    # (only allowed on the days)
    msg = "only leading negative signs are allowed"
    with pytest.raises(ValueError, match=msg):
        Timedelta("-10 days -1 h 1.5m 1s 3us")

    # only leading neg signs are allowed
    with pytest.raises(ValueError, match=msg):
        Timedelta("10 days -1 h 1.5m 1s 3us")

    # no units specified
    msg = "no units specified"
    with pytest.raises(ValueError, match=msg):
        Timedelta("3.1415")

    # invalid construction
    msg = "cannot construct a Timedelta"
    with pytest.raises(ValueError, match=msg):
        Timedelta()

    msg = "unit abbreviation w/o a number"
    with pytest.raises(ValueError, match=msg):
        Timedelta("foo")

    msg = (
        "cannot construct a Timedelta from "
        "the passed arguments, allowed keywords are "
    )
    with pytest.raises(ValueError, match=msg):
        Timedelta(day=10)

    # floats
    expected = np.timedelta64(10, "s").astype("m8[ns]").view("i8") + np.timedelta64(
        500, "ms"
    ).astype("m8[ns]").view("i8")
    assert Timedelta(10.5, unit="s")._value == expected

    # offset
    assert to_timedelta(offsets.Hour(2)) == Timedelta(hours=2)
    assert Timedelta(offsets.Hour(2)) == Timedelta(hours=2)
    assert Timedelta(offsets.Second(2)) == Timedelta(seconds=2)

    # GH#11995: unicode
    expected = Timedelta("1h")
    result = Timedelta("1h")
    assert result == expected
    assert to_timedelta(offsets.Hour(2)) == Timedelta("0 days, 02:00:00")

    msg = "unit abbreviation w/o a number"
    with pytest.raises(ValueError, match=msg):
        Timedelta("foo bar")
@pytest.mark.parametrize(
    "item",
    [
        ("days", "D"),
        ("seconds", "s"),
        ("microseconds", "us"),
        ("milliseconds", "ms"),
        ("minutes", "m"),
        ("hours", "h"),
        ("weeks", "W"),
    ],
)
@pytest.mark.parametrize(
    "npdtype", [np.int64, np.int32, np.int16, np.float64, np.float32, np.float16]
)
def test_td_construction_with_np_dtypes(npdtype, item):
    """Keyword values given as numpy scalars build the expected Timedelta.

    ``item`` pairs a Timedelta keyword name with the numpy unit used to
    compute the expected nanosecond value.
    """
    # GH#8757: test construction with np dtypes
    kwarg_name, td64_unit = item
    one_unit_in_ns = np.timedelta64(1, td64_unit).astype("m8[ns]").view("i8")

    result = Timedelta(**{kwarg_name: npdtype(1)})
    assert result._value == one_unit_in_ns
@pytest.mark.parametrize(
    "val",
    [
        "1s",
        "-1s",
        "1us",
        "-1us",
        "1 day",
        "-1 day",
        "-23:59:59.999999",
        "-1 days +23:59:59.999999",
        "-1ns",
        "1ns",
        "-23:59:59.999999999",
    ],
)
def test_td_from_repr_roundtrip(val):
    """A Timedelta can be rebuilt from its integer value and its string forms."""
    # round-trip both for string and value
    original = Timedelta(val)
    round_trip_sources = (
        original._value,
        str(original),
        original._repr_base(format="all"),
        original._repr_base(),
    )
    for source in round_trip_sources:
        assert Timedelta(source) == original
def test_overflow_on_construction():
    """Out-of-range int input raises; a large pytimedelta is kept at 'us' reso."""
    # GH#3374
    one_day_ns = Timedelta("1day")._value
    too_large = one_day_ns * 20169940
    with pytest.raises(
        OutOfBoundsTimedelta,
        match="Cannot cast 1742682816000000000000 from ns to 'ns' without overflow",
    ):
        Timedelta(too_large)

    # xref GH#17637
    with pytest.raises(
        OutOfBoundsTimedelta,
        match="Cannot cast 139993 from D to 'ns' without overflow",
    ):
        Timedelta(7 * 19999, unit="D")

    # used to overflow before non-ns support
    n_days = 13 * 19999
    from_pytd = Timedelta(timedelta(days=n_days))
    assert from_pytd._creso == NpyDatetimeUnit.NPY_FR_us.value
    assert from_pytd.days == n_days
@pytest.mark.parametrize(
    "val, unit",
    [
        (15251, "W"),  # 1
        (106752, "D"),  # change from previous:
        (2562048, "h"),  # 0 hours
        (153722868, "m"),  # 13 minutes
        (9223372037, "s"),  # 44 seconds
    ],
)
def test_construction_out_of_bounds_td64ns(val, unit):
    """td64 values too big for 'ns' are kept at second resolution.

    For each ``(val, unit)`` and its negation, constructing the Timedelta
    succeeds but ``as_unit("ns")`` raises OutOfBoundsTimedelta; stepping one
    unit back towards zero compares equal to the numpy value.
    """
    # TODO: parametrize over units just above/below the implementation bounds
    #  once GH#38964 is resolved

    # Timedelta.max is just under 106752 days
    td64 = np.timedelta64(val, unit)
    assert td64.astype("m8[ns]").view("i8") < 0  # i.e. naive astype will be wrong

    td = Timedelta(td64)
    # None of the parametrized units is "M", so the value comparison runs
    # unconditionally.
    assert td.asm8 == td64
    assert td.asm8.dtype == "m8[s]"
    msg = r"Cannot cast 1067\d\d days .* to unit='ns' without overflow"
    with pytest.raises(OutOfBoundsTimedelta, match=msg):
        td.as_unit("ns")

    # But just back in bounds and we are OK
    assert Timedelta(td64 - 1) == td64 - 1

    # same checks on the negative side
    td64 *= -1
    assert td64.astype("m8[ns]").view("i8") > 0  # i.e. naive astype will be wrong

    td2 = Timedelta(td64)
    msg = r"Cannot cast -1067\d\d days .* to unit='ns' without overflow"
    with pytest.raises(OutOfBoundsTimedelta, match=msg):
        td2.as_unit("ns")

    # But just back in bounds and we are OK
    assert Timedelta(td64 + 1) == td64 + 1
@pytest.mark.parametrize(
    "val, unit",
    [
        (15251 * 10**9, "W"),
        (106752 * 10**9, "D"),
        (2562048 * 10**9, "h"),
        (153722868 * 10**9, "m"),
    ],
)
def test_construction_out_of_bounds_td64s(val, unit):
    """Too-large td64 input raises; 10**9 units less constructs fine."""
    too_big = np.timedelta64(val, unit)
    pattern = str(too_big)
    with pytest.raises(OutOfBoundsTimedelta, match=pattern):
        Timedelta(too_big)

    # But just back in bounds and we are OK
    in_bounds = too_big - 10**9
    assert Timedelta(in_bounds) == in_bounds
@pytest.mark.parametrize(
    "fmt,exp",
    [
        (
            "P6DT0H50M3.010010012S",
            Timedelta(
                days=6,
                minutes=50,
                seconds=3,
                milliseconds=10,
                microseconds=10,
                nanoseconds=12,
            ),
        ),
        (
            "P-6DT0H50M3.010010012S",
            Timedelta(
                days=-6,
                minutes=50,
                seconds=3,
                milliseconds=10,
                microseconds=10,
                nanoseconds=12,
            ),
        ),
        ("P4DT12H30M5S", Timedelta(days=4, hours=12, minutes=30, seconds=5)),
        ("P0DT0H0M0.000000123S", Timedelta(nanoseconds=123)),
        ("P0DT0H0M0.00001S", Timedelta(microseconds=10)),
        ("P0DT0H0M0.001S", Timedelta(milliseconds=1)),
        ("P0DT0H1M0S", Timedelta(minutes=1)),
        ("P1DT25H61M61S", Timedelta(days=1, hours=25, minutes=61, seconds=61)),
        ("PT1S", Timedelta(seconds=1)),
        ("PT0S", Timedelta(seconds=0)),
        ("P1WT0S", Timedelta(days=7, seconds=0)),
        ("P1D", Timedelta(days=1)),
        ("P1DT1H", Timedelta(days=1, hours=1)),
        ("P1W", Timedelta(days=7)),
        ("PT300S", Timedelta(seconds=300)),
        ("P1DT0H0M00000000000S", Timedelta(days=1)),
        ("PT-6H3M", Timedelta(hours=-6, minutes=3)),
        ("-PT6H3M", Timedelta(hours=-6, minutes=-3)),
        ("-PT-6H+3M", Timedelta(hours=6, minutes=-3)),
    ],
)
def test_iso_constructor(fmt, exp):
    """Parsing the ISO 8601 duration string ``fmt`` gives the Timedelta ``exp``."""
    parsed = Timedelta(fmt)
    assert parsed == exp
@pytest.mark.parametrize(
    "fmt",
    [
        "PPPPPPPPPPPP",
        "PDTHMS",
        "P0DT999H999M999S",
        "P1DT0H0M0.0000000000000S",
        "P1DT0H0M0.S",
        "P",
        "-P",
    ],
)
def test_iso_constructor_raises(fmt):
    """Malformed ISO 8601 duration strings raise ValueError naming the input."""
    with pytest.raises(
        ValueError, match=f"Invalid ISO 8601 Duration format - {fmt}"
    ):
        Timedelta(fmt)
@pytest.mark.parametrize(
    "constructed_td, conversion",
    [
        (Timedelta(nanoseconds=100), "100ns"),
        (
            Timedelta(
                days=1,
                hours=1,
                minutes=1,
                weeks=1,
                seconds=1,
                milliseconds=1,
                microseconds=1,
                nanoseconds=1,
            ),
            694861001001001,
        ),
        (Timedelta(microseconds=1) + Timedelta(nanoseconds=1), "1us1ns"),
        (Timedelta(microseconds=1) - Timedelta(nanoseconds=1), "999ns"),
        (Timedelta(microseconds=1) + 5 * Timedelta(nanoseconds=-2), "990ns"),
    ],
)
def test_td_constructor_on_nanoseconds(constructed_td, conversion):
    """``constructed_td`` equals the Timedelta built directly from ``conversion``."""
    # GH#9273
    rebuilt = Timedelta(conversion)
    assert constructed_td == rebuilt
def test_td_constructor_value_error():
    """A string passed for a keyword component raises TypeError."""
    with pytest.raises(
        TypeError, match="Invalid type <class 'str'>. Must be int or float."
    ):
        Timedelta(nanoseconds="abc")
def test_timedelta_constructor_identity():
    """Passing a Timedelta to the constructor returns that same object."""
    # Test for #30543
    original = Timedelta(np.timedelta64(1, "s"))
    assert Timedelta(original) is original
def test_timedelta_pass_td_and_kwargs_raises():
    """Combining a Timedelta input with keyword components raises ValueError."""
    # don't silently ignore the kwargs GH#48898
    existing = Timedelta(days=1)
    pattern = (
        "Cannot pass both a Timedelta input and timedelta keyword arguments, "
        r"got \['days'\]"
    )
    with pytest.raises(ValueError, match=pattern):
        Timedelta(existing, days=2)
@pytest.mark.parametrize(
    "constructor, value, unit, expectation",
    [
        (Timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
        (to_timedelta, "10s", "ms", (ValueError, "unit must not be specified")),
        (to_timedelta, ["1", 2, 3], "s", (ValueError, "unit must not be specified")),
    ],
)
def test_string_with_unit(constructor, value, unit, expectation):
    """String input combined with ``unit`` raises the expected exception.

    ``expectation`` is an ``(exception type, message pattern)`` pair.
    """
    exc_type, pattern = expectation
    with pytest.raises(exc_type, match=pattern):
        constructor(value, unit=unit)
@pytest.mark.parametrize(
    "value",
    [
        "".join(chars)
        for length in (1, 2)
        for chars in product("+-, ", repeat=length)
    ],
)
def test_string_without_numbers(value):
    """Strings made only of sign/separator characters raise ValueError."""
    # GH39710 Timedelta input string with only symbols and no digits raises an error
    if value == "--":
        msg = "only leading negative signs are allowed"
    else:
        msg = "symbols w/o a number"

    with pytest.raises(ValueError, match=msg):
        Timedelta(value)
def test_timedelta_new_npnat():
    """Constructing from a numpy NaT timedelta64 returns the NaT singleton."""
    # GH#48898
    assert Timedelta(np.timedelta64("NaT", "h")) is NaT
def test_subclass_respected():
    """Instantiating a Timedelta subclass yields an instance of the subclass."""
    # GH#49579
    class MyCustomTimedelta(Timedelta):
        """Trivial subclass with no overrides."""

    instance = MyCustomTimedelta("1 minute")
    assert isinstance(instance, MyCustomTimedelta)
def test_non_nano_value():
    """``.value`` is in nanoseconds and raises OverflowError when out of range."""
    # https://github.com/pandas-dev/pandas/issues/49076
    ten_days = Timedelta(10, unit="D").as_unit("s")
    # `.value` shows nanoseconds, even though unit is 's'
    assert ten_days.value == 864000000000000

    # out-of-nanoseconds-bounds `.value` raises informative message
    huge = Timedelta(1_000, "D").as_unit("s") * 1_000
    pattern = (
        r"Cannot convert Timedelta to nanoseconds without overflow. "
        r"Use `.asm8.view\('i8'\)` to cast represent Timedelta in its "
        r"own unit \(here, s\).$"
    )
    with pytest.raises(OverflowError, match=pattern):
        huge.value

    # check that the suggested workaround actually works
    in_own_unit = huge.asm8.view("i8")
    assert in_own_unit == 86400000000