# projektAI/venv/Lib/site-packages/pandas/core/indexes/accessors.py
#
# 495 lines
# 14 KiB
# Python
# Raw Normal View History
#
# 2021-06-06 22:13:05 +02:00
"""
datetimelike delegation
"""
from typing import TYPE_CHECKING
import warnings
import numpy as np
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_integer_dtype,
is_list_like,
is_period_dtype,
is_timedelta64_dtype,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.accessor import PandasDelegate, delegate_names
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
from pandas.core.base import NoNewAttributesMixin, PandasObject
from pandas.core.indexes.datetimes import DatetimeIndex
from pandas.core.indexes.timedeltas import TimedeltaIndex
if TYPE_CHECKING:
from pandas import Series
class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin):
    """
    Base class for the datetimelike accessor objects.

    Property and method access is forwarded to a datetimelike index/array
    built from the wrapped Series; list-like results are wrapped back into
    a Series.

    Parameters
    ----------
    data : Series
        Series holding the datetimelike values that are delegated to.
    orig : Series or None
        When not None, the Series whose index list-like results should carry
        instead of the index of ``data``.

    Raises
    ------
    TypeError
        If ``data`` is not a Series.
    """

    _hidden_attrs = PandasObject._hidden_attrs | {
        "orig",
        "name",
    }

    def __init__(self, data: "Series", orig):
        if not isinstance(data, ABCSeries):
            raise TypeError(
                f"cannot convert an object of type {type(data)} to a datetimelike index"
            )

        self._parent = data
        self.orig = orig
        self.name = getattr(data, "name", None)
        # No further attributes may be added after this point.
        self._freeze()

    def _result_index(self):
        """
        Return the index that list-like results should carry.

        ``orig`` takes precedence over ``_parent`` so that properties and
        methods label their results identically.
        """
        source = self.orig if self.orig is not None else self._parent
        return source.index

    def _get_values(self):
        """
        Build the datetimelike container that attribute access is forwarded to.

        Returns
        -------
        DatetimeIndex, TimedeltaIndex or PeriodArray
            Chosen from the dtype of the wrapped Series.

        Raises
        ------
        TypeError
            If the wrapped Series has none of the supported dtypes.
        """
        data = self._parent
        dtype = data.dtype

        if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
            return DatetimeIndex(data, copy=False, name=self.name)
        if is_timedelta64_dtype(dtype):
            return TimedeltaIndex(data, copy=False, name=self.name)
        if is_period_dtype(dtype):
            return PeriodArray(data, copy=False)

        raise TypeError(
            f"cannot convert an object of type {type(data)} to a datetimelike index"
        )

    def _delegate_property_get(self, name):
        """
        Fetch property ``name`` from the delegated values.

        Scalar results are returned unchanged; list-like results are returned
        as a new Series.
        """
        from pandas import Series

        values = self._get_values()
        result = getattr(values, name)

        # maybe need to upcast (ints)
        if isinstance(result, np.ndarray):
            if is_integer_dtype(result):
                result = result.astype("int64")
        elif not is_list_like(result):
            return result

        result = np.asarray(result)

        # return the result as a Series, which is by definition a copy
        result = Series(
            result, index=self._result_index(), name=self.name
        ).__finalize__(self._parent)

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = (
            "modifications to a property of a datetimelike "
            "object are not supported and are discarded. "
            "Change values on the original."
        )

        return result

    def _delegate_property_set(self, name, value, *args, **kwargs):
        """Reject assignment: delegated properties are read-only."""
        raise ValueError(
            "modifications to a property of a datetimelike object are not supported. "
            "Change values on the original."
        )

    def _delegate_method(self, name, *args, **kwargs):
        """
        Call method ``name`` on the delegated values.

        Scalar results are returned unchanged; list-like results are returned
        as a new Series.
        """
        from pandas import Series

        values = self._get_values()
        method = getattr(values, name)
        result = method(*args, **kwargs)

        if not is_list_like(result):
            return result

        # Use the same index as _delegate_property_get; ``_parent.index`` alone
        # would ignore ``orig`` and mislabel the result.
        result = Series(
            result, index=self._result_index(), name=self.name
        ).__finalize__(self._parent)

        # setting this object will show a SettingWithCopyWarning/Error
        result._is_copy = (
            "modifications to a method of a datetimelike "
            "object are not supported and are discarded. "
            "Change values on the original."
        )

        return result
@delegate_names(
    delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_ops, typ="property"
)
@delegate_names(
    delegate=DatetimeArray, accessors=DatetimeArray._datetimelike_methods, typ="method"
)
class DatetimeProperties(Properties):
    """
    Accessor object for datetimelike properties of the Series values.

    Returns a Series indexed like the original Series.
    Raises TypeError if the Series does not contain datetimelike values.

    Examples
    --------
    >>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s"))
    >>> seconds_series
    0   2000-01-01 00:00:00
    1   2000-01-01 00:00:01
    2   2000-01-01 00:00:02
    dtype: datetime64[ns]
    >>> seconds_series.dt.second
    0    0
    1    1
    2    2
    dtype: int64

    >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
    >>> hours_series
    0   2000-01-01 00:00:00
    1   2000-01-01 01:00:00
    2   2000-01-01 02:00:00
    dtype: datetime64[ns]
    >>> hours_series.dt.hour
    0    0
    1    1
    2    2
    dtype: int64

    >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q"))
    >>> quarters_series
    0   2000-03-31
    1   2000-06-30
    2   2000-09-30
    dtype: datetime64[ns]
    >>> quarters_series.dt.quarter
    0    1
    1    2
    2    3
    dtype: int64
    """

    def to_pydatetime(self) -> np.ndarray:
        """
        Return the data as an array of native Python datetime objects.

        Timezone information is retained if present.

        .. warning::

           Python's datetime uses microsecond resolution, which is lower than
           pandas (nanosecond). The values are truncated.

        Returns
        -------
        numpy.ndarray
            Object dtype array containing native Python datetime objects.

        See Also
        --------
        datetime.datetime : Standard library value for a datetime.

        Examples
        --------
        >>> s = pd.Series(pd.date_range('20180310', periods=2))
        >>> s
        0   2018-03-10
        1   2018-03-11
        dtype: datetime64[ns]

        >>> s.dt.to_pydatetime()
        array([datetime.datetime(2018, 3, 10, 0, 0),
               datetime.datetime(2018, 3, 11, 0, 0)], dtype=object)

        pandas' nanosecond precision is truncated to microseconds.

        >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns'))
        >>> s
        0   2018-03-10 00:00:00.000000000
        1   2018-03-10 00:00:00.000000001
        dtype: datetime64[ns]

        >>> s.dt.to_pydatetime()
        array([datetime.datetime(2018, 3, 10, 0, 0),
               datetime.datetime(2018, 3, 10, 0, 0)], dtype=object)
        """
        return self._get_values().to_pydatetime()

    @property
    def freq(self):
        """Return the ``inferred_freq`` of the delegated values."""
        return self._get_values().inferred_freq

    def isocalendar(self):
        """
        Returns a DataFrame with the year, week, and day calculated according to
        the ISO 8601 standard.

        .. versionadded:: 1.1.0

        Returns
        -------
        DataFrame
            with columns year, week and day

        See Also
        --------
        Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
            week number, and weekday for the given Timestamp object.
        datetime.date.isocalendar : Return a named tuple object with
            three components: year, week and weekday.

        Examples
        --------
        >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
        >>> ser.dt.isocalendar()
           year  week  day
        0  2009    53     5
        1  <NA>  <NA>  <NA>
        >>> ser.dt.isocalendar().week
        0      53
        1    <NA>
        Name: week, dtype: UInt32
        """
        # Label rows the same way the delegated properties do: prefer the
        # index of ``orig`` when one was supplied, otherwise the parent's.
        if self.orig is not None:
            index = self.orig.index
        else:
            index = self._parent.index
        return self._get_values().isocalendar().set_index(index)

    @property
    def weekofyear(self):
        """
        The week ordinal of the year.

        .. deprecated:: 1.1.0

        Series.dt.weekofyear and Series.dt.week have been deprecated.
        Please use Series.dt.isocalendar().week instead.
        """
        warnings.warn(
            "Series.dt.weekofyear and Series.dt.week have been deprecated.  "
            "Please use Series.dt.isocalendar().week instead.",
            FutureWarning,
            stacklevel=2,
        )
        week_series = self.isocalendar().week
        week_series.name = self.name
        # Missing values cannot be held by int64, so fall back to float64.
        if week_series.hasnans:
            return week_series.astype("float64")
        return week_series.astype("int64")

    week = weekofyear
@delegate_names(
    delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"
)
@delegate_names(
    delegate=TimedeltaArray,
    accessors=TimedeltaArray._datetimelike_methods,
    typ="method",
)
class TimedeltaProperties(Properties):
    """
    Accessor object for datetimelike properties of the Series values.

    Returns a Series indexed like the original Series.
    Raises TypeError if the Series does not contain datetimelike values.

    Examples
    --------
    >>> seconds_series = pd.Series(
    ...     pd.timedelta_range(start="1 second", periods=3, freq="S")
    ... )
    >>> seconds_series
    0   0 days 00:00:01
    1   0 days 00:00:02
    2   0 days 00:00:03
    dtype: timedelta64[ns]
    >>> seconds_series.dt.seconds
    0    1
    1    2
    2    3
    dtype: int64
    """

    def to_pytimedelta(self) -> np.ndarray:
        """
        Return an array of native `datetime.timedelta` objects.

        Python's standard `datetime` library uses a different representation
        for timedeltas. This method converts a Series of pandas Timedeltas
        to `datetime.timedelta` format with the same length as the original
        Series.

        Returns
        -------
        numpy.ndarray
            Array of 1D containing data with `datetime.timedelta` type.

        See Also
        --------
        datetime.timedelta : A duration expressing the difference
            between two date, time, or datetime.

        Examples
        --------
        >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
        >>> s
        0   0 days
        1   1 days
        2   2 days
        3   3 days
        4   4 days
        dtype: timedelta64[ns]

        >>> s.dt.to_pytimedelta()
        array([datetime.timedelta(0), datetime.timedelta(days=1),
        datetime.timedelta(days=2), datetime.timedelta(days=3),
        datetime.timedelta(days=4)], dtype=object)
        """
        return self._get_values().to_pytimedelta()

    @property
    def components(self):
        """
        Return a DataFrame of the components of the Timedeltas.

        Returns
        -------
        DataFrame

        Examples
        --------
        >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s'))
        >>> s
        0   0 days 00:00:00
        1   0 days 00:00:01
        2   0 days 00:00:02
        3   0 days 00:00:03
        4   0 days 00:00:04
        dtype: timedelta64[ns]
        >>> s.dt.components
           days  hours  minutes  seconds  milliseconds  microseconds  nanoseconds
        0     0      0        0        0             0             0            0
        1     0      0        0        1             0             0            0
        2     0      0        0        2             0             0            0
        3     0      0        0        3             0             0            0
        4     0      0        0        4             0             0            0
        """
        # Label rows the same way the delegated properties do: prefer the
        # index of ``orig`` when one was supplied, otherwise the parent's.
        if self.orig is not None:
            index = self.orig.index
        else:
            index = self._parent.index
        return (
            self._get_values()
            .components.set_index(index)
            .__finalize__(self._parent)
        )

    @property
    def freq(self):
        """Return the ``inferred_freq`` of the delegated values."""
        return self._get_values().inferred_freq
@delegate_names(
    delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property"
)
@delegate_names(
    delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method"
)
class PeriodProperties(Properties):
    """
    Accessor object for datetimelike properties of the Series values.

    Returns a Series indexed like the original Series.
    Raises TypeError if the Series does not contain datetimelike values.

    Examples
    --------
    Second-resolution periods:

    >>> seconds_series = pd.Series(
    ...     pd.period_range(
    ...         start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s"
    ...     )
    ... )
    >>> seconds_series
    0    2000-01-01 00:00:00
    1    2000-01-01 00:00:01
    2    2000-01-01 00:00:02
    3    2000-01-01 00:00:03
    dtype: period[S]
    >>> seconds_series.dt.second
    0    0
    1    1
    2    2
    3    3
    dtype: int64

    Hour-resolution periods:

    >>> hours_series = pd.Series(
    ...     pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h")
    ... )
    >>> hours_series
    0    2000-01-01 00:00
    1    2000-01-01 01:00
    2    2000-01-01 02:00
    3    2000-01-01 03:00
    dtype: period[H]
    >>> hours_series.dt.hour
    0    0
    1    1
    2    2
    3    3
    dtype: int64

    Quarterly periods:

    >>> quarters_series = pd.Series(
    ...     pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC")
    ... )
    >>> quarters_series
    0    2000Q1
    1    2000Q2
    2    2000Q3
    3    2000Q4
    dtype: period[Q-DEC]
    >>> quarters_series.dt.quarter
    0    1
    1    2
    2    3
    3    4
    dtype: int64
    """
class CombinedDatetimelikeProperties(
    DatetimeProperties, TimedeltaProperties, PeriodProperties
):
    """
    Dispatcher that picks the accessor class matching the dtype of a Series.

    Calling this class never yields an instance of it; ``__new__`` returns a
    ``DatetimeProperties``, ``TimedeltaProperties`` or ``PeriodProperties``.
    """

    def __new__(cls, data: "Series"):
        """
        Return the accessor appropriate for ``data``.

        Parameters
        ----------
        data : Series
            Series with datetime64 (naive or tz-aware), timedelta64 or period
            values, or a categorical Series whose categories have one of
            those dtypes.

        Raises
        ------
        TypeError
            If ``data`` is not a Series.
        AttributeError
            If the values are not datetimelike.
        """
        # CombinedDatetimelikeProperties isn't really instantiated. Instead
        # we need to choose which parent (datetime, timedelta or period) is
        # appropriate. Since we're checking the dtypes anyway, we'll just
        # do all the validation here.
        if not isinstance(data, ABCSeries):
            raise TypeError(
                f"cannot convert an object of type {type(data)} to a datetimelike index"
            )

        orig = data if is_categorical_dtype(data.dtype) else None
        if orig is not None:
            # Rebuild the Series with the dtype of the categories. The index
            # must be passed along explicitly: constructing from the bare
            # array would otherwise produce a default index, and every
            # result labelled from the rebuilt Series would lose the
            # caller's index.
            data = data._constructor(
                orig.array,
                index=orig.index,
                name=orig.name,
                copy=False,
                dtype=orig._values.categories.dtype,
            )

        dtype = data.dtype
        if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
            return DatetimeProperties(data, orig)
        if is_timedelta64_dtype(dtype):
            return TimedeltaProperties(data, orig)
        if is_period_dtype(dtype):
            return PeriodProperties(data, orig)

        raise AttributeError("Can only use .dt accessor with datetimelike values")