526 lines
14 KiB
Python
526 lines
14 KiB
Python
|
from __future__ import annotations
|
||
|
|
||
|
from collections.abc import (
|
||
|
Hashable,
|
||
|
Iterator,
|
||
|
Mapping,
|
||
|
MutableMapping,
|
||
|
Sequence,
|
||
|
)
|
||
|
from datetime import (
|
||
|
date,
|
||
|
datetime,
|
||
|
timedelta,
|
||
|
tzinfo,
|
||
|
)
|
||
|
from os import PathLike
|
||
|
import sys
|
||
|
from typing import (
|
||
|
TYPE_CHECKING,
|
||
|
Any,
|
||
|
Callable,
|
||
|
Literal,
|
||
|
Optional,
|
||
|
Protocol,
|
||
|
Type as type_t,
|
||
|
TypeVar,
|
||
|
Union,
|
||
|
overload,
|
||
|
)
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
# To prevent import cycles place any internal imports in the branch below
|
||
|
# and use a string literal forward reference to it in subsequent types
|
||
|
# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
|
||
|
if TYPE_CHECKING:
    # Everything in this branch is only seen by static type checkers; none of
    # these imports or aliases are executed at runtime.
    import numpy.typing as npt

    from pandas._libs import (
        NaTType,
        Period,
        Timedelta,
        Timestamp,
    )
    from pandas._libs.tslibs import BaseOffset

    from pandas.core.dtypes.dtypes import ExtensionDtype

    from pandas import Interval
    from pandas.arrays import (
        DatetimeArray,
        TimedeltaArray,
    )
    from pandas.core.arrays.base import ExtensionArray
    from pandas.core.frame import DataFrame
    from pandas.core.generic import NDFrame
    from pandas.core.groupby.generic import (
        DataFrameGroupBy,
        GroupBy,
        SeriesGroupBy,
    )
    from pandas.core.indexes.base import Index
    from pandas.core.internals import (
        ArrayManager,
        BlockManager,
        SingleArrayManager,
        SingleBlockManager,
    )
    from pandas.core.resample import Resampler
    from pandas.core.series import Series
    from pandas.core.window.rolling import BaseWindow

    from pandas.io.formats.format import EngFormatter
    from pandas.tseries.holiday import AbstractHolidayCalendar

    ScalarLike_co = Union[
        int,
        float,
        complex,
        str,
        bytes,
        np.generic,
    ]

    # numpy compatible types
    NumpyValueArrayLike = Union[ScalarLike_co, npt.ArrayLike]
    # Name "npt._ArrayLikeInt_co" is not defined [name-defined]
    NumpySorter = Optional[npt._ArrayLikeInt_co]  # type: ignore[name-defined]

    from typing import SupportsIndex

    # TypeGuard and Self come from ``typing`` on new enough interpreters and
    # from ``typing_extensions`` otherwise.
    if sys.version_info >= (3, 10):
        from typing import TypeGuard  # pyright: ignore[reportUnusedImport]
    else:
        from typing_extensions import TypeGuard  # pyright: ignore[reportUnusedImport]

    if sys.version_info >= (3, 11):
        from typing import Self  # pyright: ignore[reportUnusedImport]
    else:
        from typing_extensions import Self  # pyright: ignore[reportUnusedImport]
else:
    # Runtime placeholders so these names still exist as module attributes
    # when the type-checking branch is skipped.
    npt: Any = None
    Self: Any = None
    TypeGuard: Any = None
|
||
|
|
||
|
# Type variables bounded by Hashable / MutableMapping
HashableT = TypeVar("HashableT", bound=Hashable)
MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
|
||
|
|
||
|
# array-like

# pandas classes are referenced by string because they are only imported for
# type checkers
ArrayLike = Union["ExtensionArray", np.ndarray]
AnyArrayLike = Union[ArrayLike, "Index", "Series"]
TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
|
||
|
|
||
|
# list-like
|
||
|
|
||
|
# from https://github.com/hauntsaninja/useful_types
# includes Sequence-like objects but excludes str and bytes
_T_co = TypeVar("_T_co", covariant=True)


class SequenceNotStr(Protocol[_T_co]):
    """
    Protocol spelling out the methods of a read-only sequence of ``_T_co``.

    Per the note above it is meant to accept sequence-like objects while
    rejecting ``str`` and ``bytes``.

    NOTE(review): the rejection presumably relies on ``__contains__``, ``index``
    and ``count`` taking arbitrary objects here -- confirm against the upstream
    ``useful_types`` project.
    """

    @overload
    def __getitem__(self, index: SupportsIndex, /) -> _T_co:
        ...

    @overload
    def __getitem__(self, index: slice, /) -> Sequence[_T_co]:
        ...

    def __contains__(self, value: object, /) -> bool:
        ...

    def __len__(self) -> int:
        ...

    def __iter__(self) -> Iterator[_T_co]:
        ...

    def index(self, value: Any, /, start: int = 0, stop: int = ...) -> int:
        ...

    def count(self, value: Any, /) -> int:
        ...

    def __reversed__(self) -> Iterator[_T_co]:
        ...
|
||
|
|
||
|
|
||
|
# Anything list-like: an array-like, a non-str sequence, or a range
ListLike = Union[AnyArrayLike, SequenceNotStr, range]
|
||
|
|
||
|
# scalars

PythonScalar = Union[str, float, bool]
DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, date]
# Type variable bounded by int or str
IntStrT = TypeVar("IntStrT", bound=Union[int, str])
|
||
|
|
||
|
|
||
|
# timestamp and timedelta convertible types

TimestampConvertibleTypes = Union[
    "Timestamp", date, np.datetime64, np.int64, float, str
]
TimestampNonexistent = Union[
    Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
]
TimedeltaConvertibleTypes = Union[
    "Timedelta", timedelta, np.timedelta64, np.int64, float, str
]
Timezone = Union[str, tzinfo]

ToTimestampHow = Literal["s", "e", "start", "end"]
|
||
|
|
||
|
# NDFrameT is stricter and ensures that the same subclass of NDFrame always is
# used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
# Series is passed into a function, a Series is always returned and if a DataFrame is
# passed in, a DataFrame is always returned.
NDFrameT = TypeVar("NDFrameT", bound="NDFrame")

# Constrained to exactly np.ndarray or Index
NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")

AxisInt = int
Axis = Union[AxisInt, Literal["index", "columns", "rows"]]
IndexLabel = Union[Hashable, Sequence[Hashable]]
Level = Hashable
Shape = tuple[int, ...]
Suffixes = tuple[Optional[str], Optional[str]]
Ordered = Optional[bool]
|
||
|
# None, a Python scalar, or a (possibly nested) list / dict
JSONSerializable = Optional[Union[PythonScalar, list, dict]]
# A frequency given either as a string or as a BaseOffset
Frequency = Union[str, "BaseOffset"]
Axes = ListLike
|
||
|
|
||
|
# Accepted spellings of a random state: an int, an ndarray, or one of numpy's
# generator / bit-generator / legacy RandomState objects
RandomState = Union[
    int,
    np.ndarray,
    np.random.Generator,
    np.random.BitGenerator,
    np.random.RandomState,
]
|
||
|
|
||
|
# dtypes
NpDtype = Union[str, np.dtype, type_t[Union[str, complex, bool, object]]]
Dtype = Union["ExtensionDtype", NpDtype]
AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"]
# DtypeArg specifies all allowable dtypes in a function's dtype argument
DtypeArg = Union[Dtype, dict[Hashable, Dtype]]
DtypeObj = Union[np.dtype, "ExtensionDtype"]

# converters
ConvertersArg = dict[Hashable, Callable[[Dtype], Dtype]]

# parse_dates
ParseDatesArg = Union[
    bool, list[Hashable], list[list[Hashable]], dict[Hashable, list[Hashable]]
]

# For functions like rename that convert one label to another
Renamer = Union[Mapping[Any, Hashable], Callable[[Any], Hashable]]

# to maintain type information across generic functions and parametrization
T = TypeVar("T")

# used in decorators to preserve the signature of the function it decorates
# see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
FuncType = Callable[..., Any]
F = TypeVar("F", bound=FuncType)
|
||
|
|
||
|
# types of vectorized key functions for DataFrame::sort_values and
# DataFrame::sort_index, among others
ValueKeyFunc = Optional[Callable[["Series"], Union["Series", AnyArrayLike]]]
IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
|
||
|
|
||
|
# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
AggFuncTypeBase = Union[Callable, str]
AggFuncTypeDict = MutableMapping[
    Hashable, Union[AggFuncTypeBase, list[AggFuncTypeBase]]
]
AggFuncType = Union[
    AggFuncTypeBase,
    list[AggFuncTypeBase],
    AggFuncTypeDict,
]
AggObjType = Union[
    "Series",
    "DataFrame",
    "GroupBy",
    "SeriesGroupBy",
    "DataFrameGroupBy",
    "BaseWindow",
    "Resampler",
]

# A plain one-argument Python callable
PythonFuncType = Callable[[Any], Any]
|
||
|
|
||
|
# filenames and file-like-objects
# str/bytes type variables: covariant for values a buffer produces,
# contravariant for values a buffer consumes
AnyStr_co = TypeVar("AnyStr_co", str, bytes, covariant=True)
AnyStr_contra = TypeVar("AnyStr_contra", str, bytes, contravariant=True)
|
||
|
|
||
|
|
||
|
class BaseBuffer(Protocol):
    """
    Protocol for the members common to file-like objects: ``mode``, ``seek``,
    ``seekable`` and ``tell``.

    The comment in each member names the reader/writer that needs it.
    """

    @property
    def mode(self) -> str:
        # for _get_filepath_or_buffer
        ...

    # Positional-only via ``/`` (same spelling as SequenceNotStr) rather than
    # double-underscore parameter names.
    def seek(self, offset: int, whence: int = ..., /) -> int:
        # with one argument: gzip.GzipFile, bz2.BZ2File
        # with two arguments: zip.ZipFile, read_sas
        ...

    def seekable(self) -> bool:
        # for bz2.BZ2File
        ...

    def tell(self) -> int:
        # for zip.ZipFile, read_stata, to_stata
        ...
|
||
|
|
||
|
|
||
|
class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
    """Protocol for a ``BaseBuffer`` that can also be read, yielding ``AnyStr_co``."""

    # ``n`` is positional-only; ``/`` replaces the double-underscore spelling
    def read(self, n: int = ..., /) -> AnyStr_co:
        # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
        ...
|
||
|
|
||
|
|
||
|
class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]):
    """Protocol for a ``BaseBuffer`` that accepts ``AnyStr_contra`` writes and flushes."""

    # ``b`` is positional-only; ``/`` replaces the double-underscore spelling
    def write(self, b: AnyStr_contra, /) -> Any:
        # for gzip.GzipFile, bz2.BZ2File
        ...

    def flush(self) -> Any:
        # for gzip.GzipFile, bz2.BZ2File
        ...
|
||
|
|
||
|
|
||
|
class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
    """Protocol for a bytes ``ReadBuffer`` that additionally offers ``readline``."""

    def readline(self) -> bytes:
        ...
|
||
|
|
||
|
|
||
|
class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
    """Protocol for a bytes ``WriteBuffer`` that additionally offers ``truncate``."""

    def truncate(self, size: int | None = ...) -> int:
        ...
|
||
|
|
||
|
|
||
|
class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol):
    """
    Protocol for a ``ReadBuffer`` with the extra members the CSV readers use.

    The comment in each member names the engine or wrapper that needs it.
    """

    def __iter__(self) -> Iterator[AnyStr_co]:
        # for engine=python
        ...

    def fileno(self) -> int:
        # for _MMapWrapper
        ...

    def readline(self) -> AnyStr_co:
        # for engine=python
        ...

    @property
    def closed(self) -> bool:
        # for engine=pyarrow
        ...
|
||
|
|
||
|
|
||
|
# A path given as a string or as an os.PathLike of str
FilePath = Union[str, "PathLike[str]"]

# for arbitrary kwargs passed during reading/writing files
StorageOptions = Optional[dict[str, Any]]


# compression keywords and compression
CompressionDict = dict[str, Any]
CompressionOptions = Optional[
    Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"], CompressionDict]
]

# types in DataFrameFormatter
FormattersType = Union[
    list[Callable], tuple[Callable, ...], Mapping[Union[str, int], Callable]
]
ColspaceType = Mapping[Hashable, Union[str, int]]
FloatFormatType = Union[str, Callable, "EngFormatter"]
ColspaceArgType = Union[
    str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
]

# Arguments for fillna()
FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
InterpolateOptions = Literal[
    "linear",
    "time",
    "index",
    "values",
    "nearest",
    "zero",
    "slinear",
    "quadratic",
    "cubic",
    "barycentric",
    "polynomial",
    "krogh",
    "piecewise_polynomial",
    "spline",
    "pchip",
    "akima",
    "cubicspline",
    "from_derivatives",
]
|
||
|
|
||
|
# internals
Manager = Union[
    "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager"
]
SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
Manager2D = Union["ArrayManager", "BlockManager"]
|
||
|
|
||
|
# indexing
# PositionalIndexer -> valid 1D positional indexer, e.g. can pass
# to ndarray.__getitem__
# ScalarIndexer is for a single value as the index
# SequenceIndexer is for list like or slices (but not tuples)
# PositionalIndexerTuple extends the PositionalIndexer for 2D arrays
# These are used in various __getitem__ overloads
# TODO(typing#684): add Ellipsis, see
# https://github.com/python/typing/issues/684#issuecomment-548203158
# https://bugs.python.org/issue41810
# Using List[int] here rather than Sequence[int] to disallow tuples.
ScalarIndexer = Union[int, np.integer]
SequenceIndexer = Union[slice, list[int], np.ndarray]
PositionalIndexer = Union[ScalarIndexer, SequenceIndexer]
PositionalIndexerTuple = tuple[PositionalIndexer, PositionalIndexer]
PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple]
# The precise alias needs ``npt``, which is only a real module for type
# checkers, so at runtime TakeIndexer falls back to Any.
if TYPE_CHECKING:
    TakeIndexer = Union[Sequence[int], Sequence[np.integer], npt.NDArray[np.integer]]
else:
    TakeIndexer = Any
|
||
|
|
||
|
# Shared by functions such as drop and astype
IgnoreRaise = Literal["ignore", "raise"]

# Windowing rank methods
WindowingRankType = Literal["average", "min", "max"]

# read_csv engines
CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]

# read_json engines
JSONEngine = Literal["ujson", "pyarrow"]

# read_xml parsers
XMLParsers = Literal["lxml", "etree"]

# read_html flavors
HTMLFlavors = Literal["lxml", "html5lib", "bs4"]

# Interval closed type
IntervalLeftRight = Literal["left", "right"]
IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]]

# datetime and NaTType
DatetimeNaTType = Union[datetime, "NaTType"]
DateTimeErrorChoices = Union[IgnoreRaise, Literal["coerce"]]

# sort_index
SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
NaPosition = Literal["first", "last"]

# Arguments for nsmallest and nlargest
NsmallestNlargestKeep = Literal["first", "last", "all"]

# quantile interpolation
QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"]

# plotting
PlottingOrientation = Literal["horizontal", "vertical"]

# dropna
AnyAll = Literal["any", "all"]
|
||
|
|
||
|
# merge
MergeHow = Literal["left", "right", "inner", "outer", "cross"]
MergeValidate = Literal[
    "one_to_one",
    "1:1",
    "one_to_many",
    "1:m",
    "many_to_one",
    "m:1",
    "many_to_many",
    "m:m",
]

# join
JoinHow = Literal["left", "right", "inner", "outer"]
# Same set of values as MergeValidate; aliased so the two cannot drift apart
JoinValidate = MergeValidate
|
||
|
|
||
|
# reindex
|
||
|
# The FillnaOptions values plus "nearest"
ReindexMethod = Union[FillnaOptions, Literal["nearest"]]
|
||
|
|
||
|
# A colour given as a string or as a sequence of floats
MatplotlibColor = Union[str, Sequence[float]]
TimeGrouperOrigin = Union[
    "Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
]
TimeAmbiguous = Union[Literal["infer", "NaT", "raise"], "npt.NDArray[np.bool_]"]
TimeNonexistent = Union[
    Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
]
DropKeep = Literal["first", "last", False]
CorrelationMethod = Union[
    Literal["pearson", "kendall", "spearman"], Callable[[np.ndarray, np.ndarray], float]
]
AlignJoin = Literal["outer", "inner", "left", "right"]
DtypeBackend = Literal["pyarrow", "numpy_nullable"]

TimeUnit = Literal["s", "ms", "us", "ns"]
OpenFileErrors = Literal[
    "strict",
    "ignore",
    "replace",
    "surrogateescape",
    "xmlcharrefreplace",
    "backslashreplace",
    "namereplace",
]
|
||
|
|
||
|
# update
UpdateJoin = Literal["left"]

# applymap
NaAction = Literal["ignore"]

# from_dict
FromDictOrient = Literal["columns", "index", "tight"]

# to_gbq
ToGbqIfexist = Literal["fail", "replace", "append"]

# to_stata
ToStataByteorder = Literal[">", "<", "little", "big"]

# ExcelWriter
ExcelWriterIfSheetExists = Literal["error", "new", "replace", "overlay"]

# Offsets
OffsetCalendar = Union[np.busdaycalendar, "AbstractHolidayCalendar"]
|
||
|
|
||
|
# read_csv: usecols
# Either a collection of hashable entries (non-str sequence, range or
# array-like), a predicate over a hashable value, or None
UsecolsArgType = Union[
    SequenceNotStr[Hashable],
    range,
    AnyArrayLike,
    Callable[[HashableT], bool],
    None,
]
|