112 lines
3.7 KiB
Python
112 lines
3.7 KiB
Python
from __future__ import annotations
|
|
|
|
from collections import abc
|
|
from typing import TYPE_CHECKING
|
|
|
|
from pandas.core.interchange.column import PandasColumn
|
|
from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
|
|
|
|
if TYPE_CHECKING:
|
|
from pandas import (
|
|
DataFrame,
|
|
Index,
|
|
)
|
|
|
|
|
|
class PandasDataFrameXchg(DataFrameXchg):
    """
    A data frame class, with only the methods required by the interchange
    protocol defined.

    Instances of this (private) class are returned from
    ``pd.DataFrame.__dataframe__`` as objects with the methods and
    attributes defined on this class.
    """

    def __init__(
        self, df: DataFrame, nan_as_null: bool = False, allow_copy: bool = True
    ) -> None:
        """
        Constructor - an instance of this (private) class is returned from
        `pd.DataFrame.__dataframe__`.

        Parameters
        ----------
        df : DataFrame
            The pandas object being wrapped; it is stored as-is, not copied.
        nan_as_null : bool, default False
            Stored on the instance and forwarded to derived wrappers; see
            the comment in the body for its current status.
        allow_copy : bool, default True
            Forwarded to every ``PandasColumn`` created by this wrapper.
        """
        self._df = df
        # ``nan_as_null`` is a keyword intended for the consumer to tell the
        # producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
        # This currently has no effect; once support for nullable extension
        # dtypes is added, this value should be propagated to columns.
        self._nan_as_null = nan_as_null
        self._allow_copy = allow_copy

    def __dataframe__(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> PandasDataFrameXchg:
        """
        Return a new wrapper around the same underlying frame.

        The flags passed here replace the ones stored on ``self``; they are
        not merged with them.
        """
        return PandasDataFrameXchg(self._df, nan_as_null, allow_copy)

    @property
    def metadata(self) -> dict[str, Index]:
        """
        Pandas-specific metadata: the frame's index under ``"pandas.index"``.
        """
        # `index` isn't a regular column, and the protocol doesn't support row
        # labels - so we export it as Pandas-specific metadata here.
        return {"pandas.index": self._df.index}

    def num_columns(self) -> int:
        """
        Return the number of columns in the wrapped frame.
        """
        return len(self._df.columns)

    def num_rows(self) -> int:
        """
        Return the number of rows in the wrapped frame.
        """
        return len(self._df)

    def num_chunks(self) -> int:
        """
        Return the number of chunks; this wrapper always reports ``1``.
        """
        return 1

    def column_names(self) -> Index:
        """
        Return the column labels of the wrapped frame (its ``columns`` index).
        """
        return self._df.columns

    def get_column(self, i: int) -> PandasColumn:
        """
        Return the column at position ``i`` (positional, via ``iloc``).
        """
        return PandasColumn(self._df.iloc[:, i], allow_copy=self._allow_copy)

    def get_column_by_name(self, name: str) -> PandasColumn:
        """
        Return the column labelled ``name`` (label lookup, via ``df[name]``).
        """
        return PandasColumn(self._df[name], allow_copy=self._allow_copy)

    def get_columns(self) -> list[PandasColumn]:
        """
        Return every column, in ``self._df.columns`` order, looked up by label.
        """
        return [
            PandasColumn(self._df[name], allow_copy=self._allow_copy)
            for name in self._df.columns
        ]

    def select_columns(self, indices: abc.Sequence[int]) -> PandasDataFrameXchg:
        """
        Return a new wrapper restricted to the columns at ``indices``.

        Parameters
        ----------
        indices : sequence of int
            Column positions. Non-list sequences are converted to a list
            before being handed to ``iloc``.

        Raises
        ------
        ValueError
            If ``indices`` is not a ``collections.abc.Sequence``.
        """
        if not isinstance(indices, abc.Sequence):
            raise ValueError("`indices` is not a sequence")
        if not isinstance(indices, list):
            indices = list(indices)

        return PandasDataFrameXchg(
            self._df.iloc[:, indices], self._nan_as_null, self._allow_copy
        )

    def select_columns_by_name(self, names: abc.Sequence[str]) -> PandasDataFrameXchg:
        """
        Return a new wrapper restricted to the columns labelled ``names``.

        Parameters
        ----------
        names : sequence of str
            Column labels. Non-list sequences are converted to a list
            before being handed to ``loc``.

        Raises
        ------
        ValueError
            If ``names`` is not a ``collections.abc.Sequence``.
        """
        if not isinstance(names, abc.Sequence):
            raise ValueError("`names` is not a sequence")
        if not isinstance(names, list):
            names = list(names)

        return PandasDataFrameXchg(
            self._df.loc[:, names], self._nan_as_null, self._allow_copy
        )

    def get_chunks(
        self, n_chunks: int | None = None
    ) -> abc.Iterator[PandasDataFrameXchg]:
        """
        Return an iterator yielding the chunks.

        Parameters
        ----------
        n_chunks : int, optional
            When greater than 1, the rows are split into exactly ``n_chunks``
            consecutive row slices of ``ceil(num_rows / n_chunks)`` rows each;
            trailing chunks may be shorter or empty. For any other value
            (``None``, ``0``, ``1``, negative) ``self`` is yielded as the
            single chunk.

        Yields
        ------
        PandasDataFrameXchg
            One wrapper per chunk, carrying this wrapper's ``nan_as_null``
            and ``allow_copy`` settings.
        """
        if not n_chunks or n_chunks <= 1:
            yield self
            return

        # Ceiling division so that ``n_chunks`` slices cover every row.
        step, remainder = divmod(len(self._df), n_chunks)
        if remainder:
            step += 1

        # Iterate over the chunk index rather than ``range(0, stop, step)``:
        # for an empty frame ``step`` is 0 and a zero range step raises
        # ValueError. For ``step > 0`` the start offsets are identical.
        for chunk_no in range(n_chunks):
            start = chunk_no * step
            yield PandasDataFrameXchg(
                self._df.iloc[start : start + step, :],
                self._nan_as_null,
                self._allow_copy,
            )
|