# Inzynierka/Lib/site-packages/pandas/core/groupby/indexing.py

from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Iterable,
    Literal,
    cast,
)

import numpy as np

from pandas._typing import PositionalIndexer
from pandas.util._decorators import (
    cache_readonly,
    doc,
)

from pandas.core.dtypes.common import (
    is_integer,
    is_list_like,
)

if TYPE_CHECKING:
    from pandas import (
        DataFrame,
        Series,
    )
    from pandas.core.groupby import groupby


class GroupByIndexingMixin:
    """
    Mixin for adding ._positional_selector to GroupBy.
    """

    @cache_readonly
    def _positional_selector(self) -> GroupByPositionalSelector:
        """
        Return positional selection for each group.

        ``groupby._positional_selector[i:j]`` behaves like
        ``groupby.apply(lambda x: x.iloc[i:j])``,
        but is much faster and keeps the original index and row order.

        ``_positional_selector[]`` is compatible with, and extends,
        :meth:`~GroupBy.head` and :meth:`~GroupBy.tail`. For example:

        - ``head(5)``
        - ``_positional_selector[5:-5]``
        - ``tail(5)``

        together return all the rows.

        Allowed inputs for the index are:

        - An integer valued iterable, e.g. ``range(2, 4)``.
        - A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

        The output has the same format as :meth:`~GroupBy.head` and
        :meth:`~GroupBy.tail`, namely
        a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

        Returns
        -------
        Series
            The filtered subset of the original Series.
        DataFrame
            The filtered subset of the original DataFrame.

        See Also
        --------
        DataFrame.iloc : Purely integer-location based indexing for selection by
            position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.

        Notes
        -----
        - A negative slice step is not allowed.
        - Overlapping index specifications do not duplicate rows.
        - Rows are always returned in their original order, even when the
          index specification lists them in a different order.
          By contrast, ``DataFrame.iloc`` can change the row order.
        - ``groupby()`` parameters such as as_index and dropna are ignored.

        The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
        with ``as_index=False`` are:

        - Input to ``_positional_selector`` can include
          one or more slices whereas ``nth``
          just handles an integer or a list of integers.
        - ``_positional_selector`` can accept a slice relative to the
          last row of each group.
        - ``_positional_selector`` does not have an equivalent to the
          ``nth()`` ``dropna`` parameter.

        Examples
        --------
        >>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],
        ...                   columns=["A", "B"])
        >>> df.groupby("A")._positional_selector[1:2]
           A  B
        1  a  2
        4  b  5

        >>> df.groupby("A")._positional_selector[1, -1]
           A  B
        1  a  2
        2  a  3
        4  b  5
        """
        # The cast only serves static type checkers. At runtime TYPE_CHECKING
        # is False, so the expression short-circuits to ``self`` and the
        # type-checking-only ``groupby`` import is never evaluated.
        # pylint: disable-next=used-before-assignment
        typed_self = cast(groupby.GroupBy, self) if TYPE_CHECKING else self
        return GroupByPositionalSelector(typed_self)

    def _make_mask_from_positional_indexer(
        self,
        arg: PositionalIndexer | tuple,
    ) -> np.ndarray:
        if is_list_like(arg):
            if all(is_integer(i) for i in cast(Iterable, arg)):
                mask = self._make_mask_from_list(cast(Iterable[int], arg))
            else:
                mask = self._make_mask_from_tuple(cast(tuple, arg))

        elif isinstance(arg, slice):
            mask = self._make_mask_from_slice(arg)
        elif is_integer(arg):
            mask = self._make_mask_from_int(cast(int, arg))
        else:
            raise TypeError(
                f"Invalid index {type(arg)}. "
                "Must be integer, list-like, slice or a tuple of "
                "integers and slices"
            )

        if isinstance(mask, bool):
            if mask:
                mask = self._ascending_count >= 0
            else:
                mask = self._ascending_count < 0

        return cast(np.ndarray, mask)

    def _make_mask_from_int(self, arg: int) -> np.ndarray:
        if arg >= 0:
            return self._ascending_count == arg
        else:
            return self._descending_count == (-arg - 1)

    def _make_mask_from_list(self, args: Iterable[int]) -> bool | np.ndarray:
        positive = [arg for arg in args if arg >= 0]
        negative = [-arg - 1 for arg in args if arg < 0]

        mask: bool | np.ndarray = False

        if positive:
            mask |= np.isin(self._ascending_count, positive)

        if negative:
            mask |= np.isin(self._descending_count, negative)

        return mask

    def _make_mask_from_tuple(self, args: tuple) -> bool | np.ndarray:
        mask: bool | np.ndarray = False

        for arg in args:
            if is_integer(arg):
                mask |= self._make_mask_from_int(cast(int, arg))
            elif isinstance(arg, slice):
                mask |= self._make_mask_from_slice(arg)
            else:
                raise ValueError(
                    f"Invalid argument {type(arg)}. Should be int or slice."
                )

        return mask

    def _make_mask_from_slice(self, arg: slice) -> bool | np.ndarray:
        start = arg.start
        stop = arg.stop
        step = arg.step

        if step is not None and step < 0:
            raise ValueError(f"Invalid step {step}. Must be non-negative")

        mask: bool | np.ndarray = True

        if step is None:
            step = 1

        if start is None:
            if step > 1:
                mask &= self._ascending_count % step == 0

        elif start >= 0:
            mask &= self._ascending_count >= start

            if step > 1:
                mask &= (self._ascending_count - start) % step == 0

        else:
            mask &= self._descending_count < -start

            offset_array = self._descending_count + start + 1
            limit_array = (
                self._ascending_count + self._descending_count + (start + 1)
            ) < 0
            offset_array = np.where(limit_array, self._ascending_count, offset_array)

            mask &= offset_array % step == 0

        if stop is not None:
            if stop >= 0:
                mask &= self._ascending_count < stop
            else:
                mask &= self._descending_count >= -stop

        return mask

    @cache_readonly
    def _ascending_count(self) -> np.ndarray:
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array()

    @cache_readonly
    def _descending_count(self) -> np.ndarray:
        if TYPE_CHECKING:
            groupby_self = cast(groupby.GroupBy, self)
        else:
            groupby_self = self

        return groupby_self._cumcount_array(ascending=False)


# NOTE: no class docstring is written here on purpose; the class docstring
# is supplied by the ``doc`` decorator from ``_positional_selector``.
@doc(GroupByIndexingMixin._positional_selector)
class GroupByPositionalSelector:
    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # GroupBy instance that builds the mask and applies it in __getitem__.
        self.groupby_object = groupby_object

    def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
        """
        Select by positional index per group.

        Implements GroupBy._positional_selector

        Parameters
        ----------
        arg : PositionalIndexer | tuple
            Allowed values are:
            - int
            - int valued iterable such as list or range
            - slice with step either None or positive
            - tuple of integers and slices

        Returns
        -------
        Series
            The filtered subset of the original groupby Series.
        DataFrame
            The filtered subset of the original groupby DataFrame.

        See Also
        --------
        DataFrame.iloc : Integer-location based indexing for selection by position.
        GroupBy.head : Return first n rows of each group.
        GroupBy.tail : Return last n rows of each group.
        GroupBy._positional_selector : Return positional selection for each group.
        GroupBy.nth : Take the nth row from each group if n is an int, or a
            subset of rows, if n is a list of ints.
        """
        grouped = self.groupby_object
        # Translate the indexer into a boolean row mask, then let the GroupBy
        # apply that mask to its selected object.
        row_mask = grouped._make_mask_from_positional_indexer(arg)
        return grouped._mask_selected_obj(row_mask)


class GroupByNthSelector:
    """
    Dynamically substituted for GroupBy.nth to enable both call and index
    """

    def __init__(self, groupby_object: groupby.GroupBy) -> None:
        # GroupBy instance whose ``_nth`` method does the actual selection.
        self.groupby_object = groupby_object

    def __call__(
        self,
        n: PositionalIndexer | tuple,
        dropna: Literal["any", "all", None] = None,
    ) -> DataFrame | Series:
        """Call form, ``nth(n, dropna)``: forward both arguments to ``_nth``."""
        grouped = self.groupby_object
        return grouped._nth(n, dropna)

    def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
        """Index form, ``nth[n]``: forward ``n`` alone to ``_nth``."""
        grouped = self.groupby_object
        return grouped._nth(n)
first commit 2023-06-02 12:51:02 +02:00			`from __future__ import annotations`

			`from typing import (`
			`TYPE_CHECKING,`
			`Iterable,`
			`Literal,`
			`cast,`
			`)`

			`import numpy as np`

			`from pandas._typing import PositionalIndexer`
			`from pandas.util._decorators import (`
			`cache_readonly,`
			`doc,`
			`)`

			`from pandas.core.dtypes.common import (`
			`is_integer,`
			`is_list_like,`
			`)`

			`if TYPE_CHECKING:`
			`from pandas import (`
			`DataFrame,`
			`Series,`
			`)`
			`from pandas.core.groupby import groupby`


			`class GroupByIndexingMixin:`
			`"""`
			`Mixin for adding ._positional_selector to GroupBy.`
			`"""`

			`@cache_readonly`
			`def _positional_selector(self) -> GroupByPositionalSelector:`
			`"""`
			`Return positional selection for each group.`

			``groupby._positional_selector[i:j]`` is similar to
			``groupby.apply(lambda x: x.iloc[i:j])``
			`but much faster and preserves the original index and order.`

			``_positional_selector[]`` is compatible with and extends :meth:`~GroupBy.head`
			and :meth:`~GroupBy.tail`. For example:

			- ``head(5)``
			- ``_positional_selector[5:-5]``
			- ``tail(5)``

			`together return all the rows.`

			`Allowed inputs for the index are:`

			- An integer valued iterable, e.g. ``range(2, 4)``.
			- A comma separated list of integers and slices, e.g. ``5``, ``2, 4``, ``2:4``.

			The output format is the same as :meth:`~GroupBy.head` and
			:meth:`~GroupBy.tail`, namely
			a subset of the ``DataFrame`` or ``Series`` with the index and order preserved.

			`Returns`
			`-------`
			`Series`
			`The filtered subset of the original Series.`
			`DataFrame`
			`The filtered subset of the original DataFrame.`

			`See Also`
			`--------`
			`DataFrame.iloc : Purely integer-location based indexing for selection by`
			`position.`
			`GroupBy.head : Return first n rows of each group.`
			`GroupBy.tail : Return last n rows of each group.`
			`GroupBy.nth : Take the nth row from each group if n is an int, or a`
			`subset of rows, if n is a list of ints.`

			`Notes`
			`-----`
			`- The slice step cannot be negative.`
			`- If the index specification results in overlaps, the item is not duplicated.`
			`- If the index specification changes the order of items, then`
			`they are returned in their original order.`
			By contrast, ``DataFrame.iloc`` can change the row order.
			- ``groupby()`` parameters such as as_index and dropna are ignored.

			The differences between ``_positional_selector[]`` and :meth:`~GroupBy.nth`
			with ``as_index=False`` are:

			- Input to ``_positional_selector`` can include
			one or more slices whereas ``nth``
			`just handles an integer or a list of integers.`
			- ``_positional_selector`` can accept a slice relative to the
			`last row of each group.`
			- ``_positional_selector`` does not have an equivalent to the
			``nth()`` ``dropna`` parameter.

			`Examples`
			`--------`
			`>>> df = pd.DataFrame([["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]],`
			`... columns=["A", "B"])`
			`>>> df.groupby("A")._positional_selector[1:2]`
			`A B`
			`1 a 2`
			`4 b 5`

			`>>> df.groupby("A")._positional_selector[1, -1]`
			`A B`
			`1 a 2`
			`2 a 3`
			`4 b 5`
			`"""`
			`if TYPE_CHECKING:`
			`# pylint: disable-next=used-before-assignment`
			`groupby_self = cast(groupby.GroupBy, self)`
			`else:`
			`groupby_self = self`

			`return GroupByPositionalSelector(groupby_self)`

			`def _make_mask_from_positional_indexer(`
			`self,`
			`arg: PositionalIndexer \| tuple,`
			`) -> np.ndarray:`
			`if is_list_like(arg):`
			`if all(is_integer(i) for i in cast(Iterable, arg)):`
			`mask = self._make_mask_from_list(cast(Iterable[int], arg))`
			`else:`
			`mask = self._make_mask_from_tuple(cast(tuple, arg))`

			`elif isinstance(arg, slice):`
			`mask = self._make_mask_from_slice(arg)`
			`elif is_integer(arg):`
			`mask = self._make_mask_from_int(cast(int, arg))`
			`else:`
			`raise TypeError(`
			`f"Invalid index {type(arg)}. "`
			`"Must be integer, list-like, slice or a tuple of "`
			`"integers and slices"`
			`)`

			`if isinstance(mask, bool):`
			`if mask:`
			`mask = self._ascending_count >= 0`
			`else:`
			`mask = self._ascending_count < 0`

			`return cast(np.ndarray, mask)`

			`def _make_mask_from_int(self, arg: int) -> np.ndarray:`
			`if arg >= 0:`
			`return self._ascending_count == arg`
			`else:`
			`return self._descending_count == (-arg - 1)`

			`def _make_mask_from_list(self, args: Iterable[int]) -> bool \| np.ndarray:`
			`positive = [arg for arg in args if arg >= 0]`
			`negative = [-arg - 1 for arg in args if arg < 0]`

			`mask: bool \| np.ndarray = False`

			`if positive:`
			`mask \|= np.isin(self._ascending_count, positive)`

			`if negative:`
			`mask \|= np.isin(self._descending_count, negative)`

			`return mask`

			`def _make_mask_from_tuple(self, args: tuple) -> bool \| np.ndarray:`
			`mask: bool \| np.ndarray = False`

			`for arg in args:`
			`if is_integer(arg):`
			`mask \|= self._make_mask_from_int(cast(int, arg))`
			`elif isinstance(arg, slice):`
			`mask \|= self._make_mask_from_slice(arg)`
			`else:`
			`raise ValueError(`
			`f"Invalid argument {type(arg)}. Should be int or slice."`
			`)`

			`return mask`

			`def _make_mask_from_slice(self, arg: slice) -> bool \| np.ndarray:`
			`start = arg.start`
			`stop = arg.stop`
			`step = arg.step`

			`if step is not None and step < 0:`
			`raise ValueError(f"Invalid step {step}. Must be non-negative")`

			`mask: bool \| np.ndarray = True`

			`if step is None:`
			`step = 1`

			`if start is None:`
			`if step > 1:`
			`mask &= self._ascending_count % step == 0`

			`elif start >= 0:`
			`mask &= self._ascending_count >= start`

			`if step > 1:`
			`mask &= (self._ascending_count - start) % step == 0`

			`else:`
			`mask &= self._descending_count < -start`

			`offset_array = self._descending_count + start + 1`
			`limit_array = (`
			`self._ascending_count + self._descending_count + (start + 1)`
			`) < 0`
			`offset_array = np.where(limit_array, self._ascending_count, offset_array)`

			`mask &= offset_array % step == 0`

			`if stop is not None:`
			`if stop >= 0:`
			`mask &= self._ascending_count < stop`
			`else:`
			`mask &= self._descending_count >= -stop`

			`return mask`

			`@cache_readonly`
			`def _ascending_count(self) -> np.ndarray:`
			`if TYPE_CHECKING:`
			`groupby_self = cast(groupby.GroupBy, self)`
			`else:`
			`groupby_self = self`

			`return groupby_self._cumcount_array()`

			`@cache_readonly`
			`def _descending_count(self) -> np.ndarray:`
			`if TYPE_CHECKING:`
			`groupby_self = cast(groupby.GroupBy, self)`
			`else:`
			`groupby_self = self`

			`return groupby_self._cumcount_array(ascending=False)`


			`@doc(GroupByIndexingMixin._positional_selector)`
			`class GroupByPositionalSelector:`
			`def __init__(self, groupby_object: groupby.GroupBy) -> None:`
			`self.groupby_object = groupby_object`

			`def __getitem__(self, arg: PositionalIndexer \| tuple) -> DataFrame \| Series:`
			`"""`
			`Select by positional index per group.`

			`Implements GroupBy._positional_selector`

			`Parameters`
			`----------`
			`arg : PositionalIndexer \| tuple`
			`Allowed values are:`
			`- int`
			`- int valued iterable such as list or range`
			`- slice with step either None or positive`
			`- tuple of integers and slices`

			`Returns`
			`-------`
			`Series`
			`The filtered subset of the original groupby Series.`
			`DataFrame`
			`The filtered subset of the original groupby DataFrame.`

			`See Also`
			`--------`
			`DataFrame.iloc : Integer-location based indexing for selection by position.`
			`GroupBy.head : Return first n rows of each group.`
			`GroupBy.tail : Return last n rows of each group.`
			`GroupBy._positional_selector : Return positional selection for each group.`
			`GroupBy.nth : Take the nth row from each group if n is an int, or a`
			`subset of rows, if n is a list of ints.`
			`"""`
			`mask = self.groupby_object._make_mask_from_positional_indexer(arg)`
			`return self.groupby_object._mask_selected_obj(mask)`


			`class GroupByNthSelector:`
			`"""`
			`Dynamically substituted for GroupBy.nth to enable both call and index`
			`"""`

			`def __init__(self, groupby_object: groupby.GroupBy) -> None:`
			`self.groupby_object = groupby_object`

			`def __call__(`
			`self,`
			`n: PositionalIndexer \| tuple,`
			`dropna: Literal["any", "all", None] = None,`
			`) -> DataFrame \| Series:`
			`return self.groupby_object._nth(n, dropna)`

			`def __getitem__(self, n: PositionalIndexer \| tuple) -> DataFrame \| Series:`
			`return self.groupby_object._nth(n)`