Inzynierka/Lib/site-packages/pandas/tests/arrays/numpy_/test_numpy.py

"""
Additional tests for PandasArray that aren't covered by
the interface tests.
"""
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import PandasDtype

import pandas as pd
import pandas._testing as tm
from pandas.arrays import PandasArray


@pytest.fixture(
    params=[
        np.array(["a", "b"], dtype=object),
        np.array([0, 1], dtype=float),
        np.array([0, 1], dtype=int),
        np.array([0, 1 + 2j], dtype=complex),
        np.array([True, False], dtype=bool),
        np.array([0, 1], dtype="datetime64[ns]"),
        np.array([0, 1], dtype="timedelta64[ns]"),
    ]
)
def any_numpy_array(request):
    """
    Parametrized fixture for NumPy arrays with different dtypes.

    This excludes string and bytes.

    Each test receives its own copy of the parametrized array: the arrays
    in ``params`` are created once when the decorator runs, so returning
    them directly would let an in-place mutation made by one test be seen
    by every later test that uses the same parameter.
    """
    return request.param.copy()


# ----------------------------------------------------------------------------
# PandasDtype


@pytest.mark.parametrize(
    ("dtype", "expected"),
    [
        ("bool", True),
        ("int", True),
        ("uint", True),
        ("float", True),
        ("complex", True),
        ("str", False),
        ("bytes", False),
        ("datetime64[ns]", False),
        ("object", False),
        ("void", False),
    ],
)
def test_is_numeric(dtype, expected):
    """Check ``PandasDtype._is_numeric`` against the expected flag per dtype."""
    wrapped = PandasDtype(dtype)
    # Identity comparison: the attribute must be exactly True / False.
    assert wrapped._is_numeric is expected


@pytest.mark.parametrize(
    ("dtype", "expected"),
    [
        ("bool", True),
        ("int", False),
        ("uint", False),
        ("float", False),
        ("complex", False),
        ("str", False),
        ("bytes", False),
        ("datetime64[ns]", False),
        ("object", False),
        ("void", False),
    ],
)
def test_is_boolean(dtype, expected):
    """Check ``PandasDtype._is_boolean``; only "bool" is expected to be True."""
    wrapped = PandasDtype(dtype)
    # Identity comparison: the attribute must be exactly True / False.
    assert wrapped._is_boolean is expected


def test_repr():
    """The repr of an int64 PandasDtype is ``PandasDtype('int64')``."""
    wrapped = PandasDtype(np.dtype("int64"))
    expected = "PandasDtype('int64')"
    assert repr(wrapped) == expected


def test_constructor_from_string():
    """``construct_from_string("int64")`` equals a PandasDtype built from np.dtype."""
    from_string = PandasDtype.construct_from_string("int64")
    from_numpy = PandasDtype(np.dtype("int64"))
    assert from_string == from_numpy


def test_dtype_univalent(any_numpy_dtype):
    """Wrapping a PandasDtype in PandasDtype again compares equal to the original."""
    once = PandasDtype(any_numpy_dtype)
    twice = PandasDtype(once)

    assert twice == once


# ----------------------------------------------------------------------------
# Construction


def test_constructor_no_coercion():
    """Passing a plain list to PandasArray raises ValueError mentioning "NumPy array"."""
    not_an_ndarray = [1, 2, 3]
    with pytest.raises(ValueError, match="NumPy array"):
        PandasArray(not_an_ndarray)


def test_series_constructor_with_copy():
    """Building a Series from a PandasArray with ``copy=True`` copies the data."""
    ndarray = np.array([1, 2, 3])
    ser = pd.Series(PandasArray(ndarray), copy=True)

    assert ser.values is not ndarray
    # Identity alone does not prove a copy: a view is a distinct object over
    # the same buffer, so also require that no memory is shared.
    assert not np.shares_memory(ser.values, ndarray)


def test_series_constructor_with_astype():
    """A Series built from an integer PandasArray with ``dtype="float64"`` holds floats."""
    wrapped = PandasArray(np.array([1, 2, 3]))
    expected = pd.Series([1.0, 2.0, 3.0], dtype="float64")

    result = pd.Series(wrapped, dtype="float64")
    tm.assert_series_equal(result, expected)


def test_from_sequence_dtype():
    """``_from_sequence`` with ``dtype="uint64"`` converts int64 input to uint64."""
    source = np.array([1, 2, 3], dtype="int64")
    expected = PandasArray(np.array([1, 2, 3], dtype="uint64"))

    result = PandasArray._from_sequence(source, dtype="uint64")
    tm.assert_extension_array_equal(result, expected)


def test_constructor_copy():
    """``PandasArray(arr, copy=True)`` must not share memory with ``arr``."""
    source = np.array([0, 1])
    copied = PandasArray(source, copy=True)

    shared = tm.shares_memory(copied, source)
    assert not shared


def test_constructor_with_data(any_numpy_array):
    """The PandasArray dtype exposes the wrapped ndarray's dtype as ``numpy_dtype``."""
    wrapped = PandasArray(any_numpy_array)
    assert wrapped.dtype.numpy_dtype == any_numpy_array.dtype


# ----------------------------------------------------------------------------
# Conversion


def test_to_numpy():
    """Exercise ``to_numpy`` with defaults, with ``copy=True`` and with a dtype."""
    arr = PandasArray(np.array([1, 2, 3]))

    # Default call: the backing ndarray itself is returned.
    same = arr.to_numpy()
    assert same is arr._ndarray

    # copy=True: the returned object is not the backing ndarray.
    copied = arr.to_numpy(copy=True)
    assert copied is not arr._ndarray

    # An explicit dtype converts the values.
    as_float = arr.to_numpy(dtype="f8")
    tm.assert_numpy_array_equal(as_float, np.array([1, 2, 3], dtype="f8"))


# ----------------------------------------------------------------------------
# Setitem


def test_setitem_series():
    """Assigning through ``Series.array`` is reflected in the Series itself."""
    ser = pd.Series([1, 2, 3])
    backing = ser.array
    backing[0] = 10

    tm.assert_series_equal(ser, pd.Series([10, 2, 3]))


def test_setitem(any_numpy_array):
    """Scalar ``__setitem__`` on a PandasArray matches the same assignment on an ndarray."""
    raw = any_numpy_array
    wrapped = PandasArray(raw, copy=True)

    # Perform the identical element assignment on both containers ...
    wrapped[0] = wrapped[1]
    raw[0] = raw[1]

    # ... and require identical contents afterwards.
    tm.assert_numpy_array_equal(wrapped.to_numpy(), raw)


# ----------------------------------------------------------------------------
# Reductions


def test_bad_reduce_raises():
    """``_reduce`` with an unknown reduction name raises TypeError."""
    arr = PandasArray(np.array([1, 2, 3], dtype="int64"))
    msg = "cannot perform not_a_method with type int"

    # NOTE(review): the expected message doubles as the reduction name;
    # presumably the raised TypeError embeds that name, which would be why
    # the match succeeds -- confirm before changing either one.
    with pytest.raises(TypeError, match=msg):
        arr._reduce(msg)


def test_validate_reduction_keyword_args():
    """``all(keepdims=True)`` is rejected with a ValueError naming 'keepdims'."""
    expected_msg = "the 'keepdims' parameter is not supported .*all"
    arr = PandasArray(np.array([1, 2, 3]))

    with pytest.raises(ValueError, match=expected_msg):
        arr.all(keepdims=True)


def test_np_max_nested_tuples():
    """Max-style reductions over nested tuples return the largest element."""
    # case where checking in ufunc.nout works while checking for tuples
    #  does not
    ser = pd.Series(
        [
            (("j", "k"), ("l", "m")),
            (("l", "m"), ("o", "p")),
            (("o", "p"), ("j", "k")),
        ]
    )
    arr = ser.array
    largest = arr[2]

    # The reduction methods hand back the stored object itself.
    assert arr.max() is largest
    assert ser.max() is largest

    # np.maximum.reduce is only compared by value, on both containers.
    from_array = np.maximum.reduce(arr)
    assert from_array == largest

    from_series = np.maximum.reduce(ser)
    assert from_series == largest


def test_np_reduce_2d():
    """Column-wise max of a 4x3 arange array equals its last row, via ufunc and method."""
    arr = PandasArray(np.arange(12).reshape(4, 3))
    # Values increase along axis 0, so the last row holds each column's maximum.
    last_row = arr[-1]

    via_ufunc = np.maximum.reduce(arr, axis=0)
    tm.assert_extension_array_equal(via_ufunc, last_row)

    via_method = arr.max(axis=0)
    tm.assert_extension_array_equal(via_method, last_row)


# ----------------------------------------------------------------------------
# Ops


@pytest.mark.parametrize("ufunc", [np.abs, np.negative, np.positive])
def test_ufunc_unary(ufunc):
    """A unary ufunc on a PandasArray matches the ufunc applied to its ndarray."""
    arr = PandasArray(np.array([-1.0, 0.0, 1.0]))
    expected = PandasArray(ufunc(arr._ndarray))

    tm.assert_extension_array_equal(ufunc(arr), expected)

    # same thing but with the 'out' keyword
    target = PandasArray(np.array([-9.0, -9.0, -9.0]))
    ufunc(arr, out=target)
    tm.assert_extension_array_equal(target, expected)


def test_ufunc():
    """``np.divmod`` on PandasArrays returns two PandasArrays matching the ndarray result."""
    arr = PandasArray(np.array([-1.0, 0.0, 1.0]))
    raw = arr._ndarray

    quotient, remainder = np.divmod(arr, np.add(arr, 2))
    exp_quotient, exp_remainder = np.divmod(raw, np.add(raw, 2))

    tm.assert_extension_array_equal(quotient, PandasArray(exp_quotient))
    tm.assert_extension_array_equal(remainder, PandasArray(exp_remainder))


def test_basic_binop():
    """Adding a PandasArray to itself doubles every element."""
    # Just a basic smoke test. The EA interface tests exercise this
    # more thoroughly.
    operand = PandasArray(np.array([1, 2, 3]))
    doubled = PandasArray(np.array([2, 4, 6]))

    tm.assert_extension_array_equal(operand + operand, doubled)


@pytest.mark.parametrize("dtype", [None, object])
def test_setitem_object_typecode(dtype):
    """Setting a string element works for both the default and the object dtype."""
    expected = PandasArray(np.array(["t", "b", "c"], dtype=dtype))

    target = PandasArray(np.array(["a", "b", "c"], dtype=dtype))
    target[0] = "t"

    tm.assert_extension_array_equal(target, expected)


def test_setitem_no_coercion():
    """Setting into an integer PandasArray never changes the array's dtype."""
    # https://github.com/pandas-dev/pandas/issues/28150
    arr = PandasArray(np.array([1, 2, 3]))

    # A string cannot be stored in the integer array.
    with pytest.raises(ValueError, match="int"):
        arr[0] = "a"

    # With a value that we do coerce, check that we coerce the value
    #  and not the underlying array.
    arr[0] = 2.5
    first = arr[0]
    assert isinstance(first, (int, np.integer)), type(first)


def test_setitem_preserves_views():
    """Writes through ``__setitem__`` stay visible to previously created views."""
    # GH#28150, see also extension test of the same name
    arr = PandasArray(np.array([1, 2, 3]))
    # Three ways of obtaining an object backed by the same data.
    views = [arr.view(), arr[:], np.asarray(arr)]

    arr[0] = 9
    for view in views:
        assert view[0] == 9

    # After setting a float into the array, a write through the first view
    # must still be visible from the original.
    arr[-1] = 2.5
    views[0][-1] = 5
    assert arr[-1] == 5


@pytest.mark.parametrize("dtype", [np.int64, np.uint64])
def test_quantile_empty(dtype):
    """``_quantile`` on an empty integer array yields NaN for every quantile."""
    # we should get back np.nans, not -1s
    empty = PandasArray(np.array([], dtype=dtype))
    quantiles = pd.Index([0.0, 0.5])
    expected = PandasArray(np.array([np.nan, np.nan]))

    result = empty._quantile(quantiles, interpolation="linear")
    tm.assert_extension_array_equal(result, expected)


def test_factorize_unsigned():
    """``PandasArray.factorize`` on uint64 data agrees with ``pd.factorize``."""
    # don't raise when calling factorize on unsigned int PandasArray
    raw = np.array([1, 2, 3], dtype=np.uint64)

    exp_codes, exp_uniques = pd.factorize(raw)
    res_codes, res_uniques = PandasArray(raw).factorize()

    tm.assert_numpy_array_equal(res_codes, exp_codes)
    # pd.factorize on an ndarray returns ndarray uniques; wrap them to compare.
    tm.assert_extension_array_equal(res_uniques, PandasArray(exp_uniques))