210 lines
5.5 KiB
Python
210 lines
5.5 KiB
Python
|
import re
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas.core.arrays.sparse import SparseDtype
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"dtype, fill_value",
|
||
|
[
|
||
|
("int", 0),
|
||
|
("float", np.nan),
|
||
|
("bool", False),
|
||
|
("object", np.nan),
|
||
|
("datetime64[ns]", pd.NaT),
|
||
|
("timedelta64[ns]", pd.NaT),
|
||
|
],
|
||
|
)
|
||
|
def test_inferred_dtype(dtype, fill_value):
|
||
|
sparse_dtype = SparseDtype(dtype)
|
||
|
result = sparse_dtype.fill_value
|
||
|
if pd.isna(fill_value):
|
||
|
assert pd.isna(result) and type(result) == type(fill_value)
|
||
|
else:
|
||
|
assert result == fill_value
|
||
|
|
||
|
|
||
|
def test_from_sparse_dtype():
    """A SparseDtype built from another SparseDtype reports fill value 0."""
    source = SparseDtype("float", 0)
    rebuilt = SparseDtype(source)
    assert rebuilt.fill_value == 0
|
||
|
|
||
|
|
||
|
def test_from_sparse_dtype_fill_value():
    """An explicit ``fill_value`` wins over the one on the source SparseDtype."""
    source = SparseDtype("int", 1)
    overridden = SparseDtype(source, fill_value=2)
    # The source dtype carried fill value 1; the keyword should replace it.
    assert overridden == SparseDtype("int", 2)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype, fill_value",
    [
        ("int", None),
        ("float", None),
        ("bool", None),
        ("object", None),
        ("datetime64[ns]", None),
        ("timedelta64[ns]", None),
        ("int", np.nan),
        ("float", 0),
    ],
)
def test_equal(dtype, fill_value):
    """Two SparseDtypes built from identical arguments compare equal both ways."""
    first, second = (SparseDtype(dtype, fill_value) for _ in range(2))
    assert first == second
    assert second == first
|
||
|
|
||
|
|
||
|
def test_nans_equal():
    """SparseDtypes filled with ``float("nan")`` and ``np.nan`` compare equal."""
    python_nan_dtype = SparseDtype(float, float("nan"))
    numpy_nan_dtype = SparseDtype(float, np.nan)
    # Check both operand orders.
    assert python_nan_dtype == numpy_nan_dtype
    assert numpy_nan_dtype == python_nan_dtype
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "a, b",
    [
        (SparseDtype("float64"), SparseDtype("float32")),
        (SparseDtype("float64"), SparseDtype("float64", 0)),
        (SparseDtype("float64"), SparseDtype("datetime64[ns]", np.nan)),
        (SparseDtype(int, pd.NaT), SparseDtype(float, pd.NaT)),
        (SparseDtype("float64"), np.dtype("float64")),
    ],
)
def test_not_equal(a, b):
    """Each parametrized pair must compare unequal via ``!=``.

    The pairs differ in subtype, in fill value, or in being a plain NumPy
    dtype rather than a SparseDtype.
    """
    assert a != b
|
||
|
|
||
|
|
||
|
def test_construct_from_string_raises():
    """``construct_from_string`` raises TypeError for a non-dtype string."""
    expected_msg = "Cannot construct a 'SparseDtype' from 'not a dtype'"
    with pytest.raises(TypeError, match=expected_msg):
        SparseDtype.construct_from_string("not a dtype")
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "dtype, expected",
    [
        (SparseDtype(int), True),
        (SparseDtype(float), True),
        (SparseDtype(bool), True),
        (SparseDtype(object), False),
        (SparseDtype(str), False),
    ],
)
def test_is_numeric(dtype, expected):
    """``_is_numeric`` must be exactly the expected boolean for each dtype."""
    is_numeric = dtype._is_numeric
    # Identity comparison: a merely truthy/falsy value is not accepted.
    assert is_numeric is expected
|
||
|
|
||
|
|
||
|
def test_str_uses_object():
    """``SparseDtype(str)`` exposes NumPy ``object`` as its subtype."""
    assert SparseDtype(str).subtype == np.dtype("object")
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[float64]", SparseDtype(np.dtype("float64"))),
        ("Sparse[float32]", SparseDtype(np.dtype("float32"))),
        ("Sparse[int]", SparseDtype(np.dtype("int"))),
        ("Sparse[str]", SparseDtype(np.dtype("str"))),
        ("Sparse[datetime64[ns]]", SparseDtype(np.dtype("datetime64[ns]"))),
        ("Sparse", SparseDtype(np.dtype("float"), np.nan)),
    ],
)
def test_construct_from_string(string, expected):
    """``construct_from_string`` turns a ``Sparse[...]`` string into the dtype."""
    constructed = SparseDtype.construct_from_string(string)
    assert constructed == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "a, b, expected",
    [
        (SparseDtype(float, 0.0), SparseDtype(np.dtype("float"), 0.0), True),
        (SparseDtype(int, 0), SparseDtype(int, 0), True),
        (SparseDtype(float, float("nan")), SparseDtype(float, np.nan), True),
        (SparseDtype(float, 0), SparseDtype(float, np.nan), False),
        (SparseDtype(int, 0.0), SparseDtype(float, 0.0), False),
    ],
)
def test_hash_equal(a, b, expected):
    """Equality and hash equality of two dtypes must both match ``expected``."""
    # Equality is checked first; the hash comparison only runs if it passes.
    assert (a == b) is expected

    assert (hash(a) == hash(b)) is expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "string, expected",
    [
        ("Sparse[int]", "int"),
        ("Sparse[int, 0]", "int"),
        ("Sparse[int64]", "int64"),
        ("Sparse[int64, 0]", "int64"),
        ("Sparse[datetime64[ns], 0]", "datetime64[ns]"),
    ],
)
def test_parse_subtype(string, expected):
    """The first item returned by ``_parse_subtype`` is the subtype string.

    The cases cover strings with and without a trailing fill value.
    """
    # The call returns a pair; only the first element is checked here.
    parsed_subtype, _ignored = SparseDtype._parse_subtype(string)
    assert parsed_subtype == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "string", ["Sparse[int, 1]", "Sparse[float, 0.0]", "Sparse[bool, True]"]
)
def test_construct_from_string_fill_value_raises(string):
    """``construct_from_string`` raises TypeError for these fill-value strings."""
    expected_msg = "fill_value in the string is not"
    with pytest.raises(TypeError, match=expected_msg):
        SparseDtype.construct_from_string(string)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "original, dtype, expected",
    [
        (SparseDtype(int, 0), float, SparseDtype(float, 0.0)),
        (SparseDtype(int, 1), float, SparseDtype(float, 1.0)),
        (SparseDtype(int, 1), str, SparseDtype(object, "1")),
        (SparseDtype(float, 1.5), int, SparseDtype(int, 1)),
    ],
)
def test_update_dtype(original, dtype, expected):
    """``update_dtype`` returns the expected SparseDtype for the new subtype.

    In every case the expected dtype carries the original fill value
    converted to the new subtype (e.g. ``1`` -> ``"1"``, ``1.5`` -> ``1``).
    """
    assert original.update_dtype(dtype) == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "original, dtype, expected_error_msg",
    [
        (
            SparseDtype(float, np.nan),
            int,
            re.escape("Cannot convert non-finite values (NA or inf) to integer"),
        ),
        (
            SparseDtype(str, "abc"),
            int,
            re.escape("invalid literal for int() with base 10: 'abc'"),
        ),
    ],
)
def test_update_dtype_raises(original, dtype, expected_error_msg):
    """``update_dtype`` raises ValueError with the expected message.

    ``expected_error_msg`` is already regex-escaped by the parametrization,
    so it is passed to ``match`` unchanged.
    """
    error_context = pytest.raises(ValueError, match=expected_error_msg)
    with error_context:
        original.update_dtype(dtype)
|
||
|
|
||
|
|
||
|
def test_repr():
    """``str()`` of a SparseDtype shows the subtype and the fill value's repr."""
    # GH-34352
    int_dtype = SparseDtype("int64", fill_value=0)
    assert str(int_dtype) == "Sparse[int64, 0]"

    # A string fill value is rendered with its quotes.
    object_dtype = SparseDtype(object, fill_value="0")
    assert str(object_dtype) == "Sparse[object, '0']"
|