# Source listing metadata: 287 lines, 10 KiB, Python.
import numpy as np
|
|
import pytest
|
|
|
|
from pandas._libs.sparse import IntIndex
|
|
import pandas.util._test_decorators as td
|
|
|
|
import pandas as pd
|
|
from pandas import isna
|
|
import pandas._testing as tm
|
|
from pandas.core.arrays.sparse import (
|
|
SparseArray,
|
|
SparseDtype,
|
|
)
|
|
|
|
|
|
class TestConstructors:
    """Tests for the ``SparseArray`` constructor and ``from_spmatrix``."""

    def test_constructor_dtype(self):
        """Check the dtype and fill_value chosen for float and integer input."""
        arr = SparseArray([np.nan, 1, 2, np.nan])
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert arr.dtype.subtype == np.float64
        assert np.isnan(arr.fill_value)

        arr = SparseArray([np.nan, 1, 2, np.nan], fill_value=0)
        assert arr.dtype == SparseDtype(np.float64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=np.float64)
        assert arr.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(arr.fill_value)

        arr = SparseArray([0, 1, 2, 4], dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=np.int64)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

        arr = SparseArray([0, 1, 2, 4], fill_value=0, dtype=None)
        assert arr.dtype == SparseDtype(np.int64, 0)
        assert arr.fill_value == 0

    def test_constructor_dtype_str(self):
        """The string ``"int"`` and the builtin ``int`` give equal arrays."""
        result = SparseArray([1, 2, 3], dtype="int")
        expected = SparseArray([1, 2, 3], dtype=int)
        tm.assert_sp_array_equal(result, expected)

    def test_constructor_sparse_dtype(self):
        """A ``SparseDtype`` instance supplies both subtype and fill_value."""
        result = SparseArray([1, 0, 0, 1], dtype=SparseDtype("int64", -1))
        expected = SparseArray([1, 0, 0, 1], fill_value=-1, dtype=np.int64)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int64")

    def test_constructor_sparse_dtype_str(self):
        """The string ``"Sparse[int32]"`` is accepted as a dtype."""
        result = SparseArray([1, 0, 0, 1], dtype="Sparse[int32]")
        expected = SparseArray([1, 0, 0, 1], dtype=np.int32)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int32")

    def test_constructor_object_dtype(self):
        """Object dtype defaults to a NaN fill_value and honours an explicit one."""
        # GH#11856
        arr = SparseArray(["A", "A", np.nan, "B"], dtype=object)
        assert arr.dtype == SparseDtype(object)
        assert np.isnan(arr.fill_value)

        arr = SparseArray(["A", "A", np.nan, "B"], dtype=object, fill_value="A")
        assert arr.dtype == SparseDtype(object, "A")
        assert arr.fill_value == "A"

    def test_constructor_object_dtype_bool_fill(self):
        """Elements keep their exact type and value with ``fill_value=False``."""
        # GH#17574
        data = [False, 0, 100.0, 0.0]
        arr = SparseArray(data, dtype=object, fill_value=False)
        assert arr.dtype == SparseDtype(object, False)
        assert arr.fill_value is False
        arr_expected = np.array(data, dtype=object)
        # False, 0 and 0.0 compare equal, so the exact type is checked as well
        # as the value; ``is`` is the idiomatic exact-type comparison.
        it = (type(x) is type(y) and x == y for x, y in zip(arr, arr_expected))
        assert np.fromiter(it, dtype=np.bool_).all()

    @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
    def test_constructor_na_dtype(self, dtype):
        """NaN in the data with an integer dtype raises ``ValueError``."""
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray([0, 1, np.nan], dtype=dtype)

    def test_constructor_warns_when_losing_timezone(self):
        """Tz-aware input emits ``UserWarning`` and matches the datetime64 data."""
        # GH#32501 warn when losing timezone information
        dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")

        expected = SparseArray(np.asarray(dti, dtype="datetime64[ns]"))

        with tm.assert_produces_warning(UserWarning):
            result = SparseArray(dti)

        tm.assert_sp_array_equal(result, expected)

        with tm.assert_produces_warning(UserWarning):
            result = SparseArray(pd.Series(dti))

        tm.assert_sp_array_equal(result, expected)

    def test_constructor_spindex_dtype(self):
        """An explicit ``IntIndex`` places the data; dtype and fill_value hold."""
        arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
        # TODO: actionable?
        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        expected = SparseArray([0, 1, 2, 0], kind="integer")
        tm.assert_sp_array_equal(arr, expected)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=np.int64,
            fill_value=0,
        )
        exp = SparseArray([0, 1, 2, 3], dtype=np.int64, fill_value=0)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=np.int64
        )
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=np.int64)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

        arr = SparseArray(
            data=[1, 2, 3],
            sparse_index=IntIndex(4, [1, 2, 3]),
            dtype=None,
            fill_value=0,
        )
        exp = SparseArray([0, 1, 2, 3], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        """Scalar data gives a length-one int64 array, with or without an index.

        The parametrization covers both ``sparse_index=None`` and
        ``IntIndex(1, [0])``, so no hard-coded repeat of the second case is
        needed in the body.
        """
        # scalar input
        arr = SparseArray(data=1, sparse_index=sparse_index, dtype=None)
        exp = SparseArray([1], dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    def test_constructor_spindex_dtype_scalar_broadcasts(self):
        """List data plus an ``IntIndex`` with ``dtype=None`` infers int64.

        NOTE(review): despite the test name, ``data`` here is a two-element
        list rather than a scalar.
        """
        arr = SparseArray(
            data=[1, 2], sparse_index=IntIndex(4, [1, 2]), fill_value=0, dtype=None
        )
        exp = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
        tm.assert_sp_array_equal(arr, exp)
        assert arr.dtype == SparseDtype(np.int64)
        assert arr.fill_value == 0

    @pytest.mark.parametrize(
        "data, fill_value",
        [
            (np.array([1, 2]), 0),
            (np.array([1.0, 2.0]), np.nan),
            ([True, False], False),
            ([pd.Timestamp("2017-01-01")], pd.NaT),
        ],
    )
    def test_constructor_inferred_fill_value(self, data, fill_value):
        """The fill_value inferred from the data matches the expected one."""
        result = SparseArray(data).fill_value

        # NaN and NaT never compare equal to themselves, so use isna for them.
        if isna(fill_value):
            assert isna(result)
        else:
            assert result == fill_value

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @pytest.mark.parametrize("size", [0, 10])
    @td.skip_if_no_scipy
    def test_from_spmatrix(self, size, format):
        """``from_spmatrix`` round-trips a random single-column scipy matrix."""
        import scipy.sparse

        mat = scipy.sparse.random(size, 1, density=0.5, format=format)
        result = SparseArray.from_spmatrix(mat)

        result = np.asarray(result)
        expected = mat.toarray().ravel()
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @td.skip_if_no_scipy
    def test_from_spmatrix_including_explicit_zero(self, format):
        """``from_spmatrix`` handles a matrix that stores an explicit zero."""
        import scipy.sparse

        mat = scipy.sparse.random(10, 1, density=0.5, format=format)
        # Overwrite one stored entry so the matrix carries an explicit zero.
        mat.data[0] = 0
        result = SparseArray.from_spmatrix(mat)

        result = np.asarray(result)
        expected = mat.toarray().ravel()
        tm.assert_numpy_array_equal(result, expected)

    @td.skip_if_no_scipy
    def test_from_spmatrix_raises(self):
        """A matrix with four columns is rejected with ``ValueError``."""
        import scipy.sparse

        mat = scipy.sparse.eye(5, 4, format="csc")

        with pytest.raises(ValueError, match="not '4'"):
            SparseArray.from_spmatrix(mat)

    def test_constructor_from_too_large_array(self):
        """Two-dimensional input raises ``TypeError``."""
        with pytest.raises(TypeError, match="expected dimension <= 1 data"):
            SparseArray(np.arange(10).reshape((2, 5)))

    def test_constructor_from_sparse(self):
        """Building from a ``SparseArray`` keeps its fill_value and sp_values."""
        zarr = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
        res = SparseArray(zarr)
        assert res.fill_value == 0
        tm.assert_almost_equal(res.sp_values, zarr.sp_values)

    def test_constructor_copy(self):
        """``copy=True`` isolates ``sp_values``; the default shares them."""
        arr_data = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
        arr = SparseArray(arr_data)

        # Writing into the copy must leave the source array untouched.
        cp = SparseArray(arr, copy=True)
        cp.sp_values[:3] = 0
        assert not (arr.sp_values[:3] == 0).any()

        # Without copy=True the write is visible through the source array.
        not_copy = SparseArray(arr)
        not_copy.sp_values[:3] = 0
        assert (arr.sp_values[:3] == 0).all()

    def test_constructor_bool(self):
        """Boolean data stores only the ``True`` entries and densifies back."""
        # GH#10648
        data = np.array([False, False, True, True, False, False])
        arr = SparseArray(data, fill_value=False, dtype=bool)

        assert arr.dtype == SparseDtype(bool)
        # np.asarray(arr) densifies, so sp_values is compared against the
        # expected stored values rather than against np.asarray(arr).
        tm.assert_numpy_array_equal(arr.sp_values, np.array([True, True]))
        tm.assert_numpy_array_equal(arr.sp_index.indices, np.array([2, 3], np.int32))

        dense = arr.to_dense()
        assert dense.dtype == bool
        tm.assert_numpy_array_equal(dense, data)

    def test_constructor_bool_fill_value(self):
        """Boolean arrays use a falsy fill_value unless one is given."""
        arr = SparseArray([True, False, True], dtype=None)
        assert arr.dtype == SparseDtype(np.bool_)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool_)
        assert arr.dtype == SparseDtype(np.bool_)
        assert not arr.fill_value

        arr = SparseArray([True, False, True], dtype=np.bool_, fill_value=True)
        assert arr.dtype == SparseDtype(np.bool_, True)
        assert arr.fill_value

    def test_constructor_float32(self):
        """float32 data keeps its dtype in ``sp_values`` and when densified."""
        # GH#10648
        data = np.array([1.0, np.nan, 3], dtype=np.float32)
        arr = SparseArray(data, dtype=np.float32)

        assert arr.dtype == SparseDtype(np.float32)
        # np.asarray(arr) densifies, so sp_values is compared against the
        # expected stored values rather than against np.asarray(arr).
        tm.assert_numpy_array_equal(arr.sp_values, np.array([1, 3], dtype=np.float32))
        tm.assert_numpy_array_equal(
            arr.sp_index.indices, np.array([0, 2], dtype=np.int32)
        )

        dense = arr.to_dense()
        assert dense.dtype == np.float32
        tm.assert_numpy_array_equal(dense, data)