189 lines
6.4 KiB
Python
189 lines
6.4 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
import pandas._testing as tm
|
|
|
|
|
|
class BaseMissingTests:
    """Reusable missing-value tests for extension arrays.

    The tests rely on a ``data_missing`` fixture that holds exactly two
    elements, a missing value followed by a valid one (``test_isna`` pins
    this down by expecting the mask ``[True, False]``), and on a ``data``
    fixture supplying a longer array of the same dtype.
    """

    def test_isna(self, data_missing):
        """``isna`` flags only the first element, for arrays and Series."""
        expected = np.array([True, False])

        result = pd.isna(data_missing)
        tm.assert_numpy_array_equal(result, expected)

        result = pd.Series(data_missing).isna()
        expected = pd.Series(expected)
        tm.assert_series_equal(result, expected)

        # GH 21189
        # An emptied Series must still produce a boolean mask.
        result = pd.Series(data_missing).drop([0, 1]).isna()
        expected = pd.Series([], dtype=bool)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("na_func", ["isna", "notna"])
    def test_isna_returns_copy(self, data_missing, na_func):
        """Mutating the mask returned by isna/notna leaves the Series intact."""
        result = pd.Series(data_missing)
        expected = result.copy()
        mask = getattr(result, na_func)()
        if isinstance(mask.dtype, pd.SparseDtype):
            # Densify sparse masks before the in-place assignment below.
            mask = np.array(mask)

        mask[:] = True
        tm.assert_series_equal(result, expected)

    def test_dropna_array(self, data_missing):
        """``dropna`` on the array keeps only the valid element."""
        result = data_missing.dropna()
        expected = data_missing[[1]]
        tm.assert_extension_array_equal(result, expected)

    def test_dropna_series(self, data_missing):
        """``Series.dropna`` keeps only the row holding the valid element."""
        ser = pd.Series(data_missing)
        result = ser.dropna()
        expected = ser.iloc[[1]]
        tm.assert_series_equal(result, expected)

    def test_dropna_frame(self, data_missing):
        """``DataFrame.dropna`` by rows, by columns, and with a second column."""
        df = pd.DataFrame({"A": data_missing}, columns=pd.Index(["A"], dtype=object))

        # defaults
        result = df.dropna()
        expected = df.iloc[[1]]
        tm.assert_frame_equal(result, expected)

        # axis = 1
        result = df.dropna(axis="columns")
        expected = pd.DataFrame(index=pd.RangeIndex(2), columns=pd.Index([]))
        tm.assert_frame_equal(result, expected)

        # multiple
        # "A" is missing in row 0 and "B" in row 1, so no row survives.
        df = pd.DataFrame({"A": data_missing, "B": [1, np.nan]})
        result = df.dropna()
        expected = df.iloc[:0]
        tm.assert_frame_equal(result, expected)

    def test_fillna_scalar(self, data_missing):
        """Filling with a valid scalar replaces the missing element.

        The checks only compare the result with itself, so they hold even
        for arrays whose ``fillna`` returns a different (but compatible)
        dtype than the input.
        """
        valid = data_missing[1]
        result = data_missing.fillna(valid)

        assert len(result) == len(data_missing)
        # Nothing may remain missing after the fill.
        assert not np.asarray(result.isna(), dtype=bool).any()
        # The slot that was missing must now match the slot that was valid.
        tm.assert_extension_array_equal(result[[0]], result[[1]])

    def test_fillna_limit_pad(self, data_missing):
        """Forward fill honours ``limit``: only two of three gaps are filled."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        result = pd.Series(arr).ffill(limit=2)
        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "limit_area, input_ilocs, expected_ilocs",
        [
            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
        ],
    )
    def test_ffill_limit_area(
        self, data_missing, limit_area, input_ilocs, expected_ilocs
    ):
        """Forward fill honours ``limit_area`` ("inside" / "outside").

        ``input_ilocs`` and ``expected_ilocs`` index into ``data_missing``:
        0 selects the missing element and 1 the valid one.
        """
        # GH#56616
        arr = data_missing.take(input_ilocs)
        result = pd.Series(arr).ffill(limit_area=limit_area)
        expected = pd.Series(data_missing.take(expected_ilocs))
        tm.assert_series_equal(result, expected)

    def test_fillna_limit_backfill(self, data_missing):
        """Backward fill honours ``limit``: only two of three gaps are filled."""
        arr = data_missing.take([1, 0, 0, 0, 1])
        # Use bfill directly (mirroring the ffill call in the pad test)
        # instead of the deprecated ``fillna(method="backfill")`` spelling.
        result = pd.Series(arr).bfill(limit=2)
        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
        tm.assert_series_equal(result, expected)

    def test_fillna_no_op_returns_copy(self, data):
        """Filling an array without missing values still returns a new object."""
        data = data[~data.isna()]

        valid = data[0]
        result = data.fillna(valid)
        assert result is not data
        tm.assert_extension_array_equal(result, data)

        result = data._pad_or_backfill(method="backfill")
        assert result is not data
        tm.assert_extension_array_equal(result, data)

    def test_fillna_series(self, data_missing):
        """``Series.fillna`` with a scalar, with a Series, and with itself."""
        fill_value = data_missing[1]
        ser = pd.Series(data_missing)

        result = ser.fillna(fill_value)
        expected = pd.Series(
            data_missing._from_sequence(
                [fill_value, fill_value], dtype=data_missing.dtype
            )
        )
        tm.assert_series_equal(result, expected)

        # Fill with a series
        result = ser.fillna(expected)
        tm.assert_series_equal(result, expected)

        # Fill with a series not affecting the missing values
        result = ser.fillna(ser)
        tm.assert_series_equal(result, ser)

    def test_fillna_series_method(self, data_missing, fillna_method):
        """Directional fill, selected by the ``fillna_method`` fixture."""
        fill_value = data_missing[1]

        if fillna_method == "ffill":
            # Reverse so the valid element comes first and can be propagated
            # forward into the missing slot.
            data_missing = data_missing[::-1]

        result = getattr(pd.Series(data_missing), fillna_method)()
        expected = pd.Series(
            data_missing._from_sequence(
                [fill_value, fill_value], dtype=data_missing.dtype
            )
        )

        tm.assert_series_equal(result, expected)

    def test_fillna_frame(self, data_missing):
        """``DataFrame.fillna`` with a scalar fills the extension column."""
        fill_value = data_missing[1]

        result = pd.DataFrame({"A": data_missing, "B": [1, 2]}).fillna(fill_value)

        expected = pd.DataFrame(
            {
                "A": data_missing._from_sequence(
                    [fill_value, fill_value], dtype=data_missing.dtype
                ),
                "B": [1, 2],
            }
        )

        tm.assert_frame_equal(result, expected)

    def test_fillna_fill_other(self, data):
        """Filling another column by name leaves the extension column as is."""
        result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0})

        expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)})

        tm.assert_frame_equal(result, expected)

    def test_use_inf_as_na_no_effect(self, data_missing):
        """``mode.use_inf_as_na`` warns but does not change the ``isna`` mask."""
        ser = pd.Series(data_missing)
        expected = ser.isna()
        msg = "use_inf_as_na option is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            with pd.option_context("mode.use_inf_as_na", True):
                result = ser.isna()
        tm.assert_series_equal(result, expected)
|