140 lines
4.8 KiB
Python
140 lines
4.8 KiB
Python
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import DataFrame
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestDataFrameFilter:
|
||
|
def test_filter(self, float_frame, float_string_frame):
|
||
|
# Items
|
||
|
filtered = float_frame.filter(["A", "B", "E"])
|
||
|
assert len(filtered.columns) == 2
|
||
|
assert "E" not in filtered
|
||
|
|
||
|
filtered = float_frame.filter(["A", "B", "E"], axis="columns")
|
||
|
assert len(filtered.columns) == 2
|
||
|
assert "E" not in filtered
|
||
|
|
||
|
# Other axis
|
||
|
idx = float_frame.index[0:4]
|
||
|
filtered = float_frame.filter(idx, axis="index")
|
||
|
expected = float_frame.reindex(index=idx)
|
||
|
tm.assert_frame_equal(filtered, expected)
|
||
|
|
||
|
# like
|
||
|
fcopy = float_frame.copy()
|
||
|
fcopy["AA"] = 1
|
||
|
|
||
|
filtered = fcopy.filter(like="A")
|
||
|
assert len(filtered.columns) == 2
|
||
|
assert "AA" in filtered
|
||
|
|
||
|
# like with ints in column names
|
||
|
df = DataFrame(0.0, index=[0, 1, 2], columns=[0, 1, "_A", "_B"])
|
||
|
filtered = df.filter(like="_")
|
||
|
assert len(filtered.columns) == 2
|
||
|
|
||
|
# regex with ints in column names
|
||
|
# from PR #10384
|
||
|
df = DataFrame(0.0, index=[0, 1, 2], columns=["A1", 1, "B", 2, "C"])
|
||
|
expected = DataFrame(
|
||
|
0.0, index=[0, 1, 2], columns=pd.Index([1, 2], dtype=object)
|
||
|
)
|
||
|
filtered = df.filter(regex="^[0-9]+$")
|
||
|
tm.assert_frame_equal(filtered, expected)
|
||
|
|
||
|
expected = DataFrame(0.0, index=[0, 1, 2], columns=[0, "0", 1, "1"])
|
||
|
# shouldn't remove anything
|
||
|
filtered = expected.filter(regex="^[0-9]+$")
|
||
|
tm.assert_frame_equal(filtered, expected)
|
||
|
|
||
|
# pass in None
|
||
|
with pytest.raises(TypeError, match="Must pass"):
|
||
|
float_frame.filter()
|
||
|
with pytest.raises(TypeError, match="Must pass"):
|
||
|
float_frame.filter(items=None)
|
||
|
with pytest.raises(TypeError, match="Must pass"):
|
||
|
float_frame.filter(axis=1)
|
||
|
|
||
|
# test mutually exclusive arguments
|
||
|
with pytest.raises(TypeError, match="mutually exclusive"):
|
||
|
float_frame.filter(items=["one", "three"], regex="e$", like="bbi")
|
||
|
with pytest.raises(TypeError, match="mutually exclusive"):
|
||
|
float_frame.filter(items=["one", "three"], regex="e$", axis=1)
|
||
|
with pytest.raises(TypeError, match="mutually exclusive"):
|
||
|
float_frame.filter(items=["one", "three"], regex="e$")
|
||
|
with pytest.raises(TypeError, match="mutually exclusive"):
|
||
|
float_frame.filter(items=["one", "three"], like="bbi", axis=0)
|
||
|
with pytest.raises(TypeError, match="mutually exclusive"):
|
||
|
float_frame.filter(items=["one", "three"], like="bbi")
|
||
|
|
||
|
# objects
|
||
|
filtered = float_string_frame.filter(like="foo")
|
||
|
assert "foo" in filtered
|
||
|
|
||
|
# unicode columns, won't ascii-encode
|
||
|
df = float_frame.rename(columns={"B": "\u2202"})
|
||
|
filtered = df.filter(like="C")
|
||
|
assert "C" in filtered
|
||
|
|
||
|
def test_filter_regex_search(self, float_frame):
|
||
|
fcopy = float_frame.copy()
|
||
|
fcopy["AA"] = 1
|
||
|
|
||
|
# regex
|
||
|
filtered = fcopy.filter(regex="[A]+")
|
||
|
assert len(filtered.columns) == 2
|
||
|
assert "AA" in filtered
|
||
|
|
||
|
# doesn't have to be at beginning
|
||
|
df = DataFrame(
|
||
|
{"aBBa": [1, 2], "BBaBB": [1, 2], "aCCa": [1, 2], "aCCaBB": [1, 2]}
|
||
|
)
|
||
|
|
||
|
result = df.filter(regex="BB")
|
||
|
exp = df[[x for x in df.columns if "BB" in x]]
|
||
|
tm.assert_frame_equal(result, exp)
|
||
|
|
||
|
@pytest.mark.parametrize(
|
||
|
"name,expected",
|
||
|
[
|
||
|
("a", DataFrame({"a": [1, 2]})),
|
||
|
("a", DataFrame({"a": [1, 2]})),
|
||
|
("あ", DataFrame({"あ": [3, 4]})),
|
||
|
],
|
||
|
)
|
||
|
def test_filter_unicode(self, name, expected):
|
||
|
# GH13101
|
||
|
df = DataFrame({"a": [1, 2], "あ": [3, 4]})
|
||
|
|
||
|
tm.assert_frame_equal(df.filter(like=name), expected)
|
||
|
tm.assert_frame_equal(df.filter(regex=name), expected)
|
||
|
|
||
|
@pytest.mark.parametrize("name", ["a", "a"])
|
||
|
def test_filter_bytestring(self, name):
|
||
|
# GH13101
|
||
|
df = DataFrame({b"a": [1, 2], b"b": [3, 4]})
|
||
|
expected = DataFrame({b"a": [1, 2]})
|
||
|
|
||
|
tm.assert_frame_equal(df.filter(like=name), expected)
|
||
|
tm.assert_frame_equal(df.filter(regex=name), expected)
|
||
|
|
||
|
def test_filter_corner(self):
|
||
|
empty = DataFrame()
|
||
|
|
||
|
result = empty.filter([])
|
||
|
tm.assert_frame_equal(result, empty)
|
||
|
|
||
|
result = empty.filter(like="foo")
|
||
|
tm.assert_frame_equal(result, empty)
|
||
|
|
||
|
def test_filter_regex_non_string(self):
|
||
|
# GH#5798 trying to filter on non-string columns should drop,
|
||
|
# not raise
|
||
|
df = DataFrame(np.random.random((3, 2)), columns=["STRING", 123])
|
||
|
result = df.filter(regex="STRING")
|
||
|
expected = df[["STRING"]]
|
||
|
tm.assert_frame_equal(result, expected)
|