177 lines
5.2 KiB
Python
177 lines
5.2 KiB
Python
![]() |
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas import (
|
||
|
Categorical,
|
||
|
CategoricalDtype,
|
||
|
CategoricalIndex,
|
||
|
DataFrame,
|
||
|
MultiIndex,
|
||
|
Series,
|
||
|
Timestamp,
|
||
|
get_dummies,
|
||
|
period_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.core.arrays import SparseArray
|
||
|
|
||
|
|
||
|
class TestGetitem:
|
||
|
def test_getitem_unused_level_raises(self):
|
||
|
# GH#20410
|
||
|
mi = MultiIndex(
|
||
|
levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
|
||
|
codes=[[1, 0], [1, 0]],
|
||
|
)
|
||
|
df = DataFrame(-1, index=range(3), columns=mi)
|
||
|
|
||
|
with pytest.raises(KeyError, match="notevenone"):
|
||
|
df["notevenone"]
|
||
|
|
||
|
def test_getitem_periodindex(self):
|
||
|
rng = period_range("1/1/2000", periods=5)
|
||
|
df = DataFrame(np.random.randn(10, 5), columns=rng)
|
||
|
|
||
|
ts = df[rng[0]]
|
||
|
tm.assert_series_equal(ts, df.iloc[:, 0])
|
||
|
|
||
|
# GH#1211; smoketest unrelated to the rest of this test
|
||
|
repr(df)
|
||
|
|
||
|
ts = df["1/1/2000"]
|
||
|
tm.assert_series_equal(ts, df.iloc[:, 0])
|
||
|
|
||
|
def test_getitem_list_of_labels_categoricalindex_cols(self):
|
||
|
# GH#16115
|
||
|
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
|
||
|
|
||
|
expected = DataFrame(
|
||
|
[[1, 0], [0, 1]], dtype="uint8", index=[0, 1], columns=cats
|
||
|
)
|
||
|
dummies = get_dummies(cats)
|
||
|
result = dummies[list(dummies.columns)]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_getitem_sparse_column_return_type_and_dtype(self):
|
||
|
# https://github.com/pandas-dev/pandas/issues/23559
|
||
|
data = SparseArray([0, 1])
|
||
|
df = DataFrame({"A": data})
|
||
|
expected = Series(data, name="A")
|
||
|
result = df["A"]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
# Also check iloc and loc while we're here
|
||
|
result = df.iloc[:, 0]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = df.loc[:, "A"]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestGetitemListLike:
|
||
|
def test_getitem_list_missing_key(self):
|
||
|
# GH#13822, incorrect error string with non-unique columns when missing
|
||
|
# column is accessed
|
||
|
df = DataFrame({"x": [1.0], "y": [2.0], "z": [3.0]})
|
||
|
df.columns = ["x", "x", "z"]
|
||
|
|
||
|
# Check that we get the correct value in the KeyError
|
||
|
with pytest.raises(KeyError, match=r"\['y'\] not in index"):
|
||
|
df[["x", "y", "z"]]
|
||
|
|
||
|
|
||
|
class TestGetitemCallable:
|
||
|
def test_getitem_callable(self, float_frame):
|
||
|
# GH#12533
|
||
|
result = float_frame[lambda x: "A"]
|
||
|
expected = float_frame.loc[:, "A"]
|
||
|
tm.assert_series_equal(result, expected)
|
||
|
|
||
|
result = float_frame[lambda x: ["A", "B"]]
|
||
|
expected = float_frame.loc[:, ["A", "B"]]
|
||
|
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
|
||
|
|
||
|
df = float_frame[:3]
|
||
|
result = df[lambda x: [True, False, True]]
|
||
|
expected = float_frame.iloc[[0, 2], :]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
def test_loc_multiindex_columns_one_level(self):
|
||
|
# GH#29749
|
||
|
df = DataFrame([[1, 2]], columns=[["a", "b"]])
|
||
|
expected = DataFrame([1], columns=[["a"]])
|
||
|
|
||
|
result = df["a"]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df.loc[:, "a"]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
|
||
|
class TestGetitemBooleanMask:
|
||
|
def test_getitem_bool_mask_categorical_index(self):
|
||
|
|
||
|
df3 = DataFrame(
|
||
|
{
|
||
|
"A": np.arange(6, dtype="int64"),
|
||
|
},
|
||
|
index=CategoricalIndex(
|
||
|
[1, 1, 2, 1, 3, 2],
|
||
|
dtype=CategoricalDtype([3, 2, 1], ordered=True),
|
||
|
name="B",
|
||
|
),
|
||
|
)
|
||
|
df4 = DataFrame(
|
||
|
{
|
||
|
"A": np.arange(6, dtype="int64"),
|
||
|
},
|
||
|
index=CategoricalIndex(
|
||
|
[1, 1, 2, 1, 3, 2],
|
||
|
dtype=CategoricalDtype([3, 2, 1], ordered=False),
|
||
|
name="B",
|
||
|
),
|
||
|
)
|
||
|
|
||
|
result = df3[df3.index == "a"]
|
||
|
expected = df3.iloc[[]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df4[df4.index == "a"]
|
||
|
expected = df4.iloc[[]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df3[df3.index == 1]
|
||
|
expected = df3.iloc[[0, 1, 3]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df4[df4.index == 1]
|
||
|
expected = df4.iloc[[0, 1, 3]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# since we have an ordered categorical
|
||
|
|
||
|
# CategoricalIndex([1, 1, 2, 1, 3, 2],
|
||
|
# categories=[3, 2, 1],
|
||
|
# ordered=True,
|
||
|
# name='B')
|
||
|
result = df3[df3.index < 2]
|
||
|
expected = df3.iloc[[4]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
result = df3[df3.index > 1]
|
||
|
expected = df3.iloc[[]]
|
||
|
tm.assert_frame_equal(result, expected)
|
||
|
|
||
|
# unordered
|
||
|
# cannot be compared
|
||
|
|
||
|
# CategoricalIndex([1, 1, 2, 1, 3, 2],
|
||
|
# categories=[3, 2, 1],
|
||
|
# ordered=False,
|
||
|
# name='B')
|
||
|
msg = "Unordered Categoricals can only compare equality or not"
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
df4[df4.index < 2]
|
||
|
with pytest.raises(TypeError, match=msg):
|
||
|
df4[df4.index > 1]
|