226 lines
7.7 KiB
Python
226 lines
7.7 KiB
Python
|
from datetime import datetime
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
Index,
|
||
|
MultiIndex,
|
||
|
RangeIndex,
|
||
|
Series,
|
||
|
date_range,
|
||
|
option_context,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestResetIndex:
    """Tests for ``reset_index``, mostly on Series."""

    def test_reset_index_dti_round_trip(self):
        """reset_index followed by set_index gives back an equal frame."""
        daily = date_range(start="1/1/2001", end="6/1/2001", freq="D")
        dti = daily._with_freq(None)
        values = np.random.default_rng(2).random(len(dti))
        original = DataFrame({"v": values}, index=dti)

        flattened = original.reset_index()
        assert flattened.dtypes.iloc[0] == np.dtype("M8[ns]")

        restored = flattened.set_index("index")
        # The restored index is named "index", so names are not compared.
        tm.assert_frame_equal(original, restored, check_names=False)

        # GH#2329
        stamp = datetime(2012, 11, 22)
        frame = DataFrame([[stamp, 12.1]], columns=["Date", "Value"]).set_index("Date")

        assert frame.index[0] == stamp
        assert frame.reset_index()["Date"].iloc[0] == stamp

    def test_reset_index(self):
        """Cover the name, inplace and level arguments on a MultiIndex Series."""
        row_labels = Index([f"i-{i}" for i in range(30)], dtype=object)
        col_labels = Index(list("ABCD"), dtype=object)
        full = DataFrame(
            1.1 * np.arange(120).reshape((30, 4)),
            columns=col_labels,
            index=row_labels,
        )
        ser = full[:5].stack(future_stack=True)
        ser.index.names = ["hash", "category"]
        ser.name = "value"

        assert "value" in ser.reset_index()
        assert "value2" in ser.reset_index(name="value2")

        # check inplace
        expected = ser.reset_index(drop=True)
        target = ser
        assert target.reset_index(drop=True, inplace=True) is None
        tm.assert_series_equal(expected, target)

        # level
        mi = MultiIndex(
            levels=[["bar"], ["one", "two", "three"], [0, 1]],
            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
        )
        s = Series(np.random.default_rng(2).standard_normal(6), index=mi)

        one_level = s.reset_index(level=1)
        assert len(one_level.columns) == 2

        remaining = s.reset_index(level=[0, 2], drop=True)
        tm.assert_index_equal(remaining.index, Index(mi.get_level_values(1)))
        assert isinstance(remaining, Series)

    def test_reset_index_name(self):
        """The index name is None after reset_index, with or without drop."""
        s = Series([1, 2, 3], index=Index(range(3), name="x"))
        for result in (s.reset_index(), s.reset_index(drop=True)):
            assert result.index.name is None

    def test_reset_index_level(self):
        """``level`` accepts labels or positions, as a scalar or a list."""
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])

        for levels in (["A", "B"], [0, 1]):
            first = levels[0]
            first_as_list = levels[:1]

            # With MultiIndex
            s = df.set_index(["A", "B"])["C"]

            for level in (first, first_as_list):
                tm.assert_frame_equal(s.reset_index(level=level), df.set_index("B"))

            tm.assert_frame_equal(s.reset_index(level=levels), df)

            result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
            tm.assert_frame_equal(result, df[["C"]])

            with pytest.raises(KeyError, match="Level E "):
                s.reset_index(level=["A", "E"])

            # With single-level Index
            s = df.set_index("A")["B"]

            for level in (first, first_as_list):
                tm.assert_frame_equal(s.reset_index(level=level), df[["A", "B"]])

            result = s.reset_index(level=first, drop=True)
            tm.assert_series_equal(result, df["B"])

            with pytest.raises(IndexError, match="Too many levels"):
                s.reset_index(level=[0, 1, 2])

        # Check that .reset_index([],drop=True) doesn't fail
        result = Series(range(4)).reset_index([], drop=True)
        expected = Series(range(4))
        tm.assert_series_equal(result, expected)

    def test_reset_index_range(self):
        """The frame returned by reset_index has a RangeIndex."""
        # GH 12071
        s = Series(range(2), name="A", dtype="int64")
        result = s.reset_index()
        assert isinstance(result.index, RangeIndex)

        expected = DataFrame(
            [[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
        )
        tm.assert_frame_equal(result, expected)

    def test_reset_index_drop_errors(self):
        """An unknown level name raises KeyError, with or without drop."""
        # GH 20925

        # KeyError raised for series index when passed level name is missing
        flat = Series(range(4))
        msg = "does not match index name"
        with pytest.raises(KeyError, match=msg):
            flat.reset_index("wrong", drop=True)
        with pytest.raises(KeyError, match=msg):
            flat.reset_index("wrong")

        # KeyError raised for series when level to be dropped is missing
        nested = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
        with pytest.raises(KeyError, match="not found"):
            nested.reset_index("wrong", drop=True)

    def test_reset_index_with_drop(self):
        """Check the result type and index name with and without ``drop``."""
        outer = ["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"]
        inner = ["one", "two", "one", "two", "one", "two", "one", "two"]
        index = MultiIndex.from_tuples(zip(outer, inner))
        ser = Series(np.random.default_rng(2).standard_normal(8), index=index)
        ser.iloc[3] = np.nan

        as_frame = ser.reset_index()
        assert isinstance(as_frame, DataFrame)
        # One column per index level plus the value column.
        assert len(as_frame.columns) == len(ser.index.levels) + 1
        assert as_frame.index.name == ser.index.name

        as_series = ser.reset_index(drop=True)
        assert isinstance(as_series, Series)
        assert as_series.index.name == ser.index.name

    def test_reset_index_inplace_and_drop_ignore_name(self):
        """``name`` has no effect when ``drop=True`` and ``inplace=True``."""
        # GH#44575
        ser = Series(range(2), name="old")
        ser.reset_index(name="new", drop=True, inplace=True)
        tm.assert_series_equal(ser, Series(range(2), name="old"))

    def test_reset_index_drop_infer_string(self):
        """An object Series equals its reset under ``future.infer_string``."""
        # GH#56160
        pytest.importorskip("pyarrow")
        original = Series(["a", "b", "c"], dtype=object)
        with option_context("future.infer_string", True):
            result = original.reset_index(drop=True)
        tm.assert_series_equal(result, original)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "array, dtype",
    [
        (["a", "b"], object),
        (
            pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
            pd.PeriodDtype(freq="Q-DEC"),
        ),
    ],
)
def test_reset_index_dtypes_on_empty_series_with_multiindex(
    array, dtype, using_infer_string
):
    """Check the per-column dtypes after resetting an empty MultiIndex Series."""
    # GH 19602 - Preserve dtype on empty Series with MultiIndex
    idx = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
    empty = Series(dtype=object, index=idx)[:0]
    result = empty.reset_index().dtypes

    # For the object case the expected dtype depends on the infer-string flag.
    if dtype == object:
        level_2_dtype = "string" if using_infer_string else object
    else:
        level_2_dtype = dtype

    expected = Series(
        {
            "level_0": np.int64,
            "level_1": np.float64,
            "level_2": level_2_dtype,
            0: object,
        }
    )
    tm.assert_series_equal(result, expected)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "names, expected_names",
    [
        (["A", "A"], ["A", "A"]),
        (["level_1", None], ["level_1", "level_1"]),
    ],
)
@pytest.mark.parametrize("allow_duplicates", [False, True])
def test_column_name_duplicates(names, expected_names, allow_duplicates):
    """Duplicate column labels raise unless ``allow_duplicates=True`` is passed."""
    # GH#44755 reset_index with duplicate column labels
    mi = MultiIndex.from_arrays([[1], [1]], names=names)
    s = Series([1], index=mi)

    if not allow_duplicates:
        with pytest.raises(ValueError, match="cannot insert"):
            s.reset_index()
        return

    result = s.reset_index(allow_duplicates=True)
    expected = DataFrame([[1, 1, 1]], columns=[*expected_names, 0])
    tm.assert_frame_equal(result, expected)
|