# projektAI/venv/Lib/site-packages/pandas/tests/frame/methods/test_sort_index.py
# 2021-06-06 22:13:05 +02:00
#
# 877 lines
# 29 KiB
# Python

import numpy as np
import pytest
import pandas as pd
from pandas import (
CategoricalDtype,
CategoricalIndex,
DataFrame,
Index,
IntervalIndex,
MultiIndex,
Series,
Timestamp,
)
import pandas._testing as tm
class TestDataFrameSortIndex:
def test_sort_index_and_reconstruction_doc_example(self):
# doc example
df = DataFrame(
{"value": [1, 2, 3, 4]},
index=MultiIndex(
levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
),
)
assert df.index.is_lexsorted()
assert not df.index.is_monotonic
# sort it
expected = DataFrame(
{"value": [2, 1, 4, 3]},
index=MultiIndex(
levels=[["a", "b"], ["aa", "bb"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
),
)
result = df.sort_index()
assert result.index.is_lexsorted()
assert result.index.is_monotonic
tm.assert_frame_equal(result, expected)
# reconstruct
result = df.sort_index().copy()
result.index = result.index._sort_levels_monotonic()
assert result.index.is_lexsorted()
assert result.index.is_monotonic
tm.assert_frame_equal(result, expected)
def test_sort_index_non_existent_label_multiindex(self):
# GH#12261
df = DataFrame(0, columns=[], index=MultiIndex.from_product([[], []]))
df.loc["b", "2"] = 1
df.loc["a", "3"] = 1
result = df.sort_index().index.is_monotonic
assert result is True
def test_sort_index_reorder_on_ops(self):
# GH#15687
df = DataFrame(
np.random.randn(8, 2),
index=MultiIndex.from_product(
[["a", "b"], ["big", "small"], ["red", "blu"]],
names=["letter", "size", "color"],
),
columns=["near", "far"],
)
df = df.sort_index()
def my_func(group):
group.index = ["newz", "newa"]
return group
result = df.groupby(level=["letter", "size"]).apply(my_func).sort_index()
expected = MultiIndex.from_product(
[["a", "b"], ["big", "small"], ["newa", "newz"]],
names=["letter", "size", None],
)
tm.assert_index_equal(result.index, expected)
def test_sort_index_nan_multiindex(self):
# GH#14784
# incorrect sorting w.r.t. nans
tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]]
mi = MultiIndex.from_tuples(tuples)
df = DataFrame(np.arange(16).reshape(4, 4), index=mi, columns=list("ABCD"))
s = Series(np.arange(4), index=mi)
df2 = DataFrame(
{
"date": pd.DatetimeIndex(
[
"20121002",
"20121007",
"20130130",
"20130202",
"20130305",
"20121002",
"20121207",
"20130130",
"20130202",
"20130305",
"20130202",
"20130305",
]
),
"user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
"whole_cost": [
1790,
np.nan,
280,
259,
np.nan,
623,
90,
312,
np.nan,
301,
359,
801,
],
"cost": [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12],
}
).set_index(["date", "user_id"])
# sorting frame, default nan position is last
result = df.sort_index()
expected = df.iloc[[3, 0, 2, 1], :]
tm.assert_frame_equal(result, expected)
# sorting frame, nan position last
result = df.sort_index(na_position="last")
expected = df.iloc[[3, 0, 2, 1], :]
tm.assert_frame_equal(result, expected)
# sorting frame, nan position first
result = df.sort_index(na_position="first")
expected = df.iloc[[1, 2, 3, 0], :]
tm.assert_frame_equal(result, expected)
# sorting frame with removed rows
result = df2.dropna().sort_index()
expected = df2.sort_index().dropna()
tm.assert_frame_equal(result, expected)
# sorting series, default nan position is last
result = s.sort_index()
expected = s.iloc[[3, 0, 2, 1]]
tm.assert_series_equal(result, expected)
# sorting series, nan position last
result = s.sort_index(na_position="last")
expected = s.iloc[[3, 0, 2, 1]]
tm.assert_series_equal(result, expected)
# sorting series, nan position first
result = s.sort_index(na_position="first")
expected = s.iloc[[1, 2, 3, 0]]
tm.assert_series_equal(result, expected)
def test_sort_index_nan(self):
# GH#3917
# Test DataFrame with nan label
df = DataFrame(
{"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]},
index=[1, 2, 3, 4, 5, 6, np.nan],
)
# NaN label, ascending=True, na_position='last'
sorted_df = df.sort_index(kind="quicksort", ascending=True, na_position="last")
expected = DataFrame(
{"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]},
index=[1, 2, 3, 4, 5, 6, np.nan],
)
tm.assert_frame_equal(sorted_df, expected)
# NaN label, ascending=True, na_position='first'
sorted_df = df.sort_index(na_position="first")
expected = DataFrame(
{"A": [4, 1, 2, np.nan, 1, 6, 8], "B": [5, 9, np.nan, 5, 2, 5, 4]},
index=[np.nan, 1, 2, 3, 4, 5, 6],
)
tm.assert_frame_equal(sorted_df, expected)
# NaN label, ascending=False, na_position='last'
sorted_df = df.sort_index(kind="quicksort", ascending=False)
expected = DataFrame(
{"A": [8, 6, 1, np.nan, 2, 1, 4], "B": [4, 5, 2, 5, np.nan, 9, 5]},
index=[6, 5, 4, 3, 2, 1, np.nan],
)
tm.assert_frame_equal(sorted_df, expected)
# NaN label, ascending=False, na_position='first'
sorted_df = df.sort_index(
kind="quicksort", ascending=False, na_position="first"
)
expected = DataFrame(
{"A": [4, 8, 6, 1, np.nan, 2, 1], "B": [5, 4, 5, 2, 5, np.nan, 9]},
index=[np.nan, 6, 5, 4, 3, 2, 1],
)
tm.assert_frame_equal(sorted_df, expected)
def test_sort_index_multi_index(self):
# GH#25775, testing that sorting by index works with a multi-index.
df = DataFrame(
{"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
)
result = df.set_index(list("abc")).sort_index(level=list("ba"))
expected = DataFrame(
{"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
)
expected = expected.set_index(list("abc"))
tm.assert_frame_equal(result, expected)
def test_sort_index_inplace(self):
frame = DataFrame(
np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"]
)
# axis=0
unordered = frame.loc[[3, 2, 4, 1]]
a_id = id(unordered["A"])
df = unordered.copy()
return_value = df.sort_index(inplace=True)
assert return_value is None
expected = frame
tm.assert_frame_equal(df, expected)
assert a_id != id(df["A"])
df = unordered.copy()
return_value = df.sort_index(ascending=False, inplace=True)
assert return_value is None
expected = frame[::-1]
tm.assert_frame_equal(df, expected)
# axis=1
unordered = frame.loc[:, ["D", "B", "C", "A"]]
df = unordered.copy()
return_value = df.sort_index(axis=1, inplace=True)
assert return_value is None
expected = frame
tm.assert_frame_equal(df, expected)
df = unordered.copy()
return_value = df.sort_index(axis=1, ascending=False, inplace=True)
assert return_value is None
expected = frame.iloc[:, ::-1]
tm.assert_frame_equal(df, expected)
def test_sort_index_different_sortorder(self):
A = np.arange(20).repeat(5)
B = np.tile(np.arange(5), 20)
indexer = np.random.permutation(100)
A = A.take(indexer)
B = B.take(indexer)
df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)})
ex_indexer = np.lexsort((df.B.max() - df.B, df.A))
expected = df.take(ex_indexer)
# test with multiindex, too
idf = df.set_index(["A", "B"])
result = idf.sort_index(ascending=[1, 0])
expected = idf.take(ex_indexer)
tm.assert_frame_equal(result, expected)
# also, Series!
result = idf["C"].sort_index(ascending=[1, 0])
tm.assert_series_equal(result, expected["C"])
def test_sort_index_level(self):
mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
df = DataFrame([[1, 2], [3, 4]], mi)
result = df.sort_index(level="A", sort_remaining=False)
expected = df
tm.assert_frame_equal(result, expected)
result = df.sort_index(level=["A", "B"], sort_remaining=False)
expected = df
tm.assert_frame_equal(result, expected)
# Error thrown by sort_index when
# first index is sorted last (GH#26053)
result = df.sort_index(level=["C", "B", "A"])
expected = df.iloc[[1, 0]]
tm.assert_frame_equal(result, expected)
result = df.sort_index(level=["B", "C", "A"])
expected = df.iloc[[1, 0]]
tm.assert_frame_equal(result, expected)
result = df.sort_index(level=["C", "A"])
expected = df.iloc[[1, 0]]
tm.assert_frame_equal(result, expected)
def test_sort_index_categorical_index(self):
df = DataFrame(
{
"A": np.arange(6, dtype="int64"),
"B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))),
}
).set_index("B")
result = df.sort_index()
expected = df.iloc[[4, 0, 1, 5, 2, 3]]
tm.assert_frame_equal(result, expected)
result = df.sort_index(ascending=False)
expected = df.iloc[[2, 3, 0, 1, 5, 4]]
tm.assert_frame_equal(result, expected)
def test_sort_index(self):
# GH#13496
frame = DataFrame(
np.arange(16).reshape(4, 4),
index=[1, 2, 3, 4],
columns=["A", "B", "C", "D"],
)
# axis=0 : sort rows by index labels
unordered = frame.loc[[3, 2, 4, 1]]
result = unordered.sort_index(axis=0)
expected = frame
tm.assert_frame_equal(result, expected)
result = unordered.sort_index(ascending=False)
expected = frame[::-1]
tm.assert_frame_equal(result, expected)
# axis=1 : sort columns by column names
unordered = frame.iloc[:, [2, 1, 3, 0]]
result = unordered.sort_index(axis=1)
tm.assert_frame_equal(result, frame)
result = unordered.sort_index(axis=1, ascending=False)
expected = frame.iloc[:, ::-1]
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize("level", ["A", 0]) # GH#21052
def test_sort_index_multiindex(self, level):
# GH#13496
# sort rows by specified level of multi-index
mi = MultiIndex.from_tuples(
[[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC")
)
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi)
expected_mi = MultiIndex.from_tuples(
[[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC")
)
expected = DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi)
result = df.sort_index(level=level)
tm.assert_frame_equal(result, expected)
# sort_remaining=False
expected_mi = MultiIndex.from_tuples(
[[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC")
)
expected = DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi)
result = df.sort_index(level=level, sort_remaining=False)
tm.assert_frame_equal(result, expected)
def test_sort_index_intervalindex(self):
# this is a de-facto sort via unstack
# confirming that we sort in the order of the bins
y = Series(np.random.randn(100))
x1 = Series(np.sign(np.random.randn(100)))
x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3])
model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"])
result = model.groupby(["X1", "X2"], observed=True).mean().unstack()
expected = IntervalIndex.from_tuples(
[(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right"
)
result = result.columns.levels[1].categories
tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_dict, sorted_dict, ascending, ignore_index, output_index",
[
({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]),
({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]),
({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]),
({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, False, [2, 3, 5]),
],
)
def test_sort_index_ignore_index(
self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index
):
# GH 30114
original_index = [2, 5, 3]
df = DataFrame(original_dict, index=original_index)
expected_df = DataFrame(sorted_dict, index=output_index)
kwargs = {
"ascending": ascending,
"ignore_index": ignore_index,
"inplace": inplace,
}
if inplace:
result_df = df.copy()
result_df.sort_index(**kwargs)
else:
result_df = df.sort_index(**kwargs)
tm.assert_frame_equal(result_df, expected_df)
tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index))
@pytest.mark.parametrize("inplace", [True, False])
@pytest.mark.parametrize(
"original_dict, sorted_dict, ascending, ignore_index, output_index",
[
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [1, 2], "M2": [3, 4]},
True,
True,
[0, 1],
),
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [2, 1], "M2": [4, 3]},
False,
True,
[0, 1],
),
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [1, 2], "M2": [3, 4]},
True,
False,
MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")),
),
(
{"M1": [1, 2], "M2": [3, 4]},
{"M1": [2, 1], "M2": [4, 3]},
False,
False,
MultiIndex.from_tuples([[3, 4], [2, 1]], names=list("AB")),
),
],
)
def test_sort_index_ignore_index_multi_index(
self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index
):
# GH 30114, this is to test ignore_index on MulitIndex of index
mi = MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB"))
df = DataFrame(original_dict, index=mi)
expected_df = DataFrame(sorted_dict, index=output_index)
kwargs = {
"ascending": ascending,
"ignore_index": ignore_index,
"inplace": inplace,
}
if inplace:
result_df = df.copy()
result_df.sort_index(**kwargs)
else:
result_df = df.sort_index(**kwargs)
tm.assert_frame_equal(result_df, expected_df)
tm.assert_frame_equal(df, DataFrame(original_dict, index=mi))
def test_sort_index_categorical_multiindex(self):
# GH#15058
df = DataFrame(
{
"a": range(6),
"l1": pd.Categorical(
["a", "a", "b", "b", "c", "c"],
categories=["c", "a", "b"],
ordered=True,
),
"l2": [0, 1, 0, 1, 0, 1],
}
)
result = df.set_index(["l1", "l2"]).sort_index()
expected = DataFrame(
[4, 5, 0, 1, 2, 3],
columns=["a"],
index=MultiIndex(
levels=[
CategoricalIndex(
["c", "a", "b"],
categories=["c", "a", "b"],
ordered=True,
name="l1",
dtype="category",
),
[0, 1],
],
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
names=["l1", "l2"],
),
)
tm.assert_frame_equal(result, expected)
def test_sort_index_and_reconstruction(self):
# GH#15622
# lexsortedness should be identical
# across MultiIndex construction methods
df = DataFrame([[1, 1], [2, 2]], index=list("ab"))
expected = DataFrame(
[[1, 1], [2, 2], [1, 1], [2, 2]],
index=MultiIndex.from_tuples(
[(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")]
),
)
assert expected.index.is_lexsorted()
result = DataFrame(
[[1, 1], [2, 2], [1, 1], [2, 2]],
index=MultiIndex.from_product([[0.5, 0.8], list("ab")]),
)
result = result.sort_index()
assert result.index.is_lexsorted()
assert result.index.is_monotonic
tm.assert_frame_equal(result, expected)
result = DataFrame(
[[1, 1], [2, 2], [1, 1], [2, 2]],
index=MultiIndex(
levels=[[0.5, 0.8], ["a", "b"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
),
)
result = result.sort_index()
assert result.index.is_lexsorted()
tm.assert_frame_equal(result, expected)
concatted = pd.concat([df, df], keys=[0.8, 0.5])
result = concatted.sort_index()
assert result.index.is_lexsorted()
assert result.index.is_monotonic
tm.assert_frame_equal(result, expected)
# GH#14015
df = DataFrame(
[[1, 2], [6, 7]],
columns=MultiIndex.from_tuples(
[(0, "20160811 12:00:00"), (0, "20160809 12:00:00")],
names=["l1", "Date"],
),
)
df.columns = df.columns.set_levels(
pd.to_datetime(df.columns.levels[1]), level=1
)
assert not df.columns.is_lexsorted()
assert not df.columns.is_monotonic
result = df.sort_index(axis=1)
assert result.columns.is_lexsorted()
assert result.columns.is_monotonic
result = df.sort_index(axis=1, level=1)
assert result.columns.is_lexsorted()
assert result.columns.is_monotonic
# TODO: better name, de-duplicate with test_sort_index_level above
def test_sort_index_level2(self):
mi = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
frame = DataFrame(
np.random.randn(10, 3),
index=mi,
columns=Index(["A", "B", "C"], name="exp"),
)
df = frame.copy()
df.index = np.arange(len(df))
# axis=1
# series
a_sorted = frame["A"].sort_index(level=0)
# preserve names
assert a_sorted.index.names == frame.index.names
# inplace
rs = frame.copy()
return_value = rs.sort_index(level=0, inplace=True)
assert return_value is None
tm.assert_frame_equal(rs, frame.sort_index(level=0))
def test_sort_index_level_large_cardinality(self):
# GH#2684 (int64)
index = MultiIndex.from_arrays([np.arange(4000)] * 3)
df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64)
# it works!
result = df.sort_index(level=0)
assert result.index.lexsort_depth == 3
# GH#2684 (int32)
index = MultiIndex.from_arrays([np.arange(4000)] * 3)
df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32)
# it works!
result = df.sort_index(level=0)
assert (result.dtypes.values == df.dtypes.values).all()
assert result.index.lexsort_depth == 3
def test_sort_index_level_by_name(self):
mi = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
frame = DataFrame(
np.random.randn(10, 3),
index=mi,
columns=Index(["A", "B", "C"], name="exp"),
)
frame.index.names = ["first", "second"]
result = frame.sort_index(level="second")
expected = frame.sort_index(level=1)
tm.assert_frame_equal(result, expected)
def test_sort_index_level_mixed(self):
mi = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
frame = DataFrame(
np.random.randn(10, 3),
index=mi,
columns=Index(["A", "B", "C"], name="exp"),
)
sorted_before = frame.sort_index(level=1)
df = frame.copy()
df["foo"] = "bar"
sorted_after = df.sort_index(level=1)
tm.assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1))
dft = frame.T
sorted_before = dft.sort_index(level=1, axis=1)
dft["foo", "three"] = "bar"
sorted_after = dft.sort_index(level=1, axis=1)
tm.assert_frame_equal(
sorted_before.drop([("foo", "three")], axis=1),
sorted_after.drop([("foo", "three")], axis=1),
)
def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
result = frame.sort_index()
assert result.index.names == frame.index.names
@pytest.mark.parametrize(
"gen,extra",
[
([1.0, 3.0, 2.0, 5.0], 4.0),
([1, 3, 2, 5], 4),
(
[
Timestamp("20130101"),
Timestamp("20130103"),
Timestamp("20130102"),
Timestamp("20130105"),
],
Timestamp("20130104"),
),
(["1one", "3one", "2one", "5one"], "4one"),
],
)
def test_sort_index_multilevel_repr_8017(self, gen, extra):
np.random.seed(0)
data = np.random.randn(3, 4)
columns = MultiIndex.from_tuples([("red", i) for i in gen])
df = DataFrame(data, index=list("def"), columns=columns)
df2 = pd.concat(
[
df,
DataFrame(
"world",
index=list("def"),
columns=MultiIndex.from_tuples([("red", extra)]),
),
],
axis=1,
)
# check that the repr is good
# make sure that we have a correct sparsified repr
# e.g. only 1 header of read
assert str(df2).splitlines()[0].split() == ["red"]
# GH 8017
# sorting fails after columns added
# construct single-dtype then sort
result = df.copy().sort_index(axis=1)
expected = df.iloc[:, [0, 2, 1, 3]]
tm.assert_frame_equal(result, expected)
result = df2.sort_index(axis=1)
expected = df2.iloc[:, [0, 2, 1, 4, 3]]
tm.assert_frame_equal(result, expected)
# setitem then sort
result = df.copy()
result[("red", extra)] = "world"
result = result.sort_index(axis=1)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"categories",
[
pytest.param(["a", "b", "c"], id="str"),
pytest.param(
[pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)],
id="pd.Interval",
),
],
)
def test_sort_index_with_categories(self, categories):
# GH#23452
df = DataFrame(
{"foo": range(len(categories))},
index=CategoricalIndex(
data=categories, categories=categories, ordered=True
),
)
df.index = df.index.reorder_categories(df.index.categories[::-1])
result = df.sort_index()
expected = DataFrame(
{"foo": reversed(range(len(categories)))},
index=CategoricalIndex(
data=categories[::-1], categories=categories[::-1], ordered=True
),
)
tm.assert_frame_equal(result, expected)
@pytest.mark.parametrize(
"ascending",
[
None,
[True, None],
[False, "True"],
],
)
def test_sort_index_ascending_bad_value_raises(self, ascending):
# GH 39434
df = DataFrame(np.arange(64))
length = len(df.index)
df.index = [(i - length / 2) % length for i in range(length)]
match = 'For argument "ascending" expected type bool'
with pytest.raises(ValueError, match=match):
df.sort_index(axis=0, ascending=ascending, na_position="first")
class TestDataFrameSortIndexKey:
    """Tests for the ``key`` argument of ``DataFrame.sort_index``."""

    def test_sort_multi_index_key(self):
        """Sort levels "a" and "c" with an identity key and with a negating key."""
        # GH 25775, testing that sorting by index works with a multi-index.
        index_columns = list("abc")
        sort_levels = list("ac")

        def identity(values):
            return values

        def negate(values):
            return -values

        frame = DataFrame(
            {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
        ).set_index(index_columns)

        ascending_result = frame.sort_index(level=sort_levels, key=identity)
        ascending_expected = DataFrame(
            {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
        ).set_index(index_columns)
        tm.assert_frame_equal(ascending_result, ascending_expected)

        negated_result = frame.sort_index(level=sort_levels, key=negate)
        negated_expected = DataFrame(
            {"a": [3, 2, 1], "b": [0, 0, 0], "c": [0, 2, 1], "d": list("acb")}
        ).set_index(index_columns)
        tm.assert_frame_equal(negated_result, negated_expected)

    def test_sort_index_key(self):  # issue 27237
        """Compare the default sort with a lower-casing key on mixed-case labels."""

        def lowered(labels):
            return labels.str.lower()

        frame = DataFrame(np.arange(6, dtype="int64"), index=list("aaBBca"))

        # (sort_index keyword arguments, expected row positions)
        cases = [
            ({}, [2, 3, 0, 1, 5, 4]),
            ({"key": lowered}, [0, 1, 5, 2, 3, 4]),
            ({"key": lowered, "ascending": False}, [4, 2, 3, 0, 1, 5]),
        ]
        for sort_kwargs, positions in cases:
            actual = frame.sort_index(**sort_kwargs)
            wanted = frame.iloc[positions]
            tm.assert_frame_equal(actual, wanted)

    def test_sort_index_key_int(self):
        """Check negating and doubling keys on an already-sorted integer index."""
        labels = np.arange(6, dtype="int64")
        frame = DataFrame(np.arange(6, dtype="int64"), index=labels)

        tm.assert_frame_equal(frame.sort_index(), frame)

        by_negated = frame.sort_index(key=lambda idx: -idx)
        descending = frame.sort_index(ascending=False)
        tm.assert_frame_equal(by_negated, descending)

        by_doubled = frame.sort_index(key=lambda idx: 2 * idx)
        tm.assert_frame_equal(by_doubled, frame)

    def test_sort_multi_index_key_str(self):
        """Apply string-aware keys to one level and to all levels of a MultiIndex."""
        # GH 25775, testing that sorting by index works with a multi-index.
        index_columns = list("abc")

        def lowered(level_values):
            return level_values.str.lower()

        def lower_or_negate(level_values):
            # can refer to names
            if level_values.name in ["a", "c"]:
                return level_values.str.lower()
            return -level_values

        frame = DataFrame(
            {"a": ["B", "a", "C"], "b": [0, 1, 0], "c": list("abc"), "d": [0, 1, 2]}
        ).set_index(index_columns)

        single_level = frame.sort_index(level="a", key=lowered)
        wanted = DataFrame(
            {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]}
        ).set_index(index_columns)
        tm.assert_frame_equal(single_level, wanted)

        all_levels = frame.sort_index(level=list("abc"), key=lower_or_negate)
        wanted = DataFrame(
            {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]}
        ).set_index(index_columns)
        tm.assert_frame_equal(all_levels, wanted)

    def test_changes_length_raises(self):
        """A key that returns fewer elements than it was given must raise."""

        def keep_first_only(labels):
            return labels[:1]

        frame = DataFrame({"A": [1, 2, 3]})
        with pytest.raises(ValueError, match="change the shape"):
            frame.sort_index(key=keep_first_only)

    def test_sort_index_multiindex_sparse_column(self):
        """Level-sort an already-sorted frame of all-zero sparse columns."""
        # GH 29735, testing that sort_index on a multiindexed frame with sparse
        # columns fills with 0.
        sparse_dtype = pd.SparseDtype("float64", 0.0)
        columns = {}
        for label in range(0, 4):
            columns[label] = pd.array([0.0, 0.0, 0.0, 0.0], dtype=sparse_dtype)
        expected = DataFrame(columns, index=MultiIndex.from_product([[1, 2], [1, 2]]))

        result = expected.sort_index(level=0)

        tm.assert_frame_equal(result, expected)