import numpy as np import pytest import pandas as pd from pandas import ( CategoricalDtype, CategoricalIndex, DataFrame, Index, IntervalIndex, MultiIndex, Series, Timestamp, ) import pandas._testing as tm class TestDataFrameSortIndex: def test_sort_index_and_reconstruction_doc_example(self): # doc example df = DataFrame( {"value": [1, 2, 3, 4]}, index=MultiIndex( levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] ), ) assert df.index.is_lexsorted() assert not df.index.is_monotonic # sort it expected = DataFrame( {"value": [2, 1, 4, 3]}, index=MultiIndex( levels=[["a", "b"], ["aa", "bb"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] ), ) result = df.sort_index() assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) # reconstruct result = df.sort_index().copy() result.index = result.index._sort_levels_monotonic() assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) def test_sort_index_non_existent_label_multiindex(self): # GH#12261 df = DataFrame(0, columns=[], index=MultiIndex.from_product([[], []])) df.loc["b", "2"] = 1 df.loc["a", "3"] = 1 result = df.sort_index().index.is_monotonic assert result is True def test_sort_index_reorder_on_ops(self): # GH#15687 df = DataFrame( np.random.randn(8, 2), index=MultiIndex.from_product( [["a", "b"], ["big", "small"], ["red", "blu"]], names=["letter", "size", "color"], ), columns=["near", "far"], ) df = df.sort_index() def my_func(group): group.index = ["newz", "newa"] return group result = df.groupby(level=["letter", "size"]).apply(my_func).sort_index() expected = MultiIndex.from_product( [["a", "b"], ["big", "small"], ["newa", "newz"]], names=["letter", "size", None], ) tm.assert_index_equal(result.index, expected) def test_sort_index_nan_multiindex(self): # GH#14784 # incorrect sorting w.r.t. nans tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]] mi = MultiIndex.from_tuples(tuples) df = DataFrame(np.arange(16).reshape(4, 4), index=mi, columns=list("ABCD")) s = Series(np.arange(4), index=mi) df2 = DataFrame( { "date": pd.DatetimeIndex( [ "20121002", "20121007", "20130130", "20130202", "20130305", "20121002", "20121207", "20130130", "20130202", "20130305", "20130202", "20130305", ] ), "user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5], "whole_cost": [ 1790, np.nan, 280, 259, np.nan, 623, 90, 312, np.nan, 301, 359, 801, ], "cost": [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12], } ).set_index(["date", "user_id"]) # sorting frame, default nan position is last result = df.sort_index() expected = df.iloc[[3, 0, 2, 1], :] tm.assert_frame_equal(result, expected) # sorting frame, nan position last result = df.sort_index(na_position="last") expected = df.iloc[[3, 0, 2, 1], :] tm.assert_frame_equal(result, expected) # sorting frame, nan position first result = df.sort_index(na_position="first") expected = df.iloc[[1, 2, 3, 0], :] tm.assert_frame_equal(result, expected) # sorting frame with removed rows result = df2.dropna().sort_index() expected = df2.sort_index().dropna() tm.assert_frame_equal(result, expected) # sorting series, default nan position is last result = s.sort_index() expected = s.iloc[[3, 0, 2, 1]] tm.assert_series_equal(result, expected) # sorting series, nan position last result = s.sort_index(na_position="last") expected = s.iloc[[3, 0, 2, 1]] tm.assert_series_equal(result, expected) # sorting series, nan position first result = s.sort_index(na_position="first") expected = s.iloc[[1, 2, 3, 0]] tm.assert_series_equal(result, expected) def test_sort_index_nan(self): # GH#3917 # Test DataFrame with nan label df = DataFrame( {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, index=[1, 2, 3, 4, 5, 6, np.nan], ) # NaN label, ascending=True, na_position='last' sorted_df = df.sort_index(kind="quicksort", ascending=True, na_position="last") expected = DataFrame( {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]}, index=[1, 2, 3, 4, 5, 6, np.nan], ) tm.assert_frame_equal(sorted_df, expected) # NaN label, ascending=True, na_position='first' sorted_df = df.sort_index(na_position="first") expected = DataFrame( {"A": [4, 1, 2, np.nan, 1, 6, 8], "B": [5, 9, np.nan, 5, 2, 5, 4]}, index=[np.nan, 1, 2, 3, 4, 5, 6], ) tm.assert_frame_equal(sorted_df, expected) # NaN label, ascending=False, na_position='last' sorted_df = df.sort_index(kind="quicksort", ascending=False) expected = DataFrame( {"A": [8, 6, 1, np.nan, 2, 1, 4], "B": [4, 5, 2, 5, np.nan, 9, 5]}, index=[6, 5, 4, 3, 2, 1, np.nan], ) tm.assert_frame_equal(sorted_df, expected) # NaN label, ascending=False, na_position='first' sorted_df = df.sort_index( kind="quicksort", ascending=False, na_position="first" ) expected = DataFrame( {"A": [4, 8, 6, 1, np.nan, 2, 1], "B": [5, 4, 5, 2, 5, np.nan, 9]}, index=[np.nan, 6, 5, 4, 3, 2, 1], ) tm.assert_frame_equal(sorted_df, expected) def test_sort_index_multi_index(self): # GH#25775, testing that sorting by index works with a multi-index. df = DataFrame( {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} ) result = df.set_index(list("abc")).sort_index(level=list("ba")) expected = DataFrame( {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} ) expected = expected.set_index(list("abc")) tm.assert_frame_equal(result, expected) def test_sort_index_inplace(self): frame = DataFrame( np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"] ) # axis=0 unordered = frame.loc[[3, 2, 4, 1]] a_id = id(unordered["A"]) df = unordered.copy() return_value = df.sort_index(inplace=True) assert return_value is None expected = frame tm.assert_frame_equal(df, expected) assert a_id != id(df["A"]) df = unordered.copy() return_value = df.sort_index(ascending=False, inplace=True) assert return_value is None expected = frame[::-1] tm.assert_frame_equal(df, expected) # axis=1 unordered = frame.loc[:, ["D", "B", "C", "A"]] df = unordered.copy() return_value = df.sort_index(axis=1, inplace=True) assert return_value is None expected = frame tm.assert_frame_equal(df, expected) df = unordered.copy() return_value = df.sort_index(axis=1, ascending=False, inplace=True) assert return_value is None expected = frame.iloc[:, ::-1] tm.assert_frame_equal(df, expected) def test_sort_index_different_sortorder(self): A = np.arange(20).repeat(5) B = np.tile(np.arange(5), 20) indexer = np.random.permutation(100) A = A.take(indexer) B = B.take(indexer) df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) # test with multiindex, too idf = df.set_index(["A", "B"]) result = idf.sort_index(ascending=[1, 0]) expected = idf.take(ex_indexer) tm.assert_frame_equal(result, expected) # also, Series! result = idf["C"].sort_index(ascending=[1, 0]) tm.assert_series_equal(result, expected["C"]) def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) df = DataFrame([[1, 2], [3, 4]], mi) result = df.sort_index(level="A", sort_remaining=False) expected = df tm.assert_frame_equal(result, expected) result = df.sort_index(level=["A", "B"], sort_remaining=False) expected = df tm.assert_frame_equal(result, expected) # Error thrown by sort_index when # first index is sorted last (GH#26053) result = df.sort_index(level=["C", "B", "A"]) expected = df.iloc[[1, 0]] tm.assert_frame_equal(result, expected) result = df.sort_index(level=["B", "C", "A"]) expected = df.iloc[[1, 0]] tm.assert_frame_equal(result, expected) result = df.sort_index(level=["C", "A"]) expected = df.iloc[[1, 0]] tm.assert_frame_equal(result, expected) def test_sort_index_categorical_index(self): df = DataFrame( { "A": np.arange(6, dtype="int64"), "B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))), } ).set_index("B") result = df.sort_index() expected = df.iloc[[4, 0, 1, 5, 2, 3]] tm.assert_frame_equal(result, expected) result = df.sort_index(ascending=False) expected = df.iloc[[2, 3, 0, 1, 5, 4]] tm.assert_frame_equal(result, expected) def test_sort_index(self): # GH#13496 frame = DataFrame( np.arange(16).reshape(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"], ) # axis=0 : sort rows by index labels unordered = frame.loc[[3, 2, 4, 1]] result = unordered.sort_index(axis=0) expected = frame tm.assert_frame_equal(result, expected) result = unordered.sort_index(ascending=False) expected = frame[::-1] tm.assert_frame_equal(result, expected) # axis=1 : sort columns by column names unordered = frame.iloc[:, [2, 1, 3, 0]] result = unordered.sort_index(axis=1) tm.assert_frame_equal(result, frame) result = unordered.sort_index(axis=1, ascending=False) expected = frame.iloc[:, ::-1] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("level", ["A", 0]) # GH#21052 def test_sort_index_multiindex(self, level): # GH#13496 # sort rows by specified level of multi-index mi = MultiIndex.from_tuples( [[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC") ) df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) expected_mi = MultiIndex.from_tuples( [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC") ) expected = DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi) result = df.sort_index(level=level) tm.assert_frame_equal(result, expected) # sort_remaining=False expected_mi = MultiIndex.from_tuples( [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC") ) expected = DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi) result = df.sort_index(level=level, sort_remaining=False) tm.assert_frame_equal(result, expected) def test_sort_index_intervalindex(self): # this is a de-facto sort via unstack # confirming that we sort in the order of the bins y = Series(np.random.randn(100)) x1 = Series(np.sign(np.random.randn(100))) x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3]) model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"]) result = model.groupby(["X1", "X2"], observed=True).mean().unstack() expected = IntervalIndex.from_tuples( [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right" ) result = result.columns.levels[1].categories tm.assert_index_equal(result, expected) @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize( "original_dict, sorted_dict, ascending, ignore_index, output_index", [ ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, True, [0, 1, 2]), ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, True, [0, 1, 2]), ({"A": [1, 2, 3]}, {"A": [2, 3, 1]}, False, False, [5, 3, 2]), ({"A": [1, 2, 3]}, {"A": [1, 3, 2]}, True, False, [2, 3, 5]), ], ) def test_sort_index_ignore_index( self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index ): # GH 30114 original_index = [2, 5, 3] df = DataFrame(original_dict, index=original_index) expected_df = DataFrame(sorted_dict, index=output_index) kwargs = { "ascending": ascending, "ignore_index": ignore_index, "inplace": inplace, } if inplace: result_df = df.copy() result_df.sort_index(**kwargs) else: result_df = df.sort_index(**kwargs) tm.assert_frame_equal(result_df, expected_df) tm.assert_frame_equal(df, DataFrame(original_dict, index=original_index)) @pytest.mark.parametrize("inplace", [True, False]) @pytest.mark.parametrize( "original_dict, sorted_dict, ascending, ignore_index, output_index", [ ( {"M1": [1, 2], "M2": [3, 4]}, {"M1": [1, 2], "M2": [3, 4]}, True, True, [0, 1], ), ( {"M1": [1, 2], "M2": [3, 4]}, {"M1": [2, 1], "M2": [4, 3]}, False, True, [0, 1], ), ( {"M1": [1, 2], "M2": [3, 4]}, {"M1": [1, 2], "M2": [3, 4]}, True, False, MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")), ), ( {"M1": [1, 2], "M2": [3, 4]}, {"M1": [2, 1], "M2": [4, 3]}, False, False, MultiIndex.from_tuples([[3, 4], [2, 1]], names=list("AB")), ), ], ) def test_sort_index_ignore_index_multi_index( self, inplace, original_dict, sorted_dict, ascending, ignore_index, output_index ): # GH 30114, this is to test ignore_index on MulitIndex of index mi = MultiIndex.from_tuples([[2, 1], [3, 4]], names=list("AB")) df = DataFrame(original_dict, index=mi) expected_df = DataFrame(sorted_dict, index=output_index) kwargs = { "ascending": ascending, "ignore_index": ignore_index, "inplace": inplace, } if inplace: result_df = df.copy() result_df.sort_index(**kwargs) else: result_df = df.sort_index(**kwargs) tm.assert_frame_equal(result_df, expected_df) tm.assert_frame_equal(df, DataFrame(original_dict, index=mi)) def test_sort_index_categorical_multiindex(self): # GH#15058 df = DataFrame( { "a": range(6), "l1": pd.Categorical( ["a", "a", "b", "b", "c", "c"], categories=["c", "a", "b"], ordered=True, ), "l2": [0, 1, 0, 1, 0, 1], } ) result = df.set_index(["l1", "l2"]).sort_index() expected = DataFrame( [4, 5, 0, 1, 2, 3], columns=["a"], index=MultiIndex( levels=[ CategoricalIndex( ["c", "a", "b"], categories=["c", "a", "b"], ordered=True, name="l1", dtype="category", ), [0, 1], ], codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]], names=["l1", "l2"], ), ) tm.assert_frame_equal(result, expected) def test_sort_index_and_reconstruction(self): # GH#15622 # lexsortedness should be identical # across MultiIndex construction methods df = DataFrame([[1, 1], [2, 2]], index=list("ab")) expected = DataFrame( [[1, 1], [2, 2], [1, 1], [2, 2]], index=MultiIndex.from_tuples( [(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")] ), ) assert expected.index.is_lexsorted() result = DataFrame( [[1, 1], [2, 2], [1, 1], [2, 2]], index=MultiIndex.from_product([[0.5, 0.8], list("ab")]), ) result = result.sort_index() assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) result = DataFrame( [[1, 1], [2, 2], [1, 1], [2, 2]], index=MultiIndex( levels=[[0.5, 0.8], ["a", "b"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]] ), ) result = result.sort_index() assert result.index.is_lexsorted() tm.assert_frame_equal(result, expected) concatted = pd.concat([df, df], keys=[0.8, 0.5]) result = concatted.sort_index() assert result.index.is_lexsorted() assert result.index.is_monotonic tm.assert_frame_equal(result, expected) # GH#14015 df = DataFrame( [[1, 2], [6, 7]], columns=MultiIndex.from_tuples( [(0, "20160811 12:00:00"), (0, "20160809 12:00:00")], names=["l1", "Date"], ), ) df.columns = df.columns.set_levels( pd.to_datetime(df.columns.levels[1]), level=1 ) assert not df.columns.is_lexsorted() assert not df.columns.is_monotonic result = df.sort_index(axis=1) assert result.columns.is_lexsorted() assert result.columns.is_monotonic result = df.sort_index(axis=1, level=1) assert result.columns.is_lexsorted() assert result.columns.is_monotonic # TODO: better name, de-duplicate with test_sort_index_level above def test_sort_index_level2(self): mi = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) frame = DataFrame( np.random.randn(10, 3), index=mi, columns=Index(["A", "B", "C"], name="exp"), ) df = frame.copy() df.index = np.arange(len(df)) # axis=1 # series a_sorted = frame["A"].sort_index(level=0) # preserve names assert a_sorted.index.names == frame.index.names # inplace rs = frame.copy() return_value = rs.sort_index(level=0, inplace=True) assert return_value is None tm.assert_frame_equal(rs, frame.sort_index(level=0)) def test_sort_index_level_large_cardinality(self): # GH#2684 (int64) index = MultiIndex.from_arrays([np.arange(4000)] * 3) df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64) # it works! result = df.sort_index(level=0) assert result.index.lexsort_depth == 3 # GH#2684 (int32) index = MultiIndex.from_arrays([np.arange(4000)] * 3) df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32) # it works! result = df.sort_index(level=0) assert (result.dtypes.values == df.dtypes.values).all() assert result.index.lexsort_depth == 3 def test_sort_index_level_by_name(self): mi = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) frame = DataFrame( np.random.randn(10, 3), index=mi, columns=Index(["A", "B", "C"], name="exp"), ) frame.index.names = ["first", "second"] result = frame.sort_index(level="second") expected = frame.sort_index(level=1) tm.assert_frame_equal(result, expected) def test_sort_index_level_mixed(self): mi = MultiIndex( levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]], codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=["first", "second"], ) frame = DataFrame( np.random.randn(10, 3), index=mi, columns=Index(["A", "B", "C"], name="exp"), ) sorted_before = frame.sort_index(level=1) df = frame.copy() df["foo"] = "bar" sorted_after = df.sort_index(level=1) tm.assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1)) dft = frame.T sorted_before = dft.sort_index(level=1, axis=1) dft["foo", "three"] = "bar" sorted_after = dft.sort_index(level=1, axis=1) tm.assert_frame_equal( sorted_before.drop([("foo", "three")], axis=1), sorted_after.drop([("foo", "three")], axis=1), ) def test_sort_index_preserve_levels(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data result = frame.sort_index() assert result.index.names == frame.index.names @pytest.mark.parametrize( "gen,extra", [ ([1.0, 3.0, 2.0, 5.0], 4.0), ([1, 3, 2, 5], 4), ( [ Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102"), Timestamp("20130105"), ], Timestamp("20130104"), ), (["1one", "3one", "2one", "5one"], "4one"), ], ) def test_sort_index_multilevel_repr_8017(self, gen, extra): np.random.seed(0) data = np.random.randn(3, 4) columns = MultiIndex.from_tuples([("red", i) for i in gen]) df = DataFrame(data, index=list("def"), columns=columns) df2 = pd.concat( [ df, DataFrame( "world", index=list("def"), columns=MultiIndex.from_tuples([("red", extra)]), ), ], axis=1, ) # check that the repr is good # make sure that we have a correct sparsified repr # e.g. only 1 header of read assert str(df2).splitlines()[0].split() == ["red"] # GH 8017 # sorting fails after columns added # construct single-dtype then sort result = df.copy().sort_index(axis=1) expected = df.iloc[:, [0, 2, 1, 3]] tm.assert_frame_equal(result, expected) result = df2.sort_index(axis=1) expected = df2.iloc[:, [0, 2, 1, 4, 3]] tm.assert_frame_equal(result, expected) # setitem then sort result = df.copy() result[("red", extra)] = "world" result = result.sort_index(axis=1) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "categories", [ pytest.param(["a", "b", "c"], id="str"), pytest.param( [pd.Interval(0, 1), pd.Interval(1, 2), pd.Interval(2, 3)], id="pd.Interval", ), ], ) def test_sort_index_with_categories(self, categories): # GH#23452 df = DataFrame( {"foo": range(len(categories))}, index=CategoricalIndex( data=categories, categories=categories, ordered=True ), ) df.index = df.index.reorder_categories(df.index.categories[::-1]) result = df.sort_index() expected = DataFrame( {"foo": reversed(range(len(categories)))}, index=CategoricalIndex( data=categories[::-1], categories=categories[::-1], ordered=True ), ) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "ascending", [ None, [True, None], [False, "True"], ], ) def test_sort_index_ascending_bad_value_raises(self, ascending): # GH 39434 df = DataFrame(np.arange(64)) length = len(df.index) df.index = [(i - length / 2) % length for i in range(length)] match = 'For argument "ascending" expected type bool' with pytest.raises(ValueError, match=match): df.sort_index(axis=0, ascending=ascending, na_position="first") class TestDataFrameSortIndexKey: def test_sort_multi_index_key(self): # GH 25775, testing that sorting by index works with a multi-index. df = DataFrame( {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")} ).set_index(list("abc")) result = df.sort_index(level=list("ac"), key=lambda x: x) expected = DataFrame( {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")} ).set_index(list("abc")) tm.assert_frame_equal(result, expected) result = df.sort_index(level=list("ac"), key=lambda x: -x) expected = DataFrame( {"a": [3, 2, 1], "b": [0, 0, 0], "c": [0, 2, 1], "d": list("acb")} ).set_index(list("abc")) tm.assert_frame_equal(result, expected) def test_sort_index_key(self): # issue 27237 df = DataFrame(np.arange(6, dtype="int64"), index=list("aaBBca")) result = df.sort_index() expected = df.iloc[[2, 3, 0, 1, 5, 4]] tm.assert_frame_equal(result, expected) result = df.sort_index(key=lambda x: x.str.lower()) expected = df.iloc[[0, 1, 5, 2, 3, 4]] tm.assert_frame_equal(result, expected) result = df.sort_index(key=lambda x: x.str.lower(), ascending=False) expected = df.iloc[[4, 2, 3, 0, 1, 5]] tm.assert_frame_equal(result, expected) def test_sort_index_key_int(self): df = DataFrame(np.arange(6, dtype="int64"), index=np.arange(6, dtype="int64")) result = df.sort_index() tm.assert_frame_equal(result, df) result = df.sort_index(key=lambda x: -x) expected = df.sort_index(ascending=False) tm.assert_frame_equal(result, expected) result = df.sort_index(key=lambda x: 2 * x) tm.assert_frame_equal(result, df) def test_sort_multi_index_key_str(self): # GH 25775, testing that sorting by index works with a multi-index. df = DataFrame( {"a": ["B", "a", "C"], "b": [0, 1, 0], "c": list("abc"), "d": [0, 1, 2]} ).set_index(list("abc")) result = df.sort_index(level="a", key=lambda x: x.str.lower()) expected = DataFrame( {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]} ).set_index(list("abc")) tm.assert_frame_equal(result, expected) result = df.sort_index( level=list("abc"), # can refer to names key=lambda x: x.str.lower() if x.name in ["a", "c"] else -x, ) expected = DataFrame( {"a": ["a", "B", "C"], "b": [1, 0, 0], "c": list("bac"), "d": [1, 0, 2]} ).set_index(list("abc")) tm.assert_frame_equal(result, expected) def test_changes_length_raises(self): df = DataFrame({"A": [1, 2, 3]}) with pytest.raises(ValueError, match="change the shape"): df.sort_index(key=lambda x: x[:1]) def test_sort_index_multiindex_sparse_column(self): # GH 29735, testing that sort_index on a multiindexed frame with sparse # columns fills with 0. expected = DataFrame( { i: pd.array([0.0, 0.0, 0.0, 0.0], dtype=pd.SparseDtype("float64", 0.0)) for i in range(0, 4) }, index=MultiIndex.from_product([[1, 2], [1, 2]]), ) result = expected.sort_index(level=0) tm.assert_frame_equal(result, expected)