213 lines
7.8 KiB
Python
213 lines
7.8 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
from pandas import Categorical, DataFrame, Series
|
|
import pandas._testing as tm
|
|
|
|
|
|
class TestSeriesSortValues:
    """Tests for ``Series.sort_values``.

    Covers value/index reordering, NaN placement, validation of the
    ``ascending`` argument, in-place sorting, categorical data and the
    ``ignore_index`` option.
    """

    def test_sort_values(self, datetime_series):
        """Check ordering, ``na_position``, ``ascending`` and ``inplace``.

        ``datetime_series`` is not defined in this file; presumably it is a
        pytest fixture supplied by the project's conftest.
        """
        # check indexes are reordered corresponding with the values
        ser = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
        expected = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
        result = ser.sort_values()
        tm.assert_series_equal(result, expected)

        ts = datetime_series.copy()
        # Use np.nan: the np.NaN alias was removed in NumPy 2.0.
        ts[:5] = np.nan
        vals = ts.values

        # by default the NaNs end up at the tail of the result
        result = ts.sort_values()
        assert np.isnan(result[-5:]).all()
        tm.assert_numpy_array_equal(result[:-5].values, np.sort(vals[5:]))

        # na_position
        result = ts.sort_values(na_position="first")
        assert np.isnan(result[:5]).all()
        tm.assert_numpy_array_equal(result[5:].values, np.sort(vals[5:]))

        # something object-type
        ser = Series(["A", "B"], [1, 2])
        # no failure
        ser.sort_values()

        # ascending=False
        ordered = ts.sort_values(ascending=False)
        expected = np.sort(ts.dropna().values)[::-1]
        tm.assert_almost_equal(ordered.dropna().values, expected)
        ordered = ts.sort_values(ascending=False, na_position="first")
        tm.assert_almost_equal(ordered.dropna().values, expected)

        # ascending=[False] should behave the same as ascending=False
        ordered = ts.sort_values(ascending=[False])
        expected = ts.sort_values(ascending=False)
        tm.assert_series_equal(ordered, expected)
        ordered = ts.sort_values(ascending=[False], na_position="first")
        expected = ts.sort_values(ascending=False, na_position="first")
        tm.assert_series_equal(ordered, expected)

        # invalid values for ``ascending`` must raise ValueError
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=None)
        msg = r"Length of ascending \(0\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[])
        msg = r"Length of ascending \(3\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[1, 2, 3])
        msg = r"Length of ascending \(2\) must be 1 for Series"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending=[False, False])
        msg = "ascending must be boolean"
        with pytest.raises(ValueError, match=msg):
            ts.sort_values(ascending="foobar")

        # inplace=True
        ts = datetime_series.copy()
        return_value = ts.sort_values(ascending=False, inplace=True)
        assert return_value is None
        tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False))
        tm.assert_index_equal(
            ts.index, datetime_series.sort_values(ascending=False).index
        )

        # GH#5856/5853
        # Series.sort_values operating on a view
        df = DataFrame(np.random.randn(10, 4))
        s = df.iloc[:, 0]

        msg = (
            "This Series is a view of some other array, to sort in-place "
            "you must create a copy"
        )
        with pytest.raises(ValueError, match=msg):
            s.sort_values(inplace=True)

    def test_sort_values_categorical(self):
        """Check sorting of categorical Series and categorical frame columns."""
        c = Categorical(["a", "b", "b", "a"], ordered=False)
        cat = Series(c.copy())

        # sort in the categories order
        expected = Series(
            Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
        )
        result = cat.sort_values()
        tm.assert_series_equal(result, expected)

        cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)

        cat = Series(
            Categorical(
                ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
            )
        )
        res = cat.sort_values()
        exp = np.array(["a", "b", "c", "d"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)

        res = cat.sort_values(ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(res.__array__(), exp)

        raw_cat1 = Categorical(
            ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
        )
        raw_cat2 = Categorical(
            ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
        )
        s = ["a", "b", "c", "d"]
        df = DataFrame(
            {"unsort": raw_cat1, "sort": raw_cat2, "string": s, "values": [1, 2, 3, 4]}
        )

        # Cats must be sorted in a dataframe
        res = df.sort_values(by=["string"], ascending=False)
        exp = np.array(["d", "c", "b", "a"], dtype=np.object_)
        tm.assert_numpy_array_equal(res["sort"].values.__array__(), exp)
        assert res["sort"].dtype == "category"

        # "sort" has reversed categories, so descending by "sort" must match
        # ascending by the plain "string" column
        res = df.sort_values(by=["sort"], ascending=False)
        exp = df.sort_values(by=["string"], ascending=True)
        tm.assert_series_equal(res["values"], exp["values"])
        assert res["sort"].dtype == "category"
        assert res["unsort"].dtype == "category"

        # unordered cat, but we allow this
        df.sort_values(by=["unsort"], ascending=False)

        # multi-columns sort
        # GH#7848
        df = DataFrame(
            {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
        )
        df["grade"] = Categorical(df["raw_grade"], ordered=True)
        df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])

        # sorts 'grade' according to the order of the categories
        result = df.sort_values(by=["grade"])
        expected = df.iloc[[1, 2, 5, 0, 3, 4]]
        tm.assert_frame_equal(result, expected)

        # multi
        result = df.sort_values(by=["grade", "id"])
        expected = df.iloc[[2, 1, 5, 4, 3, 0]]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize(
        "original_list, sorted_list, ignore_index, output_index",
        [
            ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
        ],
    )
    def test_sort_values_ignore_index(
        self, inplace, original_list, sorted_list, ignore_index, output_index
    ):
        """Check the resulting index with and without ``ignore_index``."""
        # GH 30114
        ser = Series(original_list)
        expected = Series(sorted_list, index=output_index)
        kwargs = {"ignore_index": ignore_index, "inplace": inplace}

        if inplace:
            # sort a copy so that ``ser`` can be checked for non-mutation below
            result_ser = ser.copy()
            result_ser.sort_values(ascending=False, **kwargs)
        else:
            result_ser = ser.sort_values(ascending=False, **kwargs)

        tm.assert_series_equal(result_ser, expected)
        # the source Series must be left untouched in both modes
        tm.assert_series_equal(ser, Series(original_list))
|
|
|
|
|
|
class TestSeriesSortingKey:
    """Tests for the ``key`` argument of ``Series.sort_values``."""

    def test_sort_values_key(self):
        """A string ``key`` function changes the resulting order."""
        series = Series(np.array(["Hello", "goodbye"]))

        # ``axis`` is passed by keyword: positional arguments to sort_values
        # were deprecated in pandas 1.3 and are rejected from pandas 2.0 on.
        result = series.sort_values(axis=0)
        expected = series
        tm.assert_series_equal(result, expected)

        # lower-casing the values reverses the order of the two strings
        result = series.sort_values(axis=0, key=lambda x: x.str.lower())
        expected = series[::-1]
        tm.assert_series_equal(result, expected)

    def test_sort_values_key_nan(self):
        """NaNs stay at the end for each ``key`` function used here."""
        series = Series(np.array([0, 5, np.nan, 3, 2, np.nan]))

        result = series.sort_values(axis=0)
        expected = series.iloc[[0, 4, 3, 1, 2, 5]]
        tm.assert_series_equal(result, expected)

        # adding a constant leaves the relative order unchanged
        result = series.sort_values(axis=0, key=lambda x: x + 5)
        expected = series.iloc[[0, 4, 3, 1, 2, 5]]
        tm.assert_series_equal(result, expected)

        # negating the key and sorting descending yields the ascending order
        result = series.sort_values(axis=0, key=lambda x: -x, ascending=False)
        expected = series.iloc[[0, 4, 3, 1, 2, 5]]
        tm.assert_series_equal(result, expected)
|