LSR/env/lib/python3.6/site-packages/pandas/tests/indexing/test_floats.py
2020-06-04 17:24:47 +02:00

926 lines
29 KiB
Python

import numpy as np
import pytest
from pandas import DataFrame, Float64Index, Index, Int64Index, RangeIndex, Series
import pandas._testing as tm
class TestFloatIndexers:
def check(self, result, original, indexer, getitem):
"""
comparator for results
we need to take care if we are indexing on a
Series or a frame
"""
if isinstance(original, Series):
expected = original.iloc[indexer]
else:
if getitem:
expected = original.iloc[:, indexer]
else:
expected = original.iloc[indexer]
tm.assert_almost_equal(result, expected)
def test_scalar_error(self):
# GH 4892
# float_indexers should raise exceptions
# on appropriate Index types & accessors
# this duplicates the code below
# but is specifically testing for the error
# message
for index in [
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
tm.makePeriodIndex,
tm.makeIntIndex,
tm.makeRangeIndex,
]:
i = index(5)
s = Series(np.arange(len(i)), index=i)
msg = "Cannot index by location index"
with pytest.raises(TypeError, match=msg):
s.iloc[3.0]
msg = (
"cannot do positional indexing on {klass} with these "
r"indexers \[3\.0\] of {kind}".format(klass=type(i), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s.iloc[3.0] = 0
def test_scalar_non_numeric(self):
# GH 4892
# float_indexers should raise exceptions
# on appropriate Index types & accessors
for index in [
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeCategoricalIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
tm.makePeriodIndex,
]:
i = index(5)
for s in [
Series(np.arange(len(i)), index=i),
DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
]:
# getting
for idxr, getitem in [(lambda x: x.iloc, False), (lambda x: x, True)]:
# gettitem on a DataFrame is a KeyError as it is indexing
# via labels on the columns
if getitem and isinstance(s, DataFrame):
error = KeyError
msg = r"^3(\.0)?$"
else:
error = TypeError
msg = (
r"cannot do (label|index|positional) indexing "
r"on {klass} with these indexers \[3\.0\] of "
r"{kind}|"
"Cannot index by location index with a "
"non-integer key".format(klass=type(i), kind=str(float))
)
with pytest.raises(error, match=msg):
idxr(s)[3.0]
# label based can be a TypeError or KeyError
if s.index.inferred_type in {
"categorical",
"string",
"unicode",
"mixed",
}:
error = KeyError
msg = r"^3$"
else:
error = TypeError
msg = (
r"cannot do (label|index) indexing "
r"on {klass} with these indexers \[3\.0\] of "
r"{kind}".format(klass=type(i), kind=str(float))
)
with pytest.raises(error, match=msg):
s.loc[3.0]
# contains
assert 3.0 not in s
# setting with a float fails with iloc
msg = (
r"cannot do (label|index|positional) indexing"
r" on {klass} with these indexers \[3\.0\] of"
r" {kind}".format(klass=type(i), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s.iloc[3.0] = 0
# setting with an indexer
if s.index.inferred_type in ["categorical"]:
# Value or Type Error
pass
elif s.index.inferred_type in ["datetime64", "timedelta64", "period"]:
# these should prob work
# and are inconsistent between series/dataframe ATM
# for idxr in [lambda x: x]:
# s2 = s.copy()
#
# with pytest.raises(TypeError):
# idxr(s2)[3.0] = 0
pass
else:
s2 = s.copy()
s2.loc[3.0] = 10
assert s2.index.is_object()
for idxr in [lambda x: x]:
s2 = s.copy()
idxr(s2)[3.0] = 0
assert s2.index.is_object()
# fallsback to position selection, series only
s = Series(np.arange(len(i)), index=i)
s[3]
msg = (
r"cannot do (label|index) indexing"
r" on {klass} with these indexers \[3\.0\] of"
r" {kind}".format(klass=type(i), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[3.0]
def test_scalar_with_mixed(self):
s2 = Series([1, 2, 3], index=["a", "b", "c"])
s3 = Series([1, 2, 3], index=["a", "b", 1.5])
# lookup in a pure stringstr
# with an invalid indexer
for idxr in [lambda x: x, lambda x: x.iloc]:
msg = (
r"cannot do label indexing"
r" on {klass} with these indexers \[1\.0\] of"
r" {kind}|"
"Cannot index by location index with a non-integer key".format(
klass=str(Index), kind=str(float)
)
)
with pytest.raises(TypeError, match=msg):
idxr(s2)[1.0]
with pytest.raises(KeyError, match=r"^1$"):
s2.loc[1.0]
result = s2.loc["b"]
expected = 2
assert result == expected
# mixed index so we have label
# indexing
for idxr in [lambda x: x]:
msg = (
r"cannot do label indexing"
r" on {klass} with these indexers \[1\.0\] of"
r" {kind}".format(klass=str(Index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
idxr(s3)[1.0]
result = idxr(s3)[1]
expected = 2
assert result == expected
msg = "Cannot index by location index with a non-integer key"
with pytest.raises(TypeError, match=msg):
s3.iloc[1.0]
with pytest.raises(KeyError, match=r"^1$"):
s3.loc[1.0]
result = s3.loc[1.5]
expected = 3
assert result == expected
def test_scalar_integer(self):
# test how scalar float indexers work on int indexes
# integer index
for i in [Int64Index(range(5)), RangeIndex(5)]:
for s in [
Series(np.arange(len(i))),
DataFrame(np.random.randn(len(i), len(i)), index=i, columns=i),
]:
# coerce to equal int
for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]:
result = idxr(s)[3.0]
self.check(result, s, 3, getitem)
# coerce to equal int
for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]:
if isinstance(s, Series):
def compare(x, y):
assert x == y
expected = 100
else:
compare = tm.assert_series_equal
if getitem:
expected = Series(100, index=range(len(s)), name=3)
else:
expected = Series(100.0, index=range(len(s)), name=3)
s2 = s.copy()
idxr(s2)[3.0] = 100
result = idxr(s2)[3.0]
compare(result, expected)
result = idxr(s2)[3]
compare(result, expected)
# contains
# coerce to equal int
assert 3.0 in s
def test_scalar_float(self):
# scalar float indexers work on a float index
index = Index(np.arange(5.0))
for s in [
Series(np.arange(len(index)), index=index),
DataFrame(
np.random.randn(len(index), len(index)), index=index, columns=index
),
]:
# assert all operations except for iloc are ok
indexer = index[3]
for idxr, getitem in [(lambda x: x.loc, False), (lambda x: x, True)]:
# getting
result = idxr(s)[indexer]
self.check(result, s, 3, getitem)
# setting
s2 = s.copy()
result = idxr(s2)[indexer]
self.check(result, s, 3, getitem)
# random integer is a KeyError
with pytest.raises(KeyError, match=r"^3\.5$"):
idxr(s)[3.5]
# contains
assert 3.0 in s
# iloc succeeds with an integer
expected = s.iloc[3]
s2 = s.copy()
s2.iloc[3] = expected
result = s2.iloc[3]
self.check(result, s, 3, False)
# iloc raises with a float
msg = "Cannot index by location index with a non-integer key"
with pytest.raises(TypeError, match=msg):
s.iloc[3.0]
msg = (
r"cannot do positional indexing"
r" on {klass} with these indexers \[3\.0\] of"
r" {kind}".format(klass=str(Float64Index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s2.iloc[3.0] = 0
def test_slice_non_numeric(self):
# GH 4892
# float_indexers should raise exceptions
# on appropriate Index types & accessors
for index in [
tm.makeStringIndex,
tm.makeUnicodeIndex,
tm.makeDateIndex,
tm.makeTimedeltaIndex,
tm.makePeriodIndex,
]:
index = index(5)
for s in [
Series(range(5), index=index),
DataFrame(np.random.randn(5, 2), index=index),
]:
# getitem
for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(3|4)\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s.iloc[l]
for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]:
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers "
r"\[(3|4)(\.0)?\] "
r"of ({kind_float}|{kind_int})".format(
klass=type(index),
kind_float=str(float),
kind_int=str(int),
)
)
with pytest.raises(TypeError, match=msg):
idxr(s)[l]
# setitem
for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(3|4)\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s.iloc[l] = 0
for idxr in [lambda x: x.loc, lambda x: x.iloc, lambda x: x]:
msg = (
"cannot do slice indexing"
r" on {klass} with these indexers"
r" \[(3|4)(\.0)?\]"
r" of ({kind_float}|{kind_int})".format(
klass=type(index),
kind_float=str(float),
kind_int=str(int),
)
)
with pytest.raises(TypeError, match=msg):
idxr(s)[l] = 0
def test_slice_integer(self):
# same as above, but for Integer based indexes
# these coerce to a like integer
# oob indicates if we are out of bounds
# of positional indexing
for index, oob in [
(Int64Index(range(5)), False),
(RangeIndex(5), False),
(Int64Index(range(5)) + 10, True),
]:
# s is an in-range index
s = Series(range(5), index=index)
# getitem
for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
for idxr in [lambda x: x.loc]:
result = idxr(s)[l]
# these are all label indexing
# except getitem which is positional
# empty
if oob:
indexer = slice(0, 0)
else:
indexer = slice(3, 5)
self.check(result, s, indexer, False)
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(3|4)\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[l]
# getitem out-of-bounds
for l in [slice(-6, 6), slice(-6.0, 6.0)]:
for idxr in [lambda x: x.loc]:
result = idxr(s)[l]
# these are all label indexing
# except getitem which is positional
# empty
if oob:
indexer = slice(0, 0)
else:
indexer = slice(-6, 6)
self.check(result, s, indexer, False)
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[-6\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[slice(-6.0, 6.0)]
# getitem odd floats
for l, res1 in [
(slice(2.5, 4), slice(3, 5)),
(slice(2, 3.5), slice(2, 4)),
(slice(2.5, 3.5), slice(3, 4)),
]:
for idxr in [lambda x: x.loc]:
result = idxr(s)[l]
if oob:
res = slice(0, 0)
else:
res = res1
self.check(result, s, res, False)
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(2|3)\.5\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[l]
# setitem
for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
for idxr in [lambda x: x.loc]:
sc = s.copy()
idxr(sc)[l] = 0
result = idxr(sc)[l].values.ravel()
assert (result == 0).all()
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(3|4)\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[l] = 0
def test_integer_positional_indexing(self):
""" make sure that we are raising on positional indexing
w.r.t. an integer index """
s = Series(range(2, 6), index=range(2, 6))
result = s[2:4]
expected = s.iloc[2:4]
tm.assert_series_equal(result, expected)
for idxr in [lambda x: x, lambda x: x.iloc]:
for l in [slice(2, 4.0), slice(2.0, 4), slice(2.0, 4.0)]:
klass = RangeIndex
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(2|4)\.0\] of "
"{kind}".format(klass=str(klass), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
idxr(s)[l]
def test_slice_integer_frame_getitem(self):
# similar to above, but on the getitem dim (of a DataFrame)
for index in [Int64Index(range(5)), RangeIndex(5)]:
s = DataFrame(np.random.randn(5, 2), index=index)
def f(idxr):
# getitem
for l in [slice(0.0, 1), slice(0, 1.0), slice(0.0, 1.0)]:
result = idxr(s)[l]
indexer = slice(0, 2)
self.check(result, s, indexer, False)
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(0|1)\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[l]
# getitem out-of-bounds
for l in [slice(-10, 10), slice(-10.0, 10.0)]:
result = idxr(s)[l]
self.check(result, s, slice(-10, 10), True)
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[-10\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[slice(-10.0, 10.0)]
# getitem odd floats
for l, res in [
(slice(0.5, 1), slice(1, 2)),
(slice(0, 0.5), slice(0, 1)),
(slice(0.5, 1.5), slice(1, 2)),
]:
result = idxr(s)[l]
self.check(result, s, res, False)
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[0\.5\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[l]
# setitem
for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
sc = s.copy()
idxr(sc)[l] = 0
result = idxr(sc)[l].values.ravel()
assert (result == 0).all()
# positional indexing
msg = (
"cannot do slice indexing "
r"on {klass} with these indexers \[(3|4)\.0\] of "
"{kind}".format(klass=type(index), kind=str(float))
)
with pytest.raises(TypeError, match=msg):
s[l] = 0
f(lambda x: x.loc)
def test_slice_float(self):
# same as above, but for floats
index = Index(np.arange(5.0)) + 0.1
for s in [
Series(range(5), index=index),
DataFrame(np.random.randn(5, 2), index=index),
]:
for l in [slice(3.0, 4), slice(3, 4.0), slice(3.0, 4.0)]:
expected = s.iloc[3:4]
for idxr in [lambda x: x.loc, lambda x: x]:
# getitem
result = idxr(s)[l]
if isinstance(s, Series):
tm.assert_series_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
# setitem
s2 = s.copy()
idxr(s2)[l] = 0
result = idxr(s2)[l].values.ravel()
assert (result == 0).all()
def test_floating_index_doc_example(self):
index = Index([1.5, 2, 3, 4.5, 5])
s = Series(range(5), index=index)
assert s[3] == 2
assert s.loc[3] == 2
assert s.loc[3] == 2
assert s.iloc[3] == 3
def test_floating_misc(self):
# related 236
# scalar/slicing of a float index
s = Series(np.arange(5), index=np.arange(5) * 2.5, dtype=np.int64)
# label based slicing
result1 = s[1.0:3.0]
result2 = s.loc[1.0:3.0]
result3 = s.loc[1.0:3.0]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
# exact indexing when found
result1 = s[5.0]
result2 = s.loc[5.0]
result3 = s.loc[5.0]
assert result1 == result2
assert result1 == result3
result1 = s[5]
result2 = s.loc[5]
result3 = s.loc[5]
assert result1 == result2
assert result1 == result3
assert s[5.0] == s[5]
# value not found (and no fallbacking at all)
# scalar integers
with pytest.raises(KeyError, match=r"^4\.0$"):
s.loc[4]
with pytest.raises(KeyError, match=r"^4\.0$"):
s.loc[4]
with pytest.raises(KeyError, match=r"^4\.0$"):
s[4]
# fancy floats/integers create the correct entry (as nan)
# fancy tests
expected = Series([2, 0], index=Float64Index([5.0, 0.0]))
for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
tm.assert_series_equal(s[fancy_idx], expected)
tm.assert_series_equal(s.loc[fancy_idx], expected)
tm.assert_series_equal(s.loc[fancy_idx], expected)
expected = Series([2, 0], index=Index([5, 0], dtype="int64"))
for fancy_idx in [[5, 0], np.array([5, 0])]: # int
tm.assert_series_equal(s[fancy_idx], expected)
tm.assert_series_equal(s.loc[fancy_idx], expected)
tm.assert_series_equal(s.loc[fancy_idx], expected)
# all should return the same as we are slicing 'the same'
result1 = s.loc[2:5]
result2 = s.loc[2.0:5.0]
result3 = s.loc[2.0:5]
result4 = s.loc[2.1:5]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)
# previously this did fallback indexing
result1 = s[2:5]
result2 = s[2.0:5.0]
result3 = s[2.0:5]
result4 = s[2.1:5]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)
result1 = s.loc[2:5]
result2 = s.loc[2.0:5.0]
result3 = s.loc[2.0:5]
result4 = s.loc[2.1:5]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)
# combined test
result1 = s.loc[2:5]
result2 = s.loc[2:5]
result3 = s[2:5]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
# list selection
result1 = s[[0.0, 5, 10]]
result2 = s.loc[[0.0, 5, 10]]
result3 = s.loc[[0.0, 5, 10]]
result4 = s.iloc[[0, 2, 4]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, result4)
with pytest.raises(KeyError, match="with any missing labels"):
s[[1.6, 5, 10]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[1.6, 5, 10]]
with pytest.raises(KeyError, match="with any missing labels"):
s[[0, 1, 2]]
with pytest.raises(KeyError, match="with any missing labels"):
s.loc[[0, 1, 2]]
result1 = s.loc[[2.5, 5]]
result2 = s.loc[[2.5, 5]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, Series([1, 2], index=[2.5, 5.0]))
result1 = s[[2.5]]
result2 = s.loc[[2.5]]
result3 = s.loc[[2.5]]
tm.assert_series_equal(result1, result2)
tm.assert_series_equal(result1, result3)
tm.assert_series_equal(result1, Series([1], index=[2.5]))
def test_floating_tuples(self):
# see gh-13509
s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.1, 0.2], name="foo")
result = s[0.0]
assert result == (1, 1)
expected = Series([(1, 1), (2, 2)], index=[0.0, 0.0], name="foo")
s = Series([(1, 1), (2, 2), (3, 3)], index=[0.0, 0.0, 0.2], name="foo")
result = s[0.0]
tm.assert_series_equal(result, expected)
def test_float64index_slicing_bug(self):
# GH 5557, related to slicing a float index
ser = {
256: 2321.0,
1: 78.0,
2: 2716.0,
3: 0.0,
4: 369.0,
5: 0.0,
6: 269.0,
7: 0.0,
8: 0.0,
9: 0.0,
10: 3536.0,
11: 0.0,
12: 24.0,
13: 0.0,
14: 931.0,
15: 0.0,
16: 101.0,
17: 78.0,
18: 9643.0,
19: 0.0,
20: 0.0,
21: 0.0,
22: 63761.0,
23: 0.0,
24: 446.0,
25: 0.0,
26: 34773.0,
27: 0.0,
28: 729.0,
29: 78.0,
30: 0.0,
31: 0.0,
32: 3374.0,
33: 0.0,
34: 1391.0,
35: 0.0,
36: 361.0,
37: 0.0,
38: 61808.0,
39: 0.0,
40: 0.0,
41: 0.0,
42: 6677.0,
43: 0.0,
44: 802.0,
45: 0.0,
46: 2691.0,
47: 0.0,
48: 3582.0,
49: 0.0,
50: 734.0,
51: 0.0,
52: 627.0,
53: 70.0,
54: 2584.0,
55: 0.0,
56: 324.0,
57: 0.0,
58: 605.0,
59: 0.0,
60: 0.0,
61: 0.0,
62: 3989.0,
63: 10.0,
64: 42.0,
65: 0.0,
66: 904.0,
67: 0.0,
68: 88.0,
69: 70.0,
70: 8172.0,
71: 0.0,
72: 0.0,
73: 0.0,
74: 64902.0,
75: 0.0,
76: 347.0,
77: 0.0,
78: 36605.0,
79: 0.0,
80: 379.0,
81: 70.0,
82: 0.0,
83: 0.0,
84: 3001.0,
85: 0.0,
86: 1630.0,
87: 7.0,
88: 364.0,
89: 0.0,
90: 67404.0,
91: 9.0,
92: 0.0,
93: 0.0,
94: 7685.0,
95: 0.0,
96: 1017.0,
97: 0.0,
98: 2831.0,
99: 0.0,
100: 2963.0,
101: 0.0,
102: 854.0,
103: 0.0,
104: 0.0,
105: 0.0,
106: 0.0,
107: 0.0,
108: 0.0,
109: 0.0,
110: 0.0,
111: 0.0,
112: 0.0,
113: 0.0,
114: 0.0,
115: 0.0,
116: 0.0,
117: 0.0,
118: 0.0,
119: 0.0,
120: 0.0,
121: 0.0,
122: 0.0,
123: 0.0,
124: 0.0,
125: 0.0,
126: 67744.0,
127: 22.0,
128: 264.0,
129: 0.0,
260: 197.0,
268: 0.0,
265: 0.0,
269: 0.0,
261: 0.0,
266: 1198.0,
267: 0.0,
262: 2629.0,
258: 775.0,
257: 0.0,
263: 0.0,
259: 0.0,
264: 163.0,
250: 10326.0,
251: 0.0,
252: 1228.0,
253: 0.0,
254: 2769.0,
255: 0.0,
}
# smoke test for the repr
s = Series(ser)
result = s.value_counts()
str(result)