98 lines
2.5 KiB
Python
98 lines
2.5 KiB
Python
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
|
|
from io import StringIO
|
|
|
|
import pytest
|
|
|
|
from pandas import DataFrame
|
|
import pandas._testing as tm
|
|
|
|
_msg_validate_usecols_arg = (
|
|
"'usecols' must either be list-like "
|
|
"of all strings, all unicode, all "
|
|
"integers or a callable."
|
|
)
|
|
_msg_validate_usecols_names = (
|
|
"Usecols do not match columns, columns expected but not found: {0}"
|
|
)
|
|
|
|
|
|
def test_usecols_with_unicode_strings(all_parsers):
    """Check that ``usecols`` given as multi-character ``str`` names keeps only those columns."""
    # see gh-13219
    csv_text = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""

    # Expected frame holds just the two requested columns, keyed by row label.
    float_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    int_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({"AAA": float_column, "BBB": int_column})

    result = all_parsers.read_csv(StringIO(csv_text), usecols=["AAA", "BBB"])
    tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
def test_usecols_with_single_byte_unicode_strings(all_parsers):
    """Check that ``usecols`` given as single-character ``str`` names keeps only those columns."""
    # see gh-13219
    csv_text = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""

    # Expected frame holds just the two requested columns, keyed by row label.
    float_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    int_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({"A": float_column, "B": int_column})

    result = all_parsers.read_csv(StringIO(csv_text), usecols=["A", "B"])
    tm.assert_frame_equal(result, expected)
|
|
|
|
|
|
@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]])
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
    """Check that mixing ``str`` and ``bytes`` names in ``usecols`` raises ``ValueError``."""
    csv_text = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    buffer = StringIO(csv_text)

    # Only the read itself is expected to raise, so keep it alone in the block.
    with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
        all_parsers.read_csv(buffer, usecols=usecols)
|
|
|
|
|
|
@pytest.mark.parametrize("usecols", [["あああ", "いい"]])
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
    """
    Check that ``usecols`` can select columns whose names are multi-byte strings.

    A single parametrized case is enough: listing the same ``str`` names
    twice would only run the identical check a second time. The ``usecols``
    parameter is kept so the test signature stays the same.

    Parameters
    ----------
    all_parsers : fixture
        Parser object exposing ``read_csv``.
    usecols : list of str
        Column names to select from the CSV header.
    """
    data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    # Only the two requested columns should be present in the result.
    exp_data = {
        "あああ": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002,
        },
        "いい": {0: 8, 1: 2, 2: 7},
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)
|