LSR/env/lib/python3.6/site-packages/pandas/tests/io/test_feather.py
2020-06-04 17:24:47 +02:00

151 lines
4.5 KiB
Python

""" test feather-format compat """
from distutils.version import LooseVersion
import numpy as np
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.io.feather_format import read_feather, to_feather # noqa: E402 isort:skip
# Skip every test in this module when pyarrow cannot be imported.
pyarrow = pytest.importorskip("pyarrow")
# Installed pyarrow version in a comparable form (not referenced by the code
# visible in this file).
pyarrow_version = LooseVersion(pyarrow.__version__)
# Reusable mark that silences warnings whose message starts with "The Sparse".
filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse")
@filter_sparse
@pytest.mark.single
class TestFeather:
    """Round-trip and failure-mode tests for ``to_feather`` / ``read_feather``."""

    def check_error_on_write(self, df, exc):
        """Assert that writing ``df`` to a temporary feather file raises ``exc``."""
        with pytest.raises(exc), tm.ensure_clean() as tmp_path:
            to_feather(df, tmp_path)

    def check_round_trip(self, df, expected=None, **kwargs):
        """Write ``df``, read it back and compare the result with ``expected``.

        ``expected`` falls back to ``df`` itself when omitted; any extra
        keyword arguments are forwarded to ``read_feather``.
        """
        expected = df if expected is None else expected

        with tm.ensure_clean() as tmp_path:
            to_feather(df, tmp_path)
            round_tripped = read_feather(tmp_path, **kwargs)
            tm.assert_frame_equal(round_tripped, expected)

    def test_error(self):
        """Objects that are not DataFrames must be rejected with ValueError."""
        non_frames = [
            pd.Series([1, 2, 3]),
            1,
            "foo",
            pd.Timestamp("20130101"),
            np.array([1, 2, 3]),
        ]
        for candidate in non_frames:
            self.check_error_on_write(candidate, ValueError)

    def test_basic(self):
        """A frame mixing the common column dtypes survives a round trip."""
        column_data = {
            "string": list("abc"),
            "int": list(range(1, 4)),
            "uint": np.arange(3, 6).astype("u1"),
            "float": np.arange(4.0, 7.0, dtype="float64"),
            "float_with_null": [1.0, np.nan, 3],
            "bool": [True, False, True],
            "bool_with_null": [True, np.nan, False],
            "cat": pd.Categorical(list("abc")),
            "dt": pd.date_range("20130101", periods=3),
            "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
            "dt_with_null": [
                pd.Timestamp("20130101"),
                pd.NaT,
                pd.Timestamp("20130103"),
            ],
            "dtns": pd.date_range("20130101", periods=3, freq="ns"),
        }
        df = pd.DataFrame(column_data)

        # Sanity-check the timezone on the input before round-tripping it.
        assert df["dttz"].dtype.tz.zone == "US/Eastern"
        self.check_round_trip(df)

    def test_duplicate_columns(self):
        """Duplicate column labels are refused on write."""
        # https://github.com/wesm/feather/issues/53
        # not currently able to handle duplicate columns
        values = np.arange(12).reshape(4, 3)
        df = pd.DataFrame(values, columns=list("aaa")).copy()
        self.check_error_on_write(df, ValueError)

    def test_stringify_columns(self):
        """Default integer column labels are refused on write."""
        values = np.arange(12).reshape(4, 3)
        df = pd.DataFrame(values).copy()
        self.check_error_on_write(df, ValueError)

    def test_read_columns(self):
        """``columns=`` restricts the frame returned by ``read_feather``."""
        # GH 24025
        df = pd.DataFrame(
            {
                "col1": list("abc"),
                "col2": list(range(1, 4)),
                "col3": list("xyz"),
                "col4": list(range(4, 7)),
            }
        )
        wanted = ["col1", "col3"]
        self.check_round_trip(df, expected=df[wanted], columns=wanted)

    def test_unsupported_other(self):
        """A period-typed column cannot be written."""
        # period
        periods = pd.period_range("2013", freq="M", periods=3)
        df = pd.DataFrame({"a": periods})
        # Some versions raise ValueError, others raise ArrowInvalid.
        self.check_error_on_write(df, Exception)

    def test_rw_use_threads(self):
        """Reading round-trips with ``use_threads`` both on and off."""
        df = pd.DataFrame({"A": np.arange(100000)})
        for threaded in (True, False):
            self.check_round_trip(df, use_threads=threaded)

    def test_write_with_index(self):
        """Only a default, unnamed index with flat columns can be written."""
        df = pd.DataFrame({"A": [1, 2, 3]})
        self.check_round_trip(df)

        # non-default index
        rejected_indexes = [
            [2, 3, 4],
            pd.date_range("20130101", periods=3),
            list("abc"),
            [1, 3, 4],
            pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]),
        ]
        for bad_index in rejected_indexes:
            df.index = bad_index
            self.check_error_on_write(df, ValueError)

        # index with meta-data
        df.index = [0, 1, 2]
        df.index.name = "foo"
        self.check_error_on_write(df, ValueError)

        # column multi-index
        df.index = [0, 1, 2]
        df.columns = pd.MultiIndex.from_tuples([("a", 1)])
        self.check_error_on_write(df, ValueError)

    def test_path_pathlib(self):
        """Round trip through the ``tm.round_trip_pathlib`` helper."""
        df = tm.makeDataFrame().reset_index()
        round_tripped = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, round_tripped)

    def test_path_localpath(self):
        """Round trip through the ``tm.round_trip_localpath`` helper."""
        df = tm.makeDataFrame().reset_index()
        round_tripped = tm.round_trip_localpath(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, round_tripped)