Traktor/myenv/Lib/site-packages/pandas/tests/test_downstream.py

363 lines
10 KiB
Python
Raw Permalink Normal View History

2024-05-26 05:12:46 +02:00
"""
Testing that we work in the downstream packages
"""
import array
import subprocess
import sys
import numpy as np
import pytest
from pandas.errors import IntCastingNaNError
import pandas.util._test_decorators as td
import pandas as pd
from pandas import (
DataFrame,
DatetimeIndex,
Series,
TimedeltaIndex,
)
import pandas._testing as tm
from pandas.core.arrays import (
DatetimeArray,
TimedeltaArray,
)
@pytest.fixture
def df():
return DataFrame({"A": [1, 2, 3]})
def test_dask(df):
# dask sets "compute.use_numexpr" to False, so catch the current value
# and ensure to reset it afterwards to avoid impacting other tests
olduse = pd.get_option("compute.use_numexpr")
try:
pytest.importorskip("toolz")
dd = pytest.importorskip("dask.dataframe")
ddf = dd.from_pandas(df, npartitions=3)
assert ddf.A is not None
assert ddf.compute() is not None
finally:
pd.set_option("compute.use_numexpr", olduse)
def test_dask_ufunc():
# dask sets "compute.use_numexpr" to False, so catch the current value
# and ensure to reset it afterwards to avoid impacting other tests
olduse = pd.get_option("compute.use_numexpr")
try:
da = pytest.importorskip("dask.array")
dd = pytest.importorskip("dask.dataframe")
s = Series([1.5, 2.3, 3.7, 4.0])
ds = dd.from_pandas(s, npartitions=2)
result = da.fix(ds).compute()
expected = np.fix(s)
tm.assert_series_equal(result, expected)
finally:
pd.set_option("compute.use_numexpr", olduse)
def test_construct_dask_float_array_int_dtype_match_ndarray():
# GH#40110 make sure we treat a float-dtype dask array with the same
# rules we would for an ndarray
dd = pytest.importorskip("dask.dataframe")
arr = np.array([1, 2.5, 3])
darr = dd.from_array(arr)
res = Series(darr)
expected = Series(arr)
tm.assert_series_equal(res, expected)
# GH#49599 in 2.0 we raise instead of silently ignoring the dtype
msg = "Trying to coerce float values to integers"
with pytest.raises(ValueError, match=msg):
Series(darr, dtype="i8")
msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
arr[2] = np.nan
with pytest.raises(IntCastingNaNError, match=msg):
Series(darr, dtype="i8")
# which is the same as we get with a numpy input
with pytest.raises(IntCastingNaNError, match=msg):
Series(arr, dtype="i8")
def test_xarray(df):
pytest.importorskip("xarray")
assert df.to_xarray() is not None
def test_xarray_cftimeindex_nearest():
# https://github.com/pydata/xarray/issues/3751
cftime = pytest.importorskip("cftime")
xarray = pytest.importorskip("xarray")
times = xarray.cftime_range("0001", periods=2)
key = cftime.DatetimeGregorian(2000, 1, 1)
result = times.get_indexer([key], method="nearest")
expected = 1
assert result == expected
@pytest.mark.single_cpu
def test_oo_optimizable():
# GH 21071
subprocess.check_call([sys.executable, "-OO", "-c", "import pandas"])
@pytest.mark.single_cpu
def test_oo_optimized_datetime_index_unpickle():
# GH 42866
subprocess.check_call(
[
sys.executable,
"-OO",
"-c",
(
"import pandas as pd, pickle; "
"pickle.loads(pickle.dumps(pd.date_range('2021-01-01', periods=1)))"
),
]
)
def test_statsmodels():
smf = pytest.importorskip("statsmodels.formula.api")
df = DataFrame(
{"Lottery": range(5), "Literacy": range(5), "Pop1831": range(100, 105)}
)
smf.ols("Lottery ~ Literacy + np.log(Pop1831)", data=df).fit()
def test_scikit_learn():
pytest.importorskip("sklearn")
from sklearn import (
datasets,
svm,
)
digits = datasets.load_digits()
clf = svm.SVC(gamma=0.001, C=100.0)
clf.fit(digits.data[:-1], digits.target[:-1])
clf.predict(digits.data[-1:])
def test_seaborn():
seaborn = pytest.importorskip("seaborn")
tips = DataFrame(
{"day": pd.date_range("2023", freq="D", periods=5), "total_bill": range(5)}
)
seaborn.stripplot(x="day", y="total_bill", data=tips)
def test_pandas_datareader():
pytest.importorskip("pandas_datareader")
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
def test_pyarrow(df):
pyarrow = pytest.importorskip("pyarrow")
table = pyarrow.Table.from_pandas(df)
result = table.to_pandas()
tm.assert_frame_equal(result, df)
def test_yaml_dump(df):
# GH#42748
yaml = pytest.importorskip("yaml")
dumped = yaml.dump(df)
loaded = yaml.load(dumped, Loader=yaml.Loader)
tm.assert_frame_equal(df, loaded)
loaded2 = yaml.load(dumped, Loader=yaml.UnsafeLoader)
tm.assert_frame_equal(df, loaded2)
@pytest.mark.single_cpu
def test_missing_required_dependency():
# GH 23868
# To ensure proper isolation, we pass these flags
# -S : disable site-packages
# -s : disable user site-packages
# -E : disable PYTHON* env vars, especially PYTHONPATH
# https://github.com/MacPython/pandas-wheels/pull/50
pyexe = sys.executable.replace("\\", "/")
# We skip this test if pandas is installed as a site package. We first
# import the package normally and check the path to the module before
# executing the test which imports pandas with site packages disabled.
call = [pyexe, "-c", "import pandas;print(pandas.__file__)"]
output = subprocess.check_output(call).decode()
if "site-packages" in output:
pytest.skip("pandas installed as site package")
# This test will fail if pandas is installed as a site package. The flags
# prevent pandas being imported and the test will report Failed: DID NOT
# RAISE <class 'subprocess.CalledProcessError'>
call = [pyexe, "-sSE", "-c", "import pandas"]
msg = (
rf"Command '\['{pyexe}', '-sSE', '-c', 'import pandas'\]' "
"returned non-zero exit status 1."
)
with pytest.raises(subprocess.CalledProcessError, match=msg) as exc:
subprocess.check_output(call, stderr=subprocess.STDOUT)
output = exc.value.stdout.decode()
for name in ["numpy", "pytz", "dateutil"]:
assert name in output
def test_frame_setitem_dask_array_into_new_col():
# GH#47128
# dask sets "compute.use_numexpr" to False, so catch the current value
# and ensure to reset it afterwards to avoid impacting other tests
olduse = pd.get_option("compute.use_numexpr")
try:
da = pytest.importorskip("dask.array")
dda = da.array([1, 2])
df = DataFrame({"a": ["a", "b"]})
df["b"] = dda
df["c"] = dda
df.loc[[False, True], "b"] = 100
result = df.loc[[1], :]
expected = DataFrame({"a": ["b"], "b": [100], "c": [2]}, index=[1])
tm.assert_frame_equal(result, expected)
finally:
pd.set_option("compute.use_numexpr", olduse)
def test_pandas_priority():
# GH#48347
class MyClass:
__pandas_priority__ = 5000
def __radd__(self, other):
return self
left = MyClass()
right = Series(range(3))
assert right.__add__(left) is NotImplemented
assert right + left is left
@pytest.fixture(
params=[
"memoryview",
"array",
pytest.param("dask", marks=td.skip_if_no("dask.array")),
pytest.param("xarray", marks=td.skip_if_no("xarray")),
]
)
def array_likes(request):
"""
Fixture giving a numpy array and a parametrized 'data' object, which can
be a memoryview, array, dask or xarray object created from the numpy array.
"""
# GH#24539 recognize e.g xarray, dask, ...
arr = np.array([1, 2, 3], dtype=np.int64)
name = request.param
if name == "memoryview":
data = memoryview(arr)
elif name == "array":
data = array.array("i", arr)
elif name == "dask":
import dask.array
data = dask.array.array(arr)
elif name == "xarray":
import xarray as xr
data = xr.DataArray(arr)
return arr, data
@pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"])
def test_from_obscure_array(dtype, array_likes):
# GH#24539 recognize e.g xarray, dask, ...
# Note: we dont do this for PeriodArray bc _from_sequence won't accept
# an array of integers
# TODO: could check with arraylike of Period objects
arr, data = array_likes
cls = {"M8[ns]": DatetimeArray, "m8[ns]": TimedeltaArray}[dtype]
depr_msg = f"{cls.__name__}.__init__ is deprecated"
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
expected = cls(arr)
result = cls._from_sequence(data, dtype=dtype)
tm.assert_extension_array_equal(result, expected)
if not isinstance(data, memoryview):
# FIXME(GH#44431) these raise on memoryview and attempted fix
# fails on py3.10
func = {"M8[ns]": pd.to_datetime, "m8[ns]": pd.to_timedelta}[dtype]
result = func(arr).array
expected = func(data).array
tm.assert_equal(result, expected)
# Let's check the Indexes while we're here
idx_cls = {"M8[ns]": DatetimeIndex, "m8[ns]": TimedeltaIndex}[dtype]
result = idx_cls(arr)
expected = idx_cls(data)
tm.assert_index_equal(result, expected)
def test_dataframe_consortium() -> None:
"""
Test some basic methods of the dataframe consortium standard.
Full testing is done at https://github.com/data-apis/dataframe-api-compat,
this is just to check that the entry point works as expected.
"""
pytest.importorskip("dataframe_api_compat")
df_pd = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df = df_pd.__dataframe_consortium_standard__()
result_1 = df.get_column_names()
expected_1 = ["a", "b"]
assert result_1 == expected_1
ser = Series([1, 2, 3], name="a")
col = ser.__column_consortium_standard__()
assert col.name == "a"
def test_xarray_coerce_unit():
# GH44053
xr = pytest.importorskip("xarray")
arr = xr.DataArray([1, 2, 3])
result = pd.to_datetime(arr, unit="ns")
expected = DatetimeIndex(
[
"1970-01-01 00:00:00.000000001",
"1970-01-01 00:00:00.000000002",
"1970-01-01 00:00:00.000000003",
],
dtype="datetime64[ns]",
freq=None,
)
tm.assert_index_equal(result, expected)