236 lines
6.9 KiB
Python
236 lines
6.9 KiB
Python
|
from datetime import timezone
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
Series,
|
||
|
date_range,
|
||
|
period_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("fill", [None, -1])
def test_align(datetime_series, first_slice, second_slice, join_type, fill):
    """Compare ``Series.align`` with a manual reindex onto the joined index.

    Parameters
    ----------
    datetime_series
        Fixture supplied from outside this file; the test expects the
        series to be named ``"ts"``.
    first_slice, second_slice
        ``slice`` arguments used to cut the two operands out of
        ``datetime_series`` (``[None, 0]`` yields an empty operand).
    join_type
        Fixture supplied from outside this file; forwarded both to
        ``align(join=...)`` and ``Index.join(how=...)``.
    fill
        ``fill_value`` handed to ``align``; ``None`` means no filling.
    """
    a = datetime_series[slice(*first_slice)]
    b = datetime_series[slice(*second_slice)]

    aa, ab = a.align(b, join=join_type, fill_value=fill)

    join_index = a.index.join(b.index, how=join_type)
    if fill is not None:
        # The aligned results are indexed by ``join_index`` (asserted below),
        # so diffing their own index against it is always empty and checks
        # nothing. Look instead at the labels the join added to each operand:
        # those are the positions that must hold the fill value.
        added_to_a = join_index.difference(a.index)
        added_to_b = join_index.difference(b.index)
        if len(added_to_a) > 0:
            assert (aa.reindex(added_to_a) == fill).all()
        if len(added_to_b) > 0:
            assert (ab.reindex(added_to_b) == fill).all()

    ea = a.reindex(join_index)
    eb = b.reindex(join_index)

    if fill is not None:
        ea = ea.fillna(fill)
        eb = eb.fillna(fill)

    tm.assert_series_equal(aa, ea)
    tm.assert_series_equal(ab, eb)

    # The series name must survive both align and reindex.
    assert aa.name == "ts"
    assert ea.name == "ts"
    assert ab.name == "ts"
    assert eb.name == "ts"
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize(
    "first_slice,second_slice",
    [
        [[2, None], [None, -5]],
        [[None, 0], [None, -5]],
        [[None, -5], [None, 0]],
        [[None, 0], [None, 0]],
    ],
)
@pytest.mark.parametrize("method", ["pad", "bfill"])
@pytest.mark.parametrize("limit", [None, 1])
def test_align_fill_method(
    datetime_series, first_slice, second_slice, join_type, method, limit
):
    """Check ``align(method=..., limit=...)`` against reindex + ``fillna``.

    Both operands are slices of ``datetime_series``; the expected results are
    built by reindexing each operand onto the joined index and then filling
    with the same ``method`` and ``limit``.
    """
    left = datetime_series[slice(*first_slice)]
    right = datetime_series[slice(*second_slice)]

    aligned_left, aligned_right = left.align(
        right, join=join_type, method=method, limit=limit
    )

    # Build the reference result by hand on the joined index.
    joined = left.index.join(right.index, how=join_type)
    expected_left = left.reindex(joined).fillna(method=method, limit=limit)
    expected_right = right.reindex(joined).fillna(method=method, limit=limit)

    tm.assert_series_equal(aligned_left, expected_left)
    tm.assert_series_equal(aligned_right, expected_right)
|
||
|
|
||
|
|
||
|
def test_align_nocopy(datetime_series, using_copy_on_write):
    """Check whether writes to an aligned result reach the source series.

    With the default ``copy`` the source must stay untouched. With
    ``copy=False`` the write is expected to show up in the source, unless
    ``using_copy_on_write`` is set, in which case the source must again stay
    untouched.
    """
    head = datetime_series[:5].copy()

    # Left join, default copy: result is independent of ``source``.
    source = datetime_series.copy()
    aligned_left, _ = source.align(head, join="left")
    aligned_left[:5] = 5
    assert not (source[:5] == 5).any()

    # Left join, copy=False.
    source = datetime_series.copy()
    aligned_left, _ = source.align(head, join="left", copy=False)
    aligned_left[:5] = 5
    touched = source[:5] == 5
    if not using_copy_on_write:
        assert touched.all()
    else:
        assert not touched.any()

    # Right join, default copy: result is independent of ``head``.
    source = datetime_series.copy()
    head = datetime_series[:5].copy()
    _, aligned_right = source.align(head, join="right")
    aligned_right[:3] = 5
    assert not (head[:3] == 5).any()

    # Right join, copy=False.
    source = datetime_series.copy()
    head = datetime_series[:5].copy()
    _, aligned_right = source.align(head, join="right", copy=False)
    aligned_right[:2] = 5
    touched = head[:2] == 5
    if not using_copy_on_write:
        assert touched.all()
    else:
        assert not touched.any()
|
||
|
|
||
|
|
||
|
def test_align_same_index(datetime_series, using_copy_on_write):
    """Check index identity when a series is aligned with itself.

    ``copy=False`` must hand back the very same index object. ``copy=True``
    is expected to produce a different index object, except when
    ``using_copy_on_write`` is set, where the same object is expected again.
    """
    for aligned in datetime_series.align(datetime_series, copy=False):
        assert aligned.index is datetime_series.index

    for aligned in datetime_series.align(datetime_series, copy=True):
        if using_copy_on_write:
            assert aligned.index is datetime_series.index
        else:
            assert aligned.index is not datetime_series.index
|
||
|
|
||
|
|
||
|
def test_align_multiindex():
    """Align a three-level MultiIndex series with a series indexed by level "b".

    Swapping the operands together with the join side must give the same
    pair of results, just in flipped order.
    """
    # GH 10665
    level_names = ("a", "b", "c")
    full_index = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=level_names
    )
    multi = Series(np.arange(12, dtype="int64"), index=full_index)
    single = Series(np.arange(2, dtype="int64"), index=pd.Index(range(2), name="b"))

    # Keep every row of the MultiIndex operand.
    multi_first = multi.align(single, join="left")
    single_first = single.align(multi, join="right")

    expected_multi = multi
    tm.assert_series_equal(expected_multi, multi_first[0])
    tm.assert_series_equal(expected_multi, single_first[1])
    expected_single = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=full_index)
    tm.assert_series_equal(expected_single, multi_first[1])
    tm.assert_series_equal(expected_single, single_first[0])

    # Keep only the rows whose "b" value exists in the single-level operand.
    multi_first = multi.align(single, join="right")
    single_first = single.align(multi, join="left")

    reduced_index = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=level_names
    )
    expected_multi = Series([0, 1, 2, 3, 6, 7, 8, 9], index=reduced_index)
    tm.assert_series_equal(expected_multi, multi_first[0])
    tm.assert_series_equal(expected_multi, single_first[1])
    expected_single = Series([0, 0, 1, 1] * 2, index=reduced_index)
    tm.assert_series_equal(expected_single, multi_first[1])
    tm.assert_series_equal(expected_single, single_first[0])
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
def test_align_with_dataframe_method(method):
    """Aligning a Series with an identically indexed DataFrame changes neither."""
    # GH31788
    labels = range(3)
    frame = pd.DataFrame(0.0, index=labels, columns=range(3))
    series = Series(range(3), index=labels)

    aligned_series, aligned_frame = series.align(frame, method=method)

    tm.assert_series_equal(aligned_series, series)
    tm.assert_frame_equal(aligned_frame, frame)
|
||
|
|
||
|
|
||
|
def test_align_dt64tzindex_mismatched_tzs():
    """Aligning series whose indexes differ in timezone yields UTC indexes."""
    eastern_index = date_range("2001", periods=5, freq="H", tz="US/Eastern")
    eastern = Series(np.random.randn(len(eastern_index)), index=eastern_index)
    central = eastern.tz_convert("US/Central")

    # Different timezones on the two operands: both results convert to UTC.
    for aligned in eastern.align(central):
        assert aligned.index.tz is timezone.utc
|
||
|
|
||
|
|
||
|
def test_align_periodindex(join_type):
    """Align a PeriodIndex-backed Series with every second element of itself.

    Parameters
    ----------
    join_type
        Fixture supplied from outside this file; forwarded both to
        ``align(join=...)`` and ``Index.join(how=...)``.

    Each aligned result is compared with the corresponding operand reindexed
    by hand onto the join of the two indexes.
    """
    rng = period_range("1/1/2000", "1/1/2010", freq="A")
    ts = Series(np.random.randn(len(rng)), index=rng)
    other = ts[::2]

    result_left, result_right = ts.align(other, join=join_type)

    # Reference result: reindex each operand onto the joined index.
    expected_index = ts.index.join(other.index, how=join_type)
    tm.assert_series_equal(result_left, ts.reindex(expected_index))
    tm.assert_series_equal(result_right, other.reindex(expected_index))
|
||
|
|
||
|
|
||
|
def test_align_left_fewer_levels():
    """Align a two-level series with a three-level one sharing levels "a", "c".

    Both results are expected on the level order ("a", "c", "b").
    """
    # GH#45224
    two_level_index = pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"])
    three_level_index = pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    left = Series([2], index=two_level_index)
    right = Series([1], index=three_level_index)

    result_left, result_right = left.align(right)

    aligned_names = ["a", "c", "b"]
    expected_left = Series(
        [2], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=aligned_names)
    )
    expected_right = Series(
        [1], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=aligned_names)
    )
    tm.assert_series_equal(result_left, expected_left)
    tm.assert_series_equal(result_right, expected_right)
|
||
|
|
||
|
|
||
|
def test_align_left_different_named_levels():
    """Align two three-level series that share only levels "a" and "c".

    Both results are expected on the four-level order ("a", "c", "d", "b").
    """
    # GH#45224
    left_index = pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
    right_index = pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    left = Series([2], index=left_index)
    right = Series([1], index=right_index)

    result_left, result_right = left.align(right)

    aligned_names = ["a", "c", "d", "b"]
    expected_left = Series(
        [2], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=aligned_names)
    )
    expected_right = Series(
        [1], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=aligned_names)
    )
    tm.assert_series_equal(result_left, expected_left)
    tm.assert_series_equal(result_right, expected_right)
|