190 lines
5.2 KiB
Python
190 lines
5.2 KiB
Python
import numpy as np
|
|
import pytest
|
|
|
|
from pandas import DataFrame
|
|
import pandas._testing as tm
|
|
|
|
|
|
@pytest.fixture
def df1():
    """
    Base frame used as the left operand in the merge/join tests.

    Holds 11 rows with integer key columns ``outer`` and ``inner`` plus a
    float value column ``v1`` of 11 evenly spaced points from 0 to 1.
    """
    columns = {
        "outer": [1, 1, 1, 2, 2, 2, 2, 3, 3, 4, 4],
        "inner": [1, 2, 3, 1, 2, 3, 4, 1, 2, 1, 2],
        "v1": np.linspace(0, 1, 11),
    }
    return DataFrame(columns)
|
|
|
|
|
|
@pytest.fixture
def df2():
    """
    Base frame used as the right operand in the merge/join tests.

    Holds 12 rows with integer key columns ``outer`` and ``inner`` plus a
    float value column ``v2`` of 12 evenly spaced points from 10 to 11.
    Some (outer, inner) pairs are repeated.
    """
    columns = {
        "outer": [1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3],
        "inner": [1, 2, 2, 3, 3, 4, 2, 3, 1, 1, 2, 3],
        "v2": np.linspace(10, 11, 12),
    }
    return DataFrame(columns)
|
|
|
|
|
|
@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
def left_df(request, df1):
    """
    Left test DataFrame, parametrized over which columns form its index.

    The parameter is a list of column names to move into the index: none,
    ``["outer"]``, or ``["outer", "inner"]``. With an empty list ``df1`` is
    returned as is, keeping its default index.
    """
    index_levels = request.param
    # set_index rejects an empty key list, so only call it when there are keys.
    return df1.set_index(index_levels) if index_levels else df1
|
|
|
|
|
|
@pytest.fixture(params=[[], ["outer"], ["outer", "inner"]])
def right_df(request, df2):
    """
    Right test DataFrame, parametrized over which columns form its index.

    The parameter is a list of column names to move into the index: none,
    ``["outer"]``, or ``["outer", "inner"]``. With an empty list ``df2`` is
    returned as is, keeping its default index.
    """
    index_levels = request.param

    # Nothing to index on: hand back the plain column-only frame.
    if not index_levels:
        return df2

    return df2.set_index(index_levels)
|
|
|
|
|
|
def compute_expected(df_left, df_right, on=None, left_on=None, right_on=None, how=None):
    """
    Build the reference result for a merge on columns and/or index levels.

    The merge keys may name either columns or named index levels of the
    inputs. The reference is produced using only a plain column merge:
    named index levels that are not merge keys are discarded, those that are
    merge keys are turned into columns, the frames are merged on columns,
    and finally the keys that were index levels on *both* inputs are put
    back as the index of the result.

    Parameters
    ----------
    df_left : DataFrame
        Left operand (may have zero or more named index levels).
    df_right : DataFrame
        Right operand (may have zero or more named index levels).
    on : list of str
        Keys shared by both sides; when given it overrides ``left_on`` and
        ``right_on``.
    left_on : list of str
        Keys for the left operand.
    right_on : list of str
        Keys for the right operand.
    how : str
        Merge type, forwarded unchanged to ``DataFrame.merge``.

    Returns
    -------
    DataFrame
        The expected merge result.
    """

    def named_levels(frame):
        # Unnamed levels (name is None) cannot be referenced as merge keys.
        return [name for name in frame.index.names if name is not None]

    # ``on`` is shorthand for using the same keys on both sides.
    if on is not None:
        left_on = right_on = on

    left_levels = named_levels(df_left)
    right_levels = named_levels(df_right)

    # Only keys that are index levels on both inputs end up in the result
    # index; the order follows the left key list.
    output_levels = [
        key for key in left_on if key in left_levels and key in right_levels
    ]

    # Throw away index levels that take no part in the merge.
    unused_left = [name for name in left_levels if name not in left_on]
    if unused_left:
        df_left = df_left.reset_index(level=unused_left, drop=True)

    unused_right = [name for name in right_levels if name not in right_on]
    if unused_right:
        df_right = df_right.reset_index(level=unused_right, drop=True)

    # Index levels used as keys become ordinary columns.
    key_levels_left = [name for name in left_levels if name in left_on]
    if key_levels_left:
        df_left = df_left.reset_index(level=key_levels_left)

    key_levels_right = [name for name in right_levels if name in right_on]
    if key_levels_right:
        df_right = df_right.reset_index(level=key_levels_right)

    # Plain column-on-column merge.
    merged = df_left.merge(df_right, left_on=left_on, right_on=right_on, how=how)

    # Re-establish the index levels shared by both inputs, if any.
    return merged.set_index(output_levels) if output_levels else merged
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("on", "how"),
    [
        (["outer"], "inner"),
        (["inner"], "left"),
        (["outer", "inner"], "right"),
        (["inner", "outer"], "outer"),
    ],
)
def test_merge_indexes_and_columns_on(left_df, right_df, on, how):
    """
    Merging with ``on`` must match the column-only reference result.

    ``left_df`` and ``right_df`` each come with zero, one or two of the key
    columns moved into the index, so ``on`` refers to a mix of columns and
    index levels.
    """
    merge_kwargs = {"on": on, "how": how}

    # Reference result built through explicit index resets.
    expected = compute_expected(left_df, right_df, **merge_kwargs)

    # Merge under test.
    result = left_df.merge(right_df, **merge_kwargs)

    # check_like=True: row/column ordering is not part of the comparison.
    tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
|
|
@pytest.mark.parametrize(
    ("left_on", "right_on", "how"),
    [
        (["outer"], ["outer"], "inner"),
        (["inner"], ["inner"], "right"),
        (["outer", "inner"], ["outer", "inner"], "left"),
        (["inner", "outer"], ["inner", "outer"], "outer"),
    ],
)
def test_merge_indexes_and_columns_lefton_righton(
    left_df, right_df, left_on, right_on, how
):
    """
    Merging with ``left_on``/``right_on`` must match the reference result.

    ``left_df`` and ``right_df`` each come with zero, one or two of the key
    columns moved into the index, so the keys refer to a mix of columns and
    index levels.
    """
    merge_kwargs = {"left_on": left_on, "right_on": right_on, "how": how}

    # Reference result built through explicit index resets.
    expected = compute_expected(left_df, right_df, **merge_kwargs)

    # Merge under test.
    result = left_df.merge(right_df, **merge_kwargs)

    # check_like=True: row/column ordering is not part of the comparison.
    tm.assert_frame_equal(result, expected, check_like=True)
|
|
|
|
|
|
@pytest.mark.parametrize("left_index", ["inner", ["inner", "outer"]])
def test_join_indexes_and_columns_on(df1, df2, left_index, join_type):
    """
    Joining on keys held in the left index must match joining on columns.

    The left frame is indexed by ``left_index``; the right frame is always
    indexed by ``["outer", "inner"]``. ``join_type`` is a fixture that is not
    defined in this file (presumably supplied by a conftest) and is passed
    through as ``how``.
    """
    left_df = df1.set_index(left_index)
    right_df = df2.set_index(["outer", "inner"])

    def join_with_right(frame):
        # Same join arguments for both the reference and the tested call.
        return frame.join(
            right_df, on=["outer", "inner"], how=join_type, lsuffix="_x", rsuffix="_y"
        )

    # Reference: flatten the left index to columns, join, then restore it.
    flattened_left = left_df.reset_index()
    expected = join_with_right(flattened_left).set_index(left_index)

    # Join under test: keys are resolved against the left index levels.
    result = join_with_right(left_df)

    # check_like=True: row/column ordering is not part of the comparison.
    tm.assert_frame_equal(result, expected, check_like=True)
|